├── MANIFEST.in
├── requirements.txt
├── setup.cfg
├── bashlex
    ├── state.py
    ├── __init__.py
    ├── errors.py
    ├── shutils.py
    ├── heredoc.py
    ├── utils.py
    ├── flags.py
    ├── ast.py
    ├── subst.py
    ├── parser.py
    └── tokenizer.py
├── pyproject.toml
├── .gitignore
├── Makefile
├── setup.py
├── .github
    └── workflows
    │   └── test.yml
├── examples
    └── commandsubstitution-remover.py
├── README.md
├── tests
    ├── test_tokenizer.py
    └── test_parser.py
└── LICENSE


/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | enum34; python_version < "3.4"
2 | build
3 | twine
4 | pytest
5 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 | 
4 | [tool:pytest]
5 | addopts = --doctest-modules -ra
6 | 


--------------------------------------------------------------------------------
/bashlex/state.py:
--------------------------------------------------------------------------------
1 | from bashlex import flags, utils
2 | 
3 | parserstate = lambda: utils.typedset(flags.parser)
4 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=42",
4 |     "wheel"
5 | ]
6 | 
7 | build-backend = "setuptools.build_meta"
8 | 


--------------------------------------------------------------------------------
/bashlex/__init__.py:
--------------------------------------------------------------------------------
1 | from bashlex import parser, tokenizer
2 | 
3 | parse = parser.parse
4 | parsesingle = parser.parsesingle
5 | split = parser.split
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.swp
 3 | .coverage
 4 | .vagrant
 5 | bashlex/parser.out
 6 | bashlex/parsetab.py
 7 | 
 8 | build/
 9 | dist/
10 | bashlex.egg-info/
11 | *env*/
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | tests:
2 | 	@python -c "import pytest" >/dev/null 2>&1 || (echo "error: pytest missing, run 'pip install pytest'\n" && false)
3 | 	python -m pytest
4 | 
5 | .PHONY: tests
6 | 


--------------------------------------------------------------------------------
/bashlex/errors.py:
--------------------------------------------------------------------------------
1 | class ParsingError(Exception):
2 |     def __init__(self, message, s, position):
3 |         self.message = message
4 |         self.s = s
5 |         self.position = position
6 | 
7 |         assert position <= len(s)
8 |         super(ParsingError, self).__init__('%s (position %d)' % (message, position))
9 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | 
 4 | setup(
 5 |     name='bashlex',
 6 |     version='0.18',
 7 |     url='https://github.com/idank/bashlex.git',
 8 |     license='GPLv3+',
 9 |     author='Idan Kamara',
10 |     author_email='i@idank.me',
11 |     description='Python parser for bash',
12 |     long_description='''bashlex is a Python port of the parser used internally by GNU bash.
13 | 
14 | For the most part it's transliterated from C, the major differences are:
15 | 
16 | 1. it does not execute anything
17 | 2. it is reentrant
18 | 3. it generates a complete AST
19 | 
20 | See https://github.com/idank/bashlex/blob/master/README.md for more info.''',
21 |     classifiers=[
22 |         'Development Status :: 4 - Beta',
23 |         'Environment :: Console',
24 |         'Intended Audience :: Developers',
25 |         'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
26 |         'Operating System :: OS Independent',
27 |         'Programming Language :: Python',
28 |         'Topic :: Software Development :: Libraries :: Python Modules',
29 |         'Topic :: System :: System Shells',
30 |         'Topic :: Text Processing',
31 |     ],
32 |     python_requires=">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4",
33 |     install_requires=['enum34; python_version < "3.4"'],
34 |     packages=['bashlex'],
35 | )
36 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on: [push, pull_request, workflow_dispatch]
 4 | 
 5 | env:
 6 |   FORCE_COLOR: 1
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       fail-fast: false
13 |       matrix:
14 |         python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
15 |         os: [ubuntu-latest, macos-latest, windows-latest]
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v2
19 | 
20 |       - name: Set up Python ${{ matrix.python-version }}
21 |         uses: actions/setup-python@v2
22 |         with:
23 |           python-version: ${{ matrix.python-version }}
24 | 
25 |       - name: Get pip cache dir
26 |         id: pip-cache
27 |         run: |
28 |           echo "::set-output name=dir::$(pip cache dir)"
29 | 
30 |       - name: Cache
31 |         uses: actions/cache@v2
32 |         with:
33 |           path: ${{ steps.pip-cache.outputs.dir }}
34 |           key:
35 |             ${{ matrix.os }}-${{ matrix.python-version }}-v1-${{ hashFiles('**/requirements.txt') }}
36 |           restore-keys: |
37 |             ${{ matrix.os }}-${{ matrix.python-version }}-v1-
38 | 
39 |       - name: Install dependencies
40 |         run: |
41 |           python -m pip install -U pip
42 |           python -m pip install -U wheel
43 |           python -m pip install -Ur requirements.txt
44 |           python setup.py install
45 | 
46 |       - name: Test
47 |         run: |
48 |           make tests
49 | 


--------------------------------------------------------------------------------
/bashlex/shutils.py:
--------------------------------------------------------------------------------
 1 | def single_quote(s):
 2 |     if s[0] == "'" and len(s) == 1:
 3 |         return "\\'"
 4 | 
 5 |     l = ["'"]
 6 | 
 7 |     for c in s:
 8 |         l.append(c)
 9 |         if c == "'":
10 |             l.extend(["\\''"])
11 | 
12 |     l.append("'")
13 | 
14 |     return ''.join(l)
15 | 
16 | def double_quote(s):
17 |     return s
18 | 
19 | def legal_number(s):
20 |     try:
21 |         x = int(s)
22 |         return True
23 |     except ValueError:
24 |         return False
25 | 
26 | def legal_identifier(name):
27 |     pass
28 | 
29 | def removequotes(s, heredoc=False, doublequotes=False):
30 |     r = ''
31 |     sindex = 0
32 |     dquote = False
33 |     while sindex < len(s):
34 |         c = s[sindex]
35 |         if c == '\\':
36 |             sindex += 1
37 |             if sindex == len(s):
38 |                 r += '\\'
39 |                 return r
40 |             c = s[sindex]
41 |             if ((heredoc and doublequotes) or dquote) and not _shellquote(c):
42 |                 r += '\\'
43 |             r += c
44 |         elif c == "'":
45 |             if (heredoc and doublequotes) or dquote:
46 |                 r += c
47 |                 sindex += 1
48 |             else:
49 |                 t = s.find("'", sindex + 1)
50 |                 if t == -1:
51 |                     t = len(s)
52 |                 else:
53 |                     t += 1
54 | 
55 |                 r += s[sindex + 1:t-1]
56 |                 sindex = t
57 |         elif c == '"':
58 |             dquote = not dquote
59 |             sindex += 1
60 |         else:
61 |             r += c
62 |             sindex += 1
63 |     return r
64 | 


--------------------------------------------------------------------------------
/bashlex/heredoc.py:
--------------------------------------------------------------------------------
 1 | from bashlex import ast, errors
 2 | 
 3 | def gatherheredocuments(tokenizer):
 4 |     # if we're at the end of the input and we're not strict, allow skipping
 5 |     # reading the heredoc
 6 |     while tokenizer.redirstack:
 7 |         if tokenizer._peekc() is None and not tokenizer._strictmode:
 8 |             tokenizer._shell_input_line_index += 1
 9 |             return
10 | 
11 |         redirnode, killleading = tokenizer.redirstack.pop(0)
12 |         makeheredoc(tokenizer, redirnode, 0, killleading)
13 | 
14 | def makeheredoc(tokenizer, redirnode, lineno, killleading):
15 |     # redirword = string_quote_removal(redirectnode.word)
16 |     redirword = redirnode.output.word
17 |     document = []
18 | 
19 |     startpos = tokenizer._shell_input_line_index
20 | 
21 |     #fullline = self.tok.readline(bool(redirword.output.flags & flags.word.QUOTED))
22 |     fullline = tokenizer.readline(False)
23 |     while fullline:
24 |         if killleading:
25 |             while fullline[0] == '\t':
26 |                 fullline = fullline[1:]
27 | 
28 |         if not fullline:
29 |             continue
30 | 
31 |         if fullline[:-1] == redirword and fullline[len(redirword)] == '\n':
32 |             document.append(fullline[:-1])
33 |             # document_done
34 |             break
35 | 
36 |         document.append(fullline)
37 |         #fullline = self.readline(bool(redirnode.flags & flags.word.QUOTED))
38 |         fullline = tokenizer.readline(False)
39 | 
40 |     if not fullline:
41 |         raise errors.ParsingError("here-document at line %d delimited by end-of-file (wanted %r)" % (lineno, redirword), tokenizer._shell_input_line, tokenizer._shell_input_line_index)
42 | 
43 |     document = ''.join(document)
44 |     endpos = tokenizer._shell_input_line_index - 1
45 | 
46 |     assert hasattr(redirnode, 'heredoc')
47 |     redirnode.heredoc = ast.node(kind='heredoc', value=document,
48 |                                  pos=(startpos, endpos))
49 | 
50 |     # if the heredoc immediately follows this node, fix its end pos
51 |     if redirnode.pos[1] + 1 == startpos:
52 |         redirnode.pos = (redirnode.pos[0], endpos)
53 | 
54 |     return document
55 | 


--------------------------------------------------------------------------------
/bashlex/utils.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     from collections.abc import MutableSet, Mapping
 3 | except ImportError:
 4 |     # Python 2 fallback
 5 |     from collections import MutableSet, Mapping
 6 | 
 7 | 
 8 | class typedset(MutableSet):
 9 |     def __init__(self, type_, iterable=[]):
10 |         self._s = set()
11 |         self._type = type_
12 |         for v in iterable:
13 |             self.add(v)
14 | 
15 |     def add(self, value):
16 |         if not isinstance(value, self._type):
17 |             raise ValueError('can only add items of type %s to this set' % self._type)
18 |         self._s.add(value)
19 | 
20 |     def discard(self, value):
21 |         self._s.discard(value)
22 | 
23 |     def __contains__(self, value):
24 |         return self._s.__contains__(value)
25 | 
26 |     def __iter__(self):
27 |         return self._s.__iter__()
28 | 
29 |     def __len__(self):
30 |         return len(self._s)
31 | 
32 |     def __and__(self, value):
33 |         if isinstance(value, self._type):
34 |             value = set([value])
35 |         return self._s.__and__(value)
36 | 
37 |     def __or__(self, value):
38 |         if isinstance(value, self._type):
39 |             value = set([value])
40 |         return self._s.__or__(value)
41 | 
42 |     def __ior__(self, value):
43 |         if isinstance(value, self._type):
44 |             value = set([value])
45 |         self._s.__ior__(value)
46 |         return self
47 | 
48 |     #def __sub__(self, value):
49 |     #    if isinstance(value, self._type):
50 |     #        value = set([value])
51 |     #    return self._s.__sub__(value)
52 | 
53 |     def __repr__(self):
54 |         return self._s.__repr__()
55 | 
56 | class frozendict(Mapping):
57 |     def __init__(self, *args, **kwargs):
58 |         self.__dict = dict(*args, **kwargs)
59 |         self.__hash = None
60 | 
61 |     def __getitem__(self, key):
62 |         return self.__dict[key]
63 | 
64 |     def copy(self, **add_or_replace):
65 |         return frozendict(self, **add_or_replace)
66 | 
67 |     def __iter__(self):
68 |         return iter(self.__dict)
69 | 
70 |     def __len__(self):
71 |         return len(self.__dict)
72 | 
73 |     def __repr__(self):
74 |         return '<frozendict %s>' % repr(self.__dict)
75 | 


--------------------------------------------------------------------------------
/examples/commandsubstitution-remover.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import sys
 4 | 
 5 | import argparse
 6 | from argparse import RawTextHelpFormatter
 7 | 
 8 | from bashlex import parser, ast
 9 | 
10 | class nodevisitor(ast.nodevisitor):
11 |     def __init__(self, positions):
12 |         self.positions = positions
13 | 
14 |     def visitcommandsubstitution(self, n, command):
15 |         # log the start and end positions of this command substitution
16 |         self.positions.append(n.pos)
17 | 
18 |         # do not recurse into child nodes
19 |         return False
20 | 
21 | desc = '''replace all occurrences of $() and `` with the string given in -s
22 | 
23 |   $ commandsubstitution-remover.py -s nope -c 'foo $(bar)'
24 |   foo nope
25 | 
26 | within words:
27 | 
28 |   $ commandsubstitution-remover.py -c '"foo $(bar) baz"'
29 |   "foo XXX baz"
30 | 
31 | but not within single quotes, since they cancel special meaning:
32 | 
33 |   $ commandsubstitution-remover.py -c "foo '"'$(bar)'"'"
34 |   foo '$(bar)'
35 | 
36 | (this a simple script to demonstrate how to traverse the ast produced
37 | by bashlex)
38 | '''
39 | 
40 | if __name__ == '__main__':
41 |     argparser = argparse.ArgumentParser(description=desc,
42 |                                         formatter_class=RawTextHelpFormatter)
43 |     argparser.add_argument('-s', dest='replacement', metavar='S', default='XXX',
44 |                            help='replace occurrences with S (default: XXX)')
45 | 
46 |     group = argparser.add_mutually_exclusive_group()
47 |     group.add_argument('file', metavar='file', type=file, nargs='?',
48 |                        help='file to parse')
49 |     group.add_argument('-c', dest='expression',
50 |                        help='string to parse')
51 | 
52 |     args = argparser.parse_args()
53 | 
54 |     if args.expression:
55 |         s = args.expression
56 |     elif args.file:
57 |         s = args.file.read()
58 |     else:
59 |         s = sys.stdin.read()
60 | 
61 |     trees = parser.parse(s)
62 |     positions = []
63 |     for tree in trees:
64 |         visitor = nodevisitor(positions)
65 |         visitor.visit(tree)
66 | 
67 |     # do replacements from the end so the indicies will be correct
68 |     positions.reverse()
69 | 
70 |     postprocessed = list(s)
71 | 
72 |     for start, end in positions:
73 |         # replace the portion of the input where the substitution occurred
74 |         # with the replacement string
75 |         postprocessed[start:end] = args.replacement
76 | 
77 |     print(''.join(postprocessed))
78 | 


--------------------------------------------------------------------------------
/bashlex/flags.py:
--------------------------------------------------------------------------------
 1 | import enum
 2 | 
 3 | parser = enum.Enum('parserflags', [
 4 |     'CASEPAT', # in a case pattern list
 5 |     'ALEXPNEXT', # expand next word for aliases
 6 |     'ALLOWOPNBRC', # allow open brace for function def
 7 |     'NEEDCLOSBRC', # need close brace
 8 |     'DBLPAREN', # double-paren parsing
 9 |     'SUBSHELL', # ( ... ) subshell
10 |     'CMDSUBST', # $( ... ) command substitution
11 |     'CASESTMT', # parsing a case statement
12 |     'CONDCMD', # parsing a [[...]] command
13 |     'CONDEXPR', # parsing the guts of [[...]]
14 |     'ARITHFOR', # parsing an arithmetic for command - unused
15 |     'ALEXPAND', # OK to expand aliases - unused
16 |     'EXTPAT', # parsing an extended shell pattern
17 |     'COMPASSIGN', # parsing x=(...) compound assignment
18 |     'ASSIGNOK', # assignment statement ok in this context
19 |     'EOFTOKEN', # yylex checks against shell_eof_token
20 |     'REGEXP', # parsing an ERE/BRE as a single word
21 |     'HEREDOC', # reading body of here-document
22 |     'REPARSE', # re-parsing in parse_string_to_word_list
23 |     'REDIRLIST', # parsing a list of redirections preceding a simple command name
24 |     ])
25 | 
26 | word = enum.Enum('wordflags', [
27 |     'HASDOLLAR', # Dollar sign present
28 |     'QUOTED', # Some form of quote character is present
29 |     'ASSIGNMENT', # This word is a variable assignment
30 |     'SPLITSPACE', # Split this word on " " regardless of IFS
31 |     'NOSPLIT', # Do not perform word splitting on this word because ifs is empty string
32 |     'NOGLOB', # Do not perform globbing on this word
33 |     'NOSPLIT2', # Don't split word except for $@ expansion (using spaces) because context does not allow it
34 |     'TILDEEXP', # Tilde expand this assignment word
35 |     'DOLLARAT', # $@ and its special handling
36 |     'DOLLARSTAR', # $* and its special handling
37 |     'NOCOMSUB', # Don't perform command substitution on this word
38 |     'ASSIGNRHS', # Word is rhs of an assignment statement
39 |     'NOTILDE', # Don't perform tilde expansion on this word
40 |     'ITILDE', # Internal flag for word expansion
41 |     'NOEXPAND', # Don't expand at all -- do quote removal
42 |     'COMPASSIGN', # Compound assignment
43 |     'ASSNBLTIN', # word is a builtin command that takes assignments
44 |     'ASSIGNARG', # word is assignment argument to command
45 |     'HASQUOTEDNULL', # word contains a quoted null character
46 |     'DQUOTE', # word should be treated as if double-quoted
47 |     'NOPROCSUB', # don't perform process substitution
48 |     'HASCTLESC', # word contains literal CTLESC characters
49 |     'ASSIGNASSOC', # word looks like associative array assignment
50 |     'ASSIGNARRAY', # word looks like a compound indexed array assignment
51 |     'ARRAYIND', # word is an array index being expanded
52 |     'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g)
53 |     'NOBRACE', # Don't perform brace expansion
54 |     'ASSIGNINT', # word is an integer assignment to declare
55 |     ])
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # bashlex - Python parser for bash
  2 | 
  3 | [![GitHub Actions status](https://github.com/idank/bashlex/workflows/Test/badge.svg)](https://github.com/idank/bashlex/actions)
  4 | 
  5 | bashlex is a Python port of the parser used internally by GNU bash.
  6 | 
  7 | For the most part it's transliterated from C, the major differences are:
  8 | 
  9 | 1. it does not execute anything
 10 | 2. it is reentrant
 11 | 3. it generates a complete AST
 12 | 
 13 | ## Installation:
 14 | 
 15 |     $ pip install bashlex
 16 | 
 17 | ## Usage
 18 | 
 19 |     $ python
 20 |     >>> import bashlex
 21 |     >>> parts = bashlex.parse('true && cat <(echo $(echo foo))')
 22 |     >>> for ast in parts:
 23 |     ...     print ast.dump()
 24 |     ListNode(pos=(0, 31), parts=[
 25 |       CommandNode(pos=(0, 4), parts=[
 26 |         WordNode(pos=(0, 4), word='true'),
 27 |       ]),
 28 |       OperatorNode(op='&&', pos=(5, 7)),
 29 |       CommandNode(pos=(8, 31), parts=[
 30 |         WordNode(pos=(8, 11), word='cat'),
 31 |         WordNode(pos=(12, 31), word='<(echo $(echo foo))', parts=[
 32 |           ProcesssubstitutionNode(command=
 33 |             CommandNode(pos=(14, 30), parts=[
 34 |               WordNode(pos=(14, 18), word='echo'),
 35 |               WordNode(pos=(19, 30), word='$(echo foo)', parts=[
 36 |                 CommandsubstitutionNode(command=
 37 |                   CommandNode(pos=(21, 29), parts=[
 38 |                     WordNode(pos=(21, 25), word='echo'),
 39 |                     WordNode(pos=(26, 29), word='foo'),
 40 |                   ]), pos=(19, 30)),
 41 |               ]),
 42 |             ]), pos=(12, 31)),
 43 |         ]),
 44 |       ]),
 45 |     ])
 46 | 
 47 | It is also possible to only use the tokenizer and get similar behaviour to
 48 | shlex.split, but bashlex understands more complex constructs such as command
 49 | and process substitutions:
 50 | 
 51 |     >>> list(bashlex.split('cat <(echo "a $(echo b)") | tee'))
 52 |     ['cat', '<(echo "a $(echo b)")', '|', 'tee']
 53 | 
 54 | ..compared to shlex:
 55 | 
 56 |     >>> shlex.split('cat <(echo "a $(echo b)") | tee')
 57 |     ['cat', '<(echo', 'a $(echo b))', '|', 'tee']
 58 | 
 59 | The examples/ directory contains a sample script that demonstrate how to
 60 | traverse the ast to do more complicated things.
 61 | 
 62 | ## Limitations
 63 | 
 64 | Currently the parser has no support for:
 65 | 
 66 | - arithmetic expressions $((..))
 67 | - the more complicated parameter expansions such as ${parameter#word} are taken
 68 |   literally and do not produce child nodes
 69 | 
 70 | ## Debugging
 71 | 
 72 | It can be useful to debug bashlex in conjunction to GNU bash, since it's mostly
 73 | a transliteration. Comments in the code sometimes contain line references to
 74 | bash's source code, e.g. `# bash/parse.y L2626`.
 75 | 
 76 |     $ git clone git://git.sv.gnu.org/bash.git
 77 |     $ cd bash
 78 |     $ git checkout df2c55de9c87c2ee8904280d26e80f5c48dd6434 # commit used in
 79 |     translating the code
 80 |     $ ./configure
 81 |     $ make CFLAGS=-g CFLAGS_FOR_BUILD=-g # debug info and don't optimize
 82 |     $ gdb --args ./bash -c 'echo foo'
 83 | 
 84 | Useful things to look at when debugging bash:
 85 | 
 86 | - variables yylval, shell_input_line, shell_input_line_index
 87 | - breakpoint at `yylex` (token numbers to names is in file parser-built)
 88 | - breakpoint at `read_token_word` (corresponds to `bashlex/tokenizer._readtokenword`)
 89 | - `xparse_dolparen, expand_word_internal` (called when parsing $())
 90 | 
 91 | ## Motivation
 92 | 
 93 | I wrote this library for another project of mine, [explainshell](http://www.explainshell.com)
 94 | which needed a new parsing backend to support complex constructs such as
 95 | process/command substitutions.
 96 | 
 97 | ## Releasing a new version
 98 | 
 99 | Suggestion for making a release environment:
100 | 
101 | ```bash
102 | python3 -m venv venv
103 | source venv/bin/activate
104 | pip install -r requirements.txt
105 | ```
106 | 
107 | - `make tests`
108 | - bump version in `setup.py`
109 | - git tag the new commit
110 | - run `python -m build`
111 | - run twine upload dist/*
112 | 
113 | ## License
114 | 
115 | The license for this is the same as that used by GNU bash, GNU GPL v3+.
116 | 


--------------------------------------------------------------------------------
/bashlex/ast.py:
--------------------------------------------------------------------------------
  1 | class node(object):
  2 |     """
  3 |     This class represents a node in the AST built while parsing command lines.
  4 |     It's basically an object container for various attributes, with a slightly
  5 |     specialised representation to make it a little easier to debug the parser.
  6 |     """
  7 | 
  8 |     def __init__(self, **kwargs):
  9 |         assert 'kind' in kwargs
 10 |         self.__dict__.update(kwargs)
 11 | 
 12 |     def dump(self, indent='  '):
 13 |         return _dump(self, indent)
 14 | 
 15 |     def __repr__(self):
 16 |         chunks = []
 17 |         d = dict(self.__dict__)
 18 |         kind = d.pop('kind')
 19 |         for k, v in sorted(d.items()):
 20 |             chunks.append('%s=%r' % (k, v))
 21 |         return '%sNode(%s)' % (kind.title(), ' '.join(chunks))
 22 | 
 23 |     def __eq__(self, other):
 24 |         if not isinstance(other, node):
 25 |             return False
 26 |         return self.__dict__ == other.__dict__
 27 | 
 28 |     def __hash__(self):
 29 |         return hash(tuple(sorted(self.__dict__)))
 30 | 
 31 | class nodevisitor(object):
 32 |     def _visitnode(self, n, *args, **kwargs):
 33 |         k = n.kind
 34 |         self.visitnode(n)
 35 |         return getattr(self, 'visit%s' % k)(n, *args, **kwargs)
 36 | 
 37 |     def visit(self, n):
 38 |         k = n.kind
 39 |         if k == 'operator':
 40 |             self._visitnode(n, n.op)
 41 |         elif k == 'list':
 42 |             dochild = self._visitnode(n, n.parts)
 43 |             if dochild is None or dochild:
 44 |                 for child in n.parts:
 45 |                     self.visit(child)
 46 |         elif k == 'reservedword':
 47 |             self._visitnode(n, n.word)
 48 |         elif k == 'pipe':
 49 |             self._visitnode(n, n.pipe)
 50 |         elif k == 'pipeline':
 51 |             dochild = self._visitnode(n, n.parts)
 52 |             if dochild is None or dochild:
 53 |                 for child in n.parts:
 54 |                     self.visit(child)
 55 |         elif k == 'compound':
 56 |             dochild = self._visitnode(n, n.list, n.redirects)
 57 |             if dochild is None or dochild:
 58 |                 for child in n.list:
 59 |                     self.visit(child)
 60 |                 for child in n.redirects:
 61 |                     self.visit(child)
 62 |         elif k in ('if', 'for', 'while', 'until', 'case', 'pattern'):
 63 |             dochild = self._visitnode(n, n.parts)
 64 |             if dochild is None or dochild:
 65 |                 for child in n.parts:
 66 |                     self.visit(child)
 67 |         elif k == 'command':
 68 |             dochild = self._visitnode(n, n.parts)
 69 |             if dochild is None or dochild:
 70 |                 for child in n.parts:
 71 |                     self.visit(child)
 72 |         elif k == 'function':
 73 |             dochild = self._visitnode(n, n.name, n.body, n.parts)
 74 |             if dochild is None or dochild:
 75 |                 for child in n.parts:
 76 |                     self.visit(child)
 77 |         elif k == 'redirect':
 78 |             dochild = self._visitnode(n, n.input, n.type, n.output, n.heredoc)
 79 |             if dochild is None or dochild:
 80 |                 if isinstance(n.output, node):
 81 |                     self.visit(n.output)
 82 |                 if n.heredoc:
 83 |                     self.visit(n.heredoc)
 84 |         elif k in ('word', 'assignment'):
 85 |             dochild = self._visitnode(n, n.word)
 86 |             if dochild is None or dochild:
 87 |                 for child in n.parts:
 88 |                     self.visit(child)
 89 |         elif k in ('parameter', 'tilde', 'heredoc'):
 90 |             self._visitnode(n, n.value)
 91 |         elif k in ('commandsubstitution', 'processsubstitution'):
 92 |             dochild = self._visitnode(n, n.command)
 93 |             if dochild is None or dochild:
 94 |                 self.visit(n.command)
 95 |         elif k == 'unimplemented':
 96 |             dochild = self._visitnode(n, n.parts)
 97 |             if dochild is None or dochild:
 98 |                 for child in n.parts:
 99 |                     self.visit(child)
100 |         else:
101 |             raise ValueError('unknown node kind %r' % k)
102 |         self.visitnodeend(n)
103 | 
104 |     def visitnode(self, n):
105 |         pass
106 |     def visitnodeend(self, n):
107 |         pass
108 |     def visitoperator(self, n, op):
109 |         pass
110 |     def visitlist(self, n, parts):
111 |         pass
112 |     def visitpipe(self, n, pipe):
113 |         pass
114 |     def visitpipeline(self, n, parts):
115 |         pass
116 |     def visitcompound(self, n, list, redirects):
117 |         pass
118 |     def visitif(self, node, parts):
119 |         pass
120 |     def visitfor(self, node, parts):
121 |         pass
122 |     def visitwhile(self, node, parts):
123 |         pass
124 |     def visituntil(self, node, parts):
125 |         pass
126 |     def visitcommand(self, n, parts):
127 |         pass
128 |     def visitfunction(self, n, name, body, parts):
129 |         pass
130 |     def visitword(self, n, word):
131 |         pass
132 |     def visitassignment(self, n, word):
133 |         pass
134 |     def visitreservedword(self, n, word):
135 |         pass
136 |     def visitparameter(self, n, value):
137 |         pass
138 |     def visittilde(self, n, value):
139 |         pass
140 |     def visitredirect(self, n, input, type, output, heredoc):
141 |         pass
142 |     def visitheredoc(self, n, value):
143 |         pass
144 |     def visitprocesssubstitution(self, n, command):
145 |         pass
146 |     def visitcommandsubstitution(self, n, command):
147 |         pass
148 |     def visitcase(self, node, parts):
149 |         pass
150 |     def visitpattern(self, node, parts):
151 |         pass
152 |     def visitunimplemented(self, node, parts):
153 |         pass
154 | 
155 | 
156 | def _dump(tree, indent='  '):
157 |     def _format(n, level=0):
158 |         if isinstance(n, node):
159 |             d = dict(n.__dict__)
160 |             kind = d.pop('kind')
161 |             if kind == 'list' and level > 0:
162 |                 level = level + 1
163 |             fields = []
164 |             v = d.pop('s', None)
165 |             if v:
166 |                 fields.append(('s', _format(v, level)))
167 |             for k, v in sorted(d.items()):
168 |                 if not v or k == 'parts':
169 |                     continue
170 |                 llevel = level
171 |                 if isinstance(v, node):
172 |                     llevel += 1
173 |                     fields.append((k, '\n' + (indent * llevel) + _format(v, llevel)))
174 |                 else:
175 |                     fields.append((k, _format(v, level)))
176 |             if kind == 'function':
177 |                 fields = [f for f in fields if f[0] not in ('name', 'body')]
178 |             v = d.pop('parts', None)
179 |             if v:
180 |                 fields.append(('parts', _format(v, level)))
181 |             return ''.join([
182 |                 '%sNode' % kind.title(),
183 |                 '(',
184 |                 ', '.join(('%s=%s' % field for field in fields)),
185 |                 ')'])
186 |         elif isinstance(n, list):
187 |             lines = ['[']
188 |             lines.extend((indent * (level + 1) + _format(x, level + 1) + ','
189 |                          for x in n))
190 |             if len(lines) > 1:
191 |                 lines.append(indent * (level) + ']')
192 |             else:
193 |                 lines[-1] += ']'
194 |             return '\n'.join(lines)
195 |         return repr(n)
196 | 
197 |     if not isinstance(tree, node):
198 |         raise TypeError('expected node, got %r' % tree.__class__.__name__)
199 |     return _format(tree)
200 | 
201 | def findfirstkind(parts, kind):
202 |     for i, node in enumerate(parts):
203 |         if node.kind == kind:
204 |             return i
205 |     return -1
206 | 
207 | class posconverter(nodevisitor):
208 |     def __init__(self, string):
209 |         self.string = string
210 | 
211 |     def visitnode(self, node):
212 |         assert hasattr(node, 'pos'), 'node %r is missing pos attr' % node
213 |         start, end = node.__dict__.pop('pos')
214 |         node.s = self.string[start:end]
215 | 
216 | class posshifter(nodevisitor):
217 |     def __init__(self, count):
218 |         self.count = count
219 | 
220 |     def visitnode(self, node):
221 |         #assert node.pos[1] + base <= endlimit
222 |         node.pos = (node.pos[0] + self.count, node.pos[1] + self.count)
223 | 


--------------------------------------------------------------------------------
/tests/test_tokenizer.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | from bashlex import tokenizer, state, flags, errors
  4 | 
  5 | from bashlex.tokenizer import token as t
  6 | from bashlex.tokenizer import tokentype as tt
  7 | 
  8 | tokenize = lambda s: list(tokenizer.tokenizer(s, state.parserstate()))
  9 | 
 10 | hasdollarset = set([flags.word.HASDOLLAR])
 11 | 
 12 | class test_tokenizer(unittest.TestCase):
 13 | 
 14 |     def setUp(self):
 15 |         if not hasattr(self, 'assertRaisesRegex'):
 16 |             self.assertRaisesRegex = self.assertRaisesRegexp
 17 | 
 18 |     def assertTokens(self, s, tokens):
 19 |         result = tokenize(s)
 20 | 
 21 |         # pop the last token if it's a new line since that gets appended
 22 |         # to the input string by default and we don't really care about
 23 |         # that here
 24 |         if result[-1].value == '\n':
 25 |             result.pop()
 26 | 
 27 |         self.assertEqual(result, tokens)
 28 | 
 29 |         for t in tokens:
 30 |             self.assertEqual(str(t.value), s[t.lexpos:t.endlexpos])
 31 | 
 32 |     def test_empty_string(self):
 33 |         self.assertEqual(len(tokenize('')), 0)
 34 | 
 35 |     def test_simple(self):
 36 |         s = 'a b'
 37 |         self.assertTokens(s, [
 38 |                           t(tt.WORD, 'a', [0, 1]),
 39 |                           t(tt.WORD, 'b', [2, 3])])
 40 | 
 41 |     def test_meta(self):
 42 |         s = '!&()<>;&;;&;; |<<-<< <<<>>&&||<&>&<>>|&> &>>|&'
 43 |         self.assertTokens(s, [
 44 |                           t(tt.BANG, '!', [0, 1]),
 45 |                           t(tt.AMPERSAND, '&', [1, 2]),
 46 |                           t(tt.LEFT_PAREN, '(', [2, 3]),
 47 |                           t(tt.RIGHT_PAREN, ')', [3, 4]),
 48 |                           t(tt.LESS_GREATER, '<>', [4, 6]),
 49 |                           t(tt.SEMI_AND, ';&', [6, 8]),
 50 |                           t(tt.SEMI_SEMI_AND, ';;&', [8, 11]),
 51 |                           t(tt.SEMI_SEMI, ';;', [11, 13]),
 52 |                           t(tt.BAR, '|', [14, 15]),
 53 |                           t(tt.LESS_LESS_MINUS, '<<-', [15, 18]),
 54 |                           t(tt.LESS_LESS, '<<', [18, 20]),
 55 |                           t(tt.LESS_LESS_LESS, '<<<', [21, 24]),
 56 |                           t(tt.GREATER_GREATER, '>>', [24, 26]),
 57 |                           t(tt.AND_AND, '&&', [26, 28]),
 58 |                           t(tt.OR_OR, '||', [28, 30]),
 59 |                           t(tt.LESS_AND, '<&', [30, 32]),
 60 |                           t(tt.GREATER_AND, '>&', [32, 34]),
 61 |                           t(tt.LESS_GREATER, '<>', [34, 36]),
 62 |                           t(tt.GREATER_BAR, '>|', [36, 38]),
 63 |                           t(tt.AND_GREATER, '&>', [38, 40]),
 64 |                           t(tt.AND_GREATER_GREATER, '&>>', [41, 44]),
 65 |                           t(tt.BAR_AND, '|&', [44, 46])])
 66 | 
 67 |         s = '<&-'
 68 |         self.assertTokens(s, [
 69 |                           t(tt.LESS_AND, '<&', [0, 2]),
 70 |                           t(tt.DASH, '-', [2, 3])])
 71 | 
 72 |     def test_comment(self):
 73 |         s = '|# foo bar\n'
 74 |         self.assertTokens(s, [
 75 |                           t(tt.BAR, '|', [0, 1])])
 76 | 
 77 |     def test_shellquote(self):
 78 |         s = '"foo"'
 79 |         self.assertTokens(s, [
 80 |                           t(tt.WORD, '"foo"', [0, 5], set([flags.word.QUOTED]))])
 81 | 
 82 |         s = '"foo"bar\'baz\''
 83 |         self.assertTokens(s, [
 84 |                           t(tt.WORD, s, [0, len(s)], set([flags.word.QUOTED]))])
 85 | 
 86 |         self.assertRaises(tokenizer.MatchedPairError,
 87 |                           tokenize,
 88 |                           "'a")
 89 | 
 90 |     def test_shellexp(self):
 91 |         s = '<(foo) bar $(baz) ${a}'
 92 |         self.assertTokens(s, [
 93 |                           t(tt.WORD, '<(foo)', [0, 6], hasdollarset),
 94 |                           t(tt.WORD, 'bar', [7, 10]),
 95 |                           t(tt.WORD, '$(baz)', [11, 17], hasdollarset),
 96 |                           t(tt.WORD, '${a}', [18, 22], hasdollarset)])
 97 | 
 98 |         s = '$"foo" $1'
 99 |         self.assertTokens(s, [
100 |                           t(tt.WORD, '$"foo"', [0, 6], set([flags.word.QUOTED])),
101 |                           t(tt.WORD, '$1', [7, 9], hasdollarset)])
102 | 
103 |     def test_readtokenword(self):
104 |         s = 'a\\"'
105 |         self.assertTokens(s, [
106 |                           t(tt.WORD, 'a\\"', [0, len(s)], set([flags.word.QUOTED]))])
107 | 
108 |     def test_parameter_expansion(self):
109 |         # s = 'a $"foo"'
110 |         # tok = tokenizer.tokenizer(s, state.parserstate())
111 |         # self.assertEqual(list(tok), [t(tt.WORD, 'a'),
112 |         #                               t(tt.WORD, '"foo"', flags=set([flags.word.QUOTED]))])
113 | 
114 |         s = 'a $$'
115 |         self.assertTokens(s, [
116 |                           t(tt.WORD, 'a', [0, 1]),
117 |                           t(tt.WORD, '$$', [2, 4], hasdollarset)])
118 | 
119 |     def test_comsub(self):
120 |         s = 'a $(b)'
121 |         self.assertTokens(s, [
122 |                           t(tt.WORD, 'a', [0, 1]),
123 |                           t(tt.WORD, '$(b)', [2, 6], hasdollarset)])
124 | 
125 |         s = '$("a")'
126 |         self.assertTokens(s, [
127 |                           t(tt.WORD, '$("a")', [0, 6], hasdollarset)])
128 | 
129 |         s = "$($'a')"
130 |         self.assertTokens(s, [
131 |                           t(tt.WORD, "$($'a')", [0, 7], hasdollarset)])
132 | 
133 |         s = '$(a $(b))'
134 |         self.assertTokens(s, [
135 |                           t(tt.WORD, '$(a $(b))', [0, 9], hasdollarset)])
136 | 
137 |         s = '$(a ${b})'
138 |         self.assertTokens(s, [
139 |                           t(tt.WORD, '$(a ${b})', [0, 9], hasdollarset)])
140 | 
141 |         s = '$(a $[b])'
142 |         self.assertTokens(s, [
143 |                           t(tt.WORD, '$(a $[b])', [0, 9], hasdollarset)])
144 | 
145 |         s = '"$(a)"'
146 |         self.assertTokens(s, [
147 |                           t(tt.WORD, '"$(a)"', [0, 6], set([flags.word.HASDOLLAR,
148 |                                                             flags.word.QUOTED]))])
149 | 
150 |         s = 'a $(! b)'
151 |         self.assertTokens(s, [
152 |                           t(tt.WORD, 'a', [0, 1]),
153 |                           t(tt.WORD, '$(! b)', [2, 8], hasdollarset)])
154 | 
155 |         s = '$(!|!||)'
156 |         self.assertTokens(s, [
157 |                           t(tt.WORD, '$(!|!||)', [0, 8], hasdollarset)])
158 | 
159 |         s = '$(a <<EOF)'
160 |         self.assertTokens(s, [
161 |                           t(tt.WORD, '$(a <<EOF)', [0, 10], hasdollarset)])
162 | 
163 |         s = '$(a <b)'
164 |         self.assertTokens(s, [
165 |                           t(tt.WORD, '$(a <b)', [0, 7], hasdollarset)])
166 | 
167 |         s = '$(case ;; esac)'
168 |         self.assertTokens(s, [
169 |                           t(tt.WORD, '$(case ;; esac)', [0, 15], hasdollarset)])
170 | 
171 |         s = '$(case a in (b) c ;; (d) e ;; esac)'
172 |         self.assertTokens(s, [
173 |                           t(tt.WORD, '$(case a in (b) c ;; (d) e ;; esac)',
174 |                             [0, len(s)], hasdollarset)])
175 | 
176 |         s = '$(do )'
177 |         self.assertTokens(s, [
178 |                           t(tt.WORD, '$(do )', [0, len(s)], hasdollarset)])
179 | 
180 |         s = '$((a))'
181 |         self.assertTokens(s, [
182 |                           t(tt.WORD, '$((a))', [0, len(s)], hasdollarset)])
183 | 
184 |         s = '$('
185 |         self.assertRaises(tokenizer.MatchedPairError,
186 |                           tokenize, s)
187 | 
188 |         s = '$(;'
189 |         self.assertRaises(tokenizer.MatchedPairError,
190 |                           tokenize, s)
191 | 
192 |         s = '$(<'
193 |         self.assertRaises(tokenizer.MatchedPairError,
194 |                           tokenize, s)
195 | 
196 |         s = '$(<<'
197 |         self.assertRaises(tokenizer.MatchedPairError,
198 |                           tokenize, s)
199 | 
200 |         s = '$(a\\b)'
201 |         self.assertTokens(s, [
202 |                           t(tt.WORD, '$(a\\b)', [0, len(s)], hasdollarset)])
203 | 
204 |         s = '$(a <<EOF\nb\nEOF)'
205 |         self.assertTokens(s, [
206 |                           t(tt.WORD, '$(a <<EOF\nb\nEOF)', [0, len(s)],
207 |                             hasdollarset)])
208 | 
209 |         s = '$(a <<EOF\nb\nEOF\n)'
210 |         self.assertTokens(s, [
211 |                           t(tt.WORD, '$(a <<EOF\nb\nEOF\n)', [0, len(s)],
212 |                             hasdollarset)])
213 | 
214 |         s = '$(a <<-EOF\nb\nEOF)'
215 |         self.assertTokens(s, [
216 |                           t(tt.WORD, '$(a <<-EOF\nb\nEOF)', [0, len(s)],
217 |                             hasdollarset)])
218 | 
219 |         s = '$(a # comment\n)'
220 |         self.assertTokens(s, [
221 |                           t(tt.WORD, '$(a # comment\n)', [0, len(s)],
222 |                             hasdollarset)])
223 | 
224 |     def test_parsematchedpair(self):
225 |         s = '"`foo`"'
226 |         self.assertTokens(s, [
227 |                           t(tt.WORD, '"`foo`"', [0, len(s)], set([flags.word.QUOTED]))])
228 | 
229 |         s = '"${a}"'
230 |         self.assertTokens(s, [
231 |                           t(tt.WORD, '"${a}"', [0, len(s)], set([flags.word.HASDOLLAR,
232 |                                                                 flags.word.QUOTED]))])
233 | 
234 |         s = '${\'a\'}'
235 |         self.assertTokens(s, [
236 |                           t(tt.WORD, '${\'a\'}', [0, len(s)], hasdollarset)])
237 | 
238 |         s = '${$\'a\'}'
239 |         self.assertTokens(s, [
240 |                           t(tt.WORD, '${$\'a\'}', [0, len(s)], hasdollarset)])
241 | 
242 |         s = "'a\\'"
243 |         self.assertTokens(s, [
244 |                           t(tt.WORD, "'a\\'", [0, len(s)], set([flags.word.QUOTED]))])
245 | 
246 |         #s = '"\\\n"'
247 |         #self.assertEqual(tokenize(s), [
248 |         #                  t(tt.WORD, '"\\a"', flags=set([flags.word.QUOTED]))])
249 | 
250 |     def test_assignment(self):
251 |         s = 'a=b'
252 |         self.assertTokens(s, [
253 |                           t(tt.ASSIGNMENT_WORD, 'a=b', [0, 3],
254 |                             flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])
255 | 
256 |         s = 'a+=b'
257 |         self.assertTokens(s, [
258 |                           t(tt.ASSIGNMENT_WORD, 'a+=b', [0, 4],
259 |                             flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])
260 | 
261 |         s = 'a2=b'
262 |         self.assertTokens(s, [
263 |                           t(tt.ASSIGNMENT_WORD, 'a2=b', [0, 4],
264 |                             flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])
265 | 
266 |         s = '2a=b'  # This should be a word, not an assignment. bash doesn't allow 1st char to be int
267 |         self.assertTokens(s, [
268 |                           t(tt.WORD, '2a=b', [0, 4])])
269 | 
270 |     def test_plus_at_end_of_word(self):
271 |         s = 'a+ b'
272 |         self.assertTokens(s, [
273 |                           t(tt.WORD, 'a+', [0, 2]),
274 |                           t(tt.WORD, 'b', [3, 4])])
275 | 
276 |     def test_heredoc(self):
277 |         s = 'a <<EOF'
278 |         self.assertTokens(s, [
279 |                           t(tt.WORD, 'a', [0, 1]),
280 |                           t(tt.LESS_LESS, '<<', [2, 4]),
281 |                           t(tt.WORD, 'EOF', [4, 7])])
282 | 
283 |     def test_herestring(self):
284 |         s = 'a <<<foo'
285 |         self.assertTokens(s, [
286 |                           t(tt.WORD, 'a', [0, 1]),
287 |                           t(tt.LESS_LESS_LESS, '<<<', [2, 5]),
288 |                           t(tt.WORD, 'foo', [5, 8])])
289 | 
290 |         s = 'a <<<"b\nc"'
291 |         self.assertTokens(s, [
292 |                           t(tt.WORD, 'a', [0, 1]),
293 |                           t(tt.LESS_LESS_LESS, '<<<', [2, 5]),
294 |                           t(tt.WORD, '"b\nc"', [5, 10], set([flags.word.QUOTED]))])
295 | 
296 |     def test_foo(self):
297 |         s = 'c)'
298 |         self.assertTokens(s, [
299 |                           t(tt.WORD, 'c', [0, 1]),
300 |                           t(tt.RIGHT_PAREN, ')', [1, 2])])
301 | 
302 |     def test_redirections(self):
303 |         s = '1>'
304 |         self.assertTokens(s, [
305 |                           t(tt.NUMBER, 1, [0, 1]),
306 |                           t(tt.GREATER, '>', [1, 2])])
307 |         s = '$<$(b)'
308 |         self.assertTokens(s, [
309 |                           t(tt.WORD, '$', [0, 1], hasdollarset),
310 |                           t(tt.LESS, '<', [1, 2]),
311 |                           t(tt.WORD, '$(b)', [2, 6], hasdollarset)])
312 | 
313 |     def test_quote_error(self):
314 |         s = "a 'b"
315 |         msg = "EOF.*matching \"'\" \\(position 4"
316 |         self.assertRaisesRegex(errors.ParsingError, msg, tokenize, s)
317 | 
318 |     def test_escape_error(self):
319 |         return # TODO
320 | 
321 |         s = "a b\\"
322 | 
323 |         self.assertRaisesRegex(errors.ParsingError, "No escaped character.*position 2", tokenize, s)
324 | 
325 |     def test_tokenize(self):
326 |         s = 'bar -x'
327 |         self.assertTokens(s, [
328 |                           t(tt.WORD, 'bar', [0, 3]),
329 |                           t(tt.WORD, '-x', [4, 6])])
330 | 
331 |         s = 'wx    y =z '
332 |         self.assertTokens(s, [
333 |                           t(tt.WORD, 'wx', [0, 2]),
334 |                           t(tt.WORD, 'y', [6, 7]),
335 |                           t(tt.WORD, '=z', [8, 10])])
336 | 
337 |         s = "a 'b' c"
338 |         self.assertTokens(s, [
339 |                           t(tt.WORD, 'a', [0, 1]),
340 |                           t(tt.WORD, "'b'", [2, 5], set([flags.word.QUOTED])),
341 |                           t(tt.WORD, 'c', [6, 7])])
342 | 
343 |         s = "a 'b  ' c"
344 |         self.assertTokens(s, [
345 |                           t(tt.WORD, 'a', [0, 1]),
346 |                           t(tt.WORD, "'b  '", [2, 7], set([flags.word.QUOTED])),
347 |                           t(tt.WORD, 'c', [8, 9])])
348 | 
349 |     def test_escaped_newline(self):
350 |         s= """a \\\nb"""
351 |         self.assertTokens(s, [
352 |             t(tt.WORD, 'a', [0, 1]),
353 |             t(tt.WORD, 'b', [4, 5])
354 |         ])
355 | 


--------------------------------------------------------------------------------
/bashlex/subst.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | 
  3 | from bashlex import ast, flags, tokenizer, errors
  4 | 
  5 | def _recursiveparse(parserobj, base, sindex, tokenizerargs=None):
  6 |     # TODO: fix this hack that prevents mutual import
  7 |     from bashlex import parser
  8 | 
  9 |     tok = parserobj.tok
 10 | 
 11 |     if tokenizerargs is None:
 12 |         tokenizerargs = {'parserstate' : copy.copy(tok._parserstate),
 13 |                          'lastreadtoken' : tok._last_read_token,
 14 |                          'tokenbeforethat' : tok._token_before_that,
 15 |                          'twotokensago' : tok._two_tokens_ago}
 16 | 
 17 |     string = base[sindex:]
 18 |     newlimit = parserobj._expansionlimit
 19 |     if newlimit is not None:
 20 |         newlimit -= 1
 21 |     p = parser._parser(string, tokenizerargs=tokenizerargs,
 22 |                        expansionlimit=newlimit)
 23 |     node = p.parse()
 24 | 
 25 |     endp = node.pos[1]
 26 |     _adjustpositions(node, sindex, len(base))
 27 | 
 28 |     return node, endp
 29 | 
 30 | def _parsedolparen(parserobj, base, sindex):
 31 |     copiedps = copy.copy(parserobj.parserstate)
 32 |     copiedps.add(flags.parser.CMDSUBST)
 33 |     copiedps.add(flags.parser.EOFTOKEN)
 34 |     string = base[sindex:]
 35 | 
 36 |     tokenizerargs = {'eoftoken' : tokenizer.token(tokenizer.tokentype.RIGHT_PAREN, ')'),
 37 |                      'parserstate' : copiedps,
 38 |                      'lastreadtoken' : parserobj.tok._last_read_token,
 39 |                      'tokenbeforethat' : parserobj.tok._token_before_that,
 40 |                      'twotokensago' : parserobj.tok._two_tokens_ago}
 41 | 
 42 |     node, endp = _recursiveparse(parserobj, base, sindex, tokenizerargs)
 43 | 
 44 |     if string[endp] != ')':
 45 |         while endp > 0 and string[endp-1] == '\n':
 46 |             endp -= 1
 47 | 
 48 |     return node, sindex + endp
 49 | 
 50 | def _extractcommandsubst(parserobj, string, sindex, sxcommand=False):
 51 |     if string[sindex] == '(':
 52 |         raise NotImplementedError('arithmetic expansion')
 53 |         #return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True)
 54 |     else:
 55 |         node, si = _parsedolparen(parserobj, string, sindex)
 56 |         si += 1
 57 |         return ast.node(kind='commandsubstitution', command=node, pos=(sindex-2, si)), si
 58 | 
 59 | def _extractprocesssubst(parserobj, string, sindex):
 60 |     #return _extractdelimitedstring(tok, string, sindex, starter, '(', ')', sxcommand=True)
 61 |     node, si = _parsedolparen(parserobj, string, sindex)
 62 |     return node, si + 1
 63 | 
 64 | #def _extractdelimitedstring(parserobj, string, sindex, opener, altopener, closer,
 65 | #                            sxcommand=False):
 66 | #    parts = []
 67 | #    incomment = False
 68 | #    passchar = False
 69 | #    nestinglevel = 1
 70 | #    i = sindex
 71 | 
 72 | #    while nestinglevel:
 73 | #        if i >= len(string):
 74 | #            break
 75 | #        c = string[i]
 76 | #        if incomment:
 77 | #            if c == '\n':
 78 | #                incomment = False
 79 | #            i += 1
 80 | #            continue
 81 | #        elif passchar:
 82 | #            passchar = False
 83 | #            i += 1
 84 | #            continue
 85 | 
 86 | #        if sxcommand and c == '#' and (i == 0 or string[i-1] == '\n' or
 87 | #                                       tokenizer._shellblank(string[i-1])):
 88 | #            incomment = True
 89 | #            i += 1
 90 | #            continue
 91 | 
 92 | #        if c == '\\':
 93 | #            passchar = True
 94 | #            i += 1
 95 | #            continue
 96 | 
 97 | #        if sxcommand and string[i:i+2] == '$(':
 98 | #            si = i + 2
 99 | #            node, si = _extractcommandsubst(parserobj, string, si, sxcommand=sxcommand)
100 | #            parts.append(node)
101 | #            i = si + 1
102 | #            continue
103 | 
104 | #        if string.startswith(opener, i):
105 | #            si = i + len(opener)
106 | #            nodes, si = _extractdelimitedstring(parserobj, string, si, opener, altopener,
107 | #                                                closer, sxcommand=sxcommand)
108 | #            parts.extend(nodes)
109 | #            i = si + 1
110 | #            continue
111 | 
112 | #        if string.startswith(altopener, i):
113 | #            si = i + len(altopener)
114 | #            nodes, si = _extractdelimitedstring(parserobj, string, si, altopener, altopener,
115 | #                                                closer, sxcommand=sxcommand)
116 | #            parts.extend(nodes)
117 | #            i = si + 1
118 | #            continue
119 | 
120 | #        # 1327
121 | #        if string.startswith(closer, i):
122 | #            i += len(closer) - 1
123 | #            nestinglevel -= 1
124 | #            if nestinglevel == 0:
125 | #                break
126 | 
127 | #        if c == '`':
128 | #            si = i + 1
129 | #            t = _stringextract(string, si, '`', sxcommand=sxcommand)
130 | #            i = si + 1
131 | #            continue
132 | 
133 | #        if c in "'\"":
134 | #            si = i +1
135 | #            if c == '"':
136 | #                i = _skipsinglequoted(string, si)
137 | #            else:
138 | #                i = _skipdoublequoted(string, si)
139 | #            continue
140 | 
141 | #        i += 1
142 | 
143 | #    if i == len(string) and nestinglevel:
144 | #        raise errors.ParsingError('bad substitution: no closing %r in %s' % (closer, string))
145 | 
146 | #    return parts, i
147 | 
148 | def _paramexpand(parserobj, string, sindex):
149 |     node = None
150 |     zindex = sindex + 1
151 |     c = string[zindex] if zindex < len(string) else None
152 |     if c and c in '0123456789$#?-!*@':
153 |         # XXX 7685
154 |         node = ast.node(kind='parameter', value=c,
155 |                         pos=(sindex, zindex+1))
156 |     elif c == '{':
157 |         # XXX 7863
158 |         # TODO not start enough, doesn't consider escaping
159 |         zindex = string.find('}', zindex + 1)
160 |         node = ast.node(kind='parameter', value=string[sindex+2:zindex],
161 |                         pos=(sindex, zindex+1))
162 |         # TODO
163 |         # return _parameterbraceexpand(string, zindex)
164 |     elif c == '(':
165 |         return _extractcommandsubst(parserobj, string, zindex + 1)
166 |     elif c == '[':
167 |         raise NotImplementedError('arithmetic substitution')
168 |         #return _extractarithmeticsubst(string, zindex + 1)
169 |     else:
170 |         tindex = zindex
171 |         for zindex in range(tindex, len(string) + 1):
172 |             if zindex == len(string):
173 |                 break
174 |             if not string[zindex].isalnum() and not string[zindex] == '_':
175 |                 break
176 |         temp1 = string[sindex:zindex]
177 |         if temp1:
178 |             return (ast.node(kind='parameter', value=temp1[1:], pos=(sindex, zindex)),
179 |                     zindex)
180 | 
181 |     if zindex < len(string):
182 |         zindex += 1
183 | 
184 |     return node, zindex
185 | 
186 | def _adjustpositions(node_, base, endlimit):
187 |     class v(ast.nodevisitor):
188 |         def visitnode(self, node):
189 |             assert node.pos[1] + base <= endlimit
190 |             node.pos = (node.pos[0] + base, node.pos[1] + base)
191 |     visitor = v()
192 |     visitor.visit(node_)
193 | 
194 | def _expandwordinternal(parserobj, wordtoken, qheredocument, qdoublequotes, quoted, isexp):
195 |     # bash/subst.c L8132
196 |     istring = ''
197 |     parts = []
198 |     tindex = [0]
199 |     sindex = [0]
200 |     string = wordtoken.value
201 |     def nextchar():
202 |         sindex[0] += 1
203 |         if sindex[0] < len(string):
204 |             return string[sindex[0]]
205 |     def peekchar():
206 |         if sindex[0]+1 < len(string):
207 |             return string[sindex[0]+1]
208 | 
209 |     while True:
210 |         if sindex[0] == len(string):
211 |             break
212 |             # goto finished_with_string
213 |         c = string[sindex[0]]
214 |         if c in '<>':
215 |             if (nextchar() != '(' or qheredocument or qdoublequotes or
216 |                 (wordtoken.flags & set([flags.word.DQUOTE, flags.word.NOPROCSUB]))):
217 |                 sindex[0] -= 1
218 | 
219 |                 # goto add_character
220 |                 sindex[0] += 1
221 |                 istring += c
222 |             else:
223 |                 tindex = sindex[0] + 1
224 | 
225 |                 node, sindex[0] = _extractprocesssubst(parserobj, string, tindex)
226 | 
227 |                 parts.append(ast.node(kind='processsubstitution', command=node,
228 |                                       pos=(tindex - 2, sindex[0])))
229 |                 istring += string[tindex - 2:sindex[0]]
230 |                 # goto dollar_add_string
231 |         # TODO
232 |         # elif c == '=':
233 |         #     pass
234 |         # elif c == ':':
235 |         #     pass
236 |         elif c == '~':
237 |             if (wordtoken.flags & set([flags.word.NOTILDE, flags.word.DQUOTE]) or
238 |                 (sindex[0] > 0 and not (wordtoken.flags & flags.word.NOTILDE)) or
239 |                 qdoublequotes or qheredocument):
240 |                 wordtoken.flags.clear()
241 |                 wordtoken.flags.add(flags.word.ITILDE)
242 |                 sindex[0] += 1
243 |                 istring += c
244 |             else:
245 |                 stopatcolon = wordtoken.flags & set([flags.word.ASSIGNRHS,
246 |                                                     flags.word.ASSIGNMENT,
247 |                                                     flags.word.TILDEEXP])
248 |                 expand = True
249 |                 for i in range(sindex[0], len(string)):
250 |                     r = string[i]
251 |                     if r == '/':
252 |                         break
253 |                     if r in "\\'\"":
254 |                         expand = False
255 |                         break
256 |                     if stopatcolon and r == ':':
257 |                         break
258 |                 else:
259 |                     # go one past the end if we didn't exit early
260 |                     i += 1
261 | 
262 |                 if i > sindex[0] and expand:
263 |                     node = ast.node(kind='tilde', value=string[sindex[0]:i],
264 |                                     pos=(sindex[0], i))
265 |                     parts.append(node)
266 |                 istring += string[sindex[0]:i]
267 |                 sindex[0] = i
268 | 
269 |         elif c == '$' and len(string) > 1:
270 |             tindex = sindex[0]
271 |             node, sindex[0] = _paramexpand(parserobj, string, sindex[0])
272 |             if node:
273 |                 parts.append(node)
274 |             istring += string[tindex:sindex[0]]
275 |         elif c == '`':
276 |             tindex = sindex[0]
277 |             # bare instance of ``
278 |             if nextchar() == '`':
279 |                 sindex[0] += 1
280 |                 istring += '``'
281 |             else:
282 |                 x = _stringextract(string, sindex[0], "`")
283 |                 if x == -1:
284 |                     raise errors.ParsingError('bad substitution: no closing "`" '
285 |                                               'in %s' % string)
286 |                 else:
287 |                     if wordtoken.flags & flags.word.NOCOMSUB:
288 |                         pass
289 |                     else:
290 |                         sindex[0] = x
291 | 
292 |                         word = string[tindex+1:sindex[0]]
293 |                         command, ttindex = _recursiveparse(parserobj, word, 0)
294 |                         _adjustpositions(command, tindex+1, len(string))
295 |                         ttindex += 1 # ttindex is on the closing char
296 | 
297 |                         # assert sindex[0] == ttindex
298 |                         # go one past the closing `
299 |                         sindex[0] += 1
300 | 
301 |                         node = ast.node(kind='commandsubstitution',
302 |                                         command=command,
303 |                                         pos=(tindex, sindex[0]))
304 |                         parts.append(node)
305 |                         istring += string[tindex:sindex[0]]
306 | 
307 |         elif c == '\\':
308 |             istring += string[sindex[0]+1:sindex[0]+2]
309 |             sindex[0] += 2
310 |         elif c == '"':
311 |             sindex[0] += 1
312 |             continue
313 | 
314 |             # 8513
315 |             #if qdoublequotes or qheredocument:
316 |             #    sindex[0] += 1
317 |             #else:
318 |             #    tindex = sindex[0] + 1
319 |             #    parts, sindex[0] = _stringextractdoublequoted(string, sindex[0])
320 |             #    if tindex == 1 and sindex[0] == len(string):
321 |             #        quotedstate = 'wholly'
322 |             #    else:
323 |             #        quotedstate = 'partially'
324 | 
325 |         elif c == "'":
326 |             # entire string surronded by single quotes, no expansion is
327 |             # going to happen
328 |             if sindex[0] == 0 and string[-1] == "'":
329 |                 return [], string[1:-1]
330 | 
331 |             # check if we're inside double quotes
332 |             if not qdoublequotes:
333 |                 # look for the closing ', we know we have one or otherwise
334 |                 # this wouldn't tokenize due to unmatched '
335 |                 tindex = sindex[0]
336 |                 sindex[0] = string.find("'", sindex[0]) + 1
337 | 
338 |                 istring += string[tindex+1:sindex[0]-1]
339 |             else:
340 |                 # this is a single quote inside double quotes, add it
341 |                 istring += c
342 |                 sindex[0] += 1
343 |         else:
344 |             istring += string[sindex[0]:sindex[0]+1]
345 |             sindex[0] += 1
346 | 
347 |     if parts:
348 |         class v(ast.nodevisitor):
349 |             def visitnode(self, node):
350 |                 assert node.pos[1] + wordtoken.lexpos <= wordtoken.endlexpos
351 |                 node.pos = (node.pos[0] + wordtoken.lexpos,
352 |                             node.pos[1] + wordtoken.lexpos)
353 |         visitor = v()
354 |         for node in parts:
355 |             visitor.visit(node)
356 | 
357 |     return parts, istring
358 | 
359 | def _stringextract(string, sindex, charlist, sxvarname=False):
360 |     found = False
361 |     i = sindex
362 |     while i < len(string):
363 |         c = string[i]
364 |         if c == '\\':
365 |             if i + 1 < len(string):
366 |                 i += 1
367 |             else:
368 |                 break
369 |         elif sxvarname and c == '[':
370 |             ni = _skipsubscript(string, i, 0)
371 |             if string[ni] == ']':
372 |                 i = ni
373 |         elif c in charlist:
374 |             found = True
375 |             break
376 |         else:
377 |             i += 1
378 |     if found:
379 |         return i
380 |     else:
381 |         return -1
382 | 


--------------------------------------------------------------------------------
/bashlex/parser.py:
--------------------------------------------------------------------------------
  1 | import os, copy
  2 | 
  3 | from bashlex import yacc, tokenizer, state, ast, subst, flags, errors, heredoc
  4 | 
  5 | def _partsspan(parts):
  6 |     return parts[0].pos[0], parts[-1].pos[1]
  7 | 
  8 | tokens = [e.name for e in tokenizer.tokentype]
  9 | precedence = (
 10 |     ('left', 'AMPERSAND', 'SEMICOLON', 'NEWLINE', 'EOF'),
 11 |     ('left', 'AND_AND', 'OR_OR'),
 12 |     ('right', 'BAR', 'BAR_AND')
 13 | )
 14 | 
 15 | def handleNotImplemented(p, type):
 16 |     if p.context._proceedonerror:
 17 |         parts = _makeparts(p)
 18 |         p[0] = ast.node(kind='unimplemented', parts=parts, pos=_partsspan(parts))
 19 |         return
 20 |     if len(p) == 2:
 21 |         raise NotImplementedError('type = {%s}, token = {%s}' % (type, p[1]))
 22 |     else:
 23 |         raise NotImplementedError('type = {%s}, token = {%s}, parts = {%s}' % (type, p[1], p[2]))
 24 | 
 25 | def handleAssert(p, test):
 26 |     if not test:
 27 |         raise AssertionError('token = {%s}' % p[1])
 28 | 
 29 | def p_inputunit(p):
 30 |     '''inputunit : simple_list simple_list_terminator
 31 |                  | NEWLINE
 32 |                  | error NEWLINE
 33 |                  | EOF'''
 34 |     # XXX
 35 |     if p.lexer._parserstate & flags.parser.CMDSUBST:
 36 |         p.lexer._parserstate.add(flags.parser.EOFTOKEN)
 37 | 
 38 |     if isinstance(p[1], ast.node):
 39 |         p[0] = p[1]
 40 |         # accept right here in case the input contains more lines that are
 41 |         # not part of the current command
 42 |         p.accept()
 43 | 
 44 | def p_word_list(p):
 45 |     '''word_list : WORD
 46 |                  | word_list WORD'''
 47 |     parserobj = p.context
 48 |     if len(p) == 2:
 49 |         p[0] = [_expandword(parserobj, p.slice[1])]
 50 |     else:
 51 |         p[0] = p[1]
 52 |         p[0].append(_expandword(parserobj, p.slice[2]))
 53 | 
 54 | def p_redirection_heredoc(p):
 55 |     '''redirection : LESS_LESS WORD
 56 |                    | NUMBER LESS_LESS WORD
 57 |                    | REDIR_WORD LESS_LESS WORD
 58 |                    | LESS_LESS_MINUS WORD
 59 |                    | NUMBER LESS_LESS_MINUS WORD
 60 |                    | REDIR_WORD LESS_LESS_MINUS WORD'''
 61 |     parserobj = p.context
 62 |     assert isinstance(parserobj, _parser)
 63 | 
 64 |     output = ast.node(kind='word', word=p[len(p)-1], parts=[],
 65 |                       pos=p.lexspan(len(p)-1))
 66 |     if len(p) == 3:
 67 |         p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None,
 68 |                         output=output, pos=(p.lexpos(1), p.endlexpos(2)))
 69 |     else:
 70 |         p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None,
 71 |                         output=output, pos=(p.lexpos(1), p.endlexpos(3)))
 72 | 
 73 |     if p.slice[len(p)-2].ttype == tokenizer.tokentype.LESS_LESS:
 74 |         parserobj.redirstack.append((p[0], False))
 75 |     else:
 76 |         parserobj.redirstack.append((p[0], True))
 77 | 
 78 | def p_redirection(p):
 79 |     '''redirection : GREATER WORD
 80 |                    | LESS WORD
 81 |                    | NUMBER GREATER WORD
 82 |                    | NUMBER LESS WORD
 83 |                    | REDIR_WORD GREATER WORD
 84 |                    | REDIR_WORD LESS WORD
 85 |                    | GREATER_GREATER WORD
 86 |                    | NUMBER GREATER_GREATER WORD
 87 |                    | REDIR_WORD GREATER_GREATER WORD
 88 |                    | GREATER_BAR WORD
 89 |                    | NUMBER GREATER_BAR WORD
 90 |                    | REDIR_WORD GREATER_BAR WORD
 91 |                    | LESS_GREATER WORD
 92 |                    | NUMBER LESS_GREATER WORD
 93 |                    | REDIR_WORD LESS_GREATER WORD
 94 |                    | LESS_LESS_LESS WORD
 95 |                    | NUMBER LESS_LESS_LESS WORD
 96 |                    | REDIR_WORD LESS_LESS_LESS WORD
 97 |                    | LESS_AND NUMBER
 98 |                    | NUMBER LESS_AND NUMBER
 99 |                    | REDIR_WORD LESS_AND NUMBER
100 |                    | GREATER_AND NUMBER
101 |                    | NUMBER GREATER_AND NUMBER
102 |                    | REDIR_WORD GREATER_AND NUMBER
103 |                    | LESS_AND WORD
104 |                    | NUMBER LESS_AND WORD
105 |                    | REDIR_WORD LESS_AND WORD
106 |                    | GREATER_AND WORD
107 |                    | NUMBER GREATER_AND WORD
108 |                    | REDIR_WORD GREATER_AND WORD
109 |                    | GREATER_AND DASH
110 |                    | NUMBER GREATER_AND DASH
111 |                    | REDIR_WORD GREATER_AND DASH
112 |                    | LESS_AND DASH
113 |                    | NUMBER LESS_AND DASH
114 |                    | REDIR_WORD LESS_AND DASH
115 |                    | AND_GREATER WORD
116 |                    | AND_GREATER_GREATER WORD'''
117 |     parserobj = p.context
118 |     if len(p) == 3:
119 |         output = p[2]
120 |         if p.slice[2].ttype == tokenizer.tokentype.WORD:
121 |             output = _expandword(parserobj, p.slice[2])
122 |         p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None,
123 |                         output=output, pos=(p.lexpos(1), p.endlexpos(2)))
124 |     else:
125 |         output = p[3]
126 |         if p.slice[3].ttype == tokenizer.tokentype.WORD:
127 |             output = _expandword(parserobj, p.slice[3])
128 |         p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None,
129 |                         output=output, pos=(p.lexpos(1), p.endlexpos(3)))
130 | 
131 | def _expandword(parser, tokenword):
132 |     if parser._expansionlimit == -1:
133 |         # we enter this branch in the following conditions:
134 |         # - currently parsing a substitution as a result of an expansion
135 |         # - the previous expansion had limit == 0
136 |         #
137 |         # this means that this node is a descendant of a substitution in an
138 |         # unexpanded word and will be filtered in the limit == 0 condition below
139 |         #
140 |         # (the reason we even expand when limit == 0 is to get quote removal)
141 |         node = ast.node(kind='word', word=tokenword,
142 |                         pos=(tokenword.lexpos, tokenword.endlexpos), parts=[])
143 |         return node
144 |     else:
145 |         quoted = bool(tokenword.flags & flags.word.QUOTED)
146 |         doublequoted = quoted and tokenword.value[0] == '"'
147 | 
148 |         # TODO set qheredocument
149 |         parts, expandedword = subst._expandwordinternal(parser,
150 |                                                         tokenword, 0,
151 |                                                         doublequoted, 0, 0)
152 | 
153 |         # limit reached, don't include substitutions (still expanded to get
154 |         # quote removal though)
155 |         if parser._expansionlimit == 0:
156 |             parts = [node for node in parts if 'substitution' not in node.kind]
157 | 
158 |         node = ast.node(kind='word', word=expandedword,
159 |                         pos=(tokenword.lexpos, tokenword.endlexpos), parts=parts)
160 |         return node
161 | 
162 | def p_simple_command_element(p):
163 |     '''simple_command_element : WORD
164 |                               | ASSIGNMENT_WORD
165 |                               | redirection'''
166 |     if isinstance(p[1], ast.node):
167 |         p[0] = [p[1]]
168 |         return
169 | 
170 |     parserobj = p.context
171 |     p[0] = [_expandword(parserobj, p.slice[1])]
172 | 
173 |     # change the word node to an assignment if necessary
174 |     if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD:
175 |         p[0][0].kind = 'assignment'
176 | 
177 | def p_redirection_list(p):
178 |     '''redirection_list : redirection
179 |                         | redirection_list redirection'''
180 |     if len(p) == 2:
181 |         p[0] = [p[1]]
182 |     else:
183 |         p[0] = p[1]
184 |         p[0].append(p[2])
185 | 
186 | def p_simple_command(p):
187 |     '''simple_command : simple_command_element
188 |                       | simple_command simple_command_element'''
189 | 
190 |     p[0] = p[1]
191 |     if len(p) == 3:
192 |         p[0].extend(p[2])
193 | 
194 | def p_command(p):
195 |     '''command : simple_command
196 |                | shell_command
197 |                | shell_command redirection_list
198 |                | function_def
199 |                | coproc'''
200 |     if isinstance(p[1], ast.node):
201 |         p[0] = p[1]
202 |         if len(p) == 3:
203 |             handleAssert(p, p[0].kind == 'compound')
204 |             p[0].redirects.extend(p[2])
205 |             handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1])
206 |             p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])
207 |     else:
208 |         p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1]))
209 | 
210 | def p_shell_command(p):
211 |     '''shell_command : for_command
212 |                      | case_command
213 |                      | WHILE compound_list DO compound_list DONE
214 |                      | UNTIL compound_list DO compound_list DONE
215 |                      | select_command
216 |                      | if_command
217 |                      | subshell
218 |                      | group_command
219 |                      | arith_command
220 |                      | cond_command
221 |                      | arith_for_command'''
222 |     if len(p) == 2:
223 |         p[0] = p[1]
224 |     else:
225 |         # while or until
226 |         handleAssert(p, p[2].kind == 'list')
227 | 
228 |         parts = _makeparts(p)
229 |         kind = parts[0].word
230 |         assert kind in ('while', 'until')
231 |         p[0] = ast.node(kind='compound',
232 |                         redirects=[],
233 |                         list=[ast.node(kind=kind, parts=parts, pos=_partsspan(parts))],
234 |                         pos=_partsspan(parts))
235 | 
236 |     handleAssert(p, p[0].kind == 'compound')
237 | 
238 | def _makeparts(p):
239 |     parts = []
240 |     for i in range(1, len(p)):
241 |         if isinstance(p[i], ast.node):
242 |             parts.append(p[i])
243 |         elif isinstance(p[i], list):
244 |             parts.extend(p[i])
245 |         elif isinstance(p.slice[i], tokenizer.token):
246 |             if p.slice[i].ttype == tokenizer.tokentype.WORD:
247 |                 parserobj = p.context
248 |                 parts.append(_expandword(parserobj, p.slice[i]))
249 |             else:
250 |                 parts.append(ast.node(kind='reservedword', word=p[i],
251 |                                       pos=p.lexspan(i)))
252 |         else:
253 |             pass
254 | 
255 |     return parts
256 | 
257 | def p_for_command(p):
258 |     '''for_command : FOR WORD newline_list DO compound_list DONE
259 |                    | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY
260 |                    | FOR WORD SEMICOLON newline_list DO compound_list DONE
261 |                    | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY
262 |                    | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE
263 |                    | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
264 |                    | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE
265 |                    | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY'''
266 |     parts = _makeparts(p)
267 |     # find the operatornode that we might have there due to
268 |     # list_terminator/newline_list and convert it to a reservedword so its
269 |     # considered as part of the for loop
270 |     for i, part in enumerate(parts):
271 |         if part.kind == 'operator' and part.op == ';':
272 |             parts[i] = ast.node(kind='reservedword', word=';', pos=part.pos)
273 |             break # there could be only one in there...
274 | 
275 |     p[0] = ast.node(kind='compound',
276 |                     redirects=[],
277 |                     list=[ast.node(kind='for', parts=parts, pos=_partsspan(parts))],
278 |                     pos=_partsspan(parts))
279 | 
280 | def p_arith_for_command(p):
281 |     '''arith_for_command : FOR ARITH_FOR_EXPRS list_terminator newline_list DO compound_list DONE
282 |                          | FOR ARITH_FOR_EXPRS list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
283 |                          | FOR ARITH_FOR_EXPRS DO compound_list DONE
284 |                          | FOR ARITH_FOR_EXPRS LEFT_CURLY compound_list RIGHT_CURLY'''
285 |     handleNotImplemented(p, 'arithmetic for')
286 | 
287 | def p_select_command(p):
288 |     '''select_command : SELECT WORD newline_list DO list DONE
289 |                       | SELECT WORD newline_list LEFT_CURLY list RIGHT_CURLY
290 |                       | SELECT WORD SEMICOLON newline_list DO list DONE
291 |                       | SELECT WORD SEMICOLON newline_list LEFT_CURLY list RIGHT_CURLY
292 |                       | SELECT WORD newline_list IN word_list list_terminator newline_list DO list DONE
293 |                       | SELECT WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY list RIGHT_CURLY'''
294 |     handleNotImplemented(p, 'select command')
295 | 
296 | def p_case_command(p):
297 |     '''case_command : CASE WORD newline_list IN newline_list ESAC
298 |                     | CASE WORD newline_list IN case_clause_sequence newline_list ESAC
299 |                     | CASE WORD newline_list IN case_clause ESAC'''
300 |     parts = _makeparts(p)
301 |     p[0] = ast.node(kind='compound',
302 |                     redirects=[],
303 |                     list=[ast.node(kind='case', parts=parts, pos=_partsspan(parts))],
304 |                     pos=_partsspan(parts))
305 | 
306 | def p_function_def(p):
307 |     '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
308 |                     | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
309 |                     | FUNCTION WORD newline_list function_body'''
310 |     parts = _makeparts(p)
311 |     body = parts[-1]
312 |     name = parts[ast.findfirstkind(parts, 'word')]
313 | 
314 |     p[0] = ast.node(kind='function', name=name, body=body, parts=parts,
315 |                     pos=_partsspan(parts))
316 | 
317 | def p_function_body(p):
318 |     '''function_body : shell_command
319 |                      | shell_command redirection_list'''
320 |     handleAssert(p, p[1].kind == 'compound')
321 | 
322 |     p[0] = p[1]
323 |     if len(p) == 3:
324 |         p[0].redirects.extend(p[2])
325 |         handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1])
326 |         p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])
327 | 
328 | def p_subshell(p):
329 |     '''subshell : LEFT_PAREN compound_list RIGHT_PAREN'''
330 |     lparen = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
331 |     rparen = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
332 |     parts = [lparen, p[2], rparen]
333 |     p[0] = ast.node(kind='compound', list=parts, redirects=[],
334 |                     pos=_partsspan(parts))
335 | 
336 | def p_coproc(p):
337 |     '''coproc : COPROC shell_command
338 |               | COPROC shell_command redirection_list
339 |               | COPROC WORD shell_command
340 |               | COPROC WORD shell_command redirection_list
341 |               | COPROC simple_command'''
342 |     handleNotImplemented(p, 'coproc')
343 | 
344 | def p_if_command(p):
345 |     '''if_command : IF compound_list THEN compound_list FI
346 |                   | IF compound_list THEN compound_list ELSE compound_list FI
347 |                   | IF compound_list THEN compound_list elif_clause FI'''
348 |     # we currently don't distinguish the various lists that make up the
349 |     # command, because it's not needed later on. if there will be a need
350 |     # we can always add different nodes for elif/else.
351 |     parts = _makeparts(p)
352 |     p[0] = ast.node(kind='compound',
353 |                     redirects=[],
354 |                     list=[ast.node(kind='if', parts=parts, pos=_partsspan(parts))],
355 |                     pos=_partsspan(parts))
356 | 
357 | def p_group_command(p):
358 |     '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
359 |     lcurly = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
360 |     rcurly = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
361 |     parts = [lcurly, p[2], rcurly]
362 |     p[0] = ast.node(kind='compound', list=parts, redirects=[],
363 |                     pos=_partsspan(parts))
364 | 
365 | def p_arith_command(p):
366 |     '''arith_command : ARITH_CMD'''
367 |     handleNotImplemented(p, 'arithmetic command')
368 | 
369 | def p_cond_command(p):
370 |     '''cond_command : COND_START COND_CMD COND_END'''
371 |     handleNotImplemented(p, 'cond command')
372 | 
373 | def p_elif_clause(p):
374 |     '''elif_clause : ELIF compound_list THEN compound_list
375 |                    | ELIF compound_list THEN compound_list ELSE compound_list
376 |                    | ELIF compound_list THEN compound_list elif_clause'''
377 |     parts = []
378 |     for i in range(1, len(p)):
379 |         if isinstance(p[i], ast.node):
380 |             parts.append(p[i])
381 |         else:
382 |             parts.append(ast.node(kind='reservedword', word=p[i], pos=p.lexspan(i)))
383 |     p[0] = parts
384 | 
385 | def p_case_clause(p):
386 |     '''case_clause : pattern_list
387 |                    | case_clause_sequence pattern_list'''
388 |     if len(p) == 2:
389 |         p[0] = [p[1]]
390 |     else:
391 |         p[0] = p[1]
392 |         p[0].append(p[2])
393 | 
394 | def p_pattern_list(p):
395 |     '''pattern_list : newline_list pattern RIGHT_PAREN compound_list
396 |                     | newline_list pattern RIGHT_PAREN newline_list
397 |                     | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list
398 |                     | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list'''
399 |     parts = []
400 |     if len(p) == 5:
401 |         parts.append(ast.node(kind='pattern', parts=p[2], pos=_partsspan(p[2])))
402 |         parts.append(ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)))
403 |         if isinstance(p[4], ast.node):
404 |             parts.append(p[4])
405 |     else:
406 |         parts.append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
407 |         parts.append(ast.node(kind='pattern', parts=p[3], pos=_partsspan(p[3])))
408 |         parts.append(ast.node(kind='reservedword', word=p[4], pos=p.lexspan(4)))
409 |         if isinstance(p[5], ast.node):
410 |             parts.append(p[5])
411 | 
412 |     p[0] = ast.node(kind='compound', list=parts, redirects=[], pos=_partsspan(parts))
413 | 
414 | def p_case_clause_sequence(p):
415 |     '''case_clause_sequence : pattern_list SEMI_SEMI
416 |                             | case_clause_sequence pattern_list SEMI_SEMI
417 |                             | pattern_list SEMI_AND
418 |                             | case_clause_sequence pattern_list SEMI_AND
419 |                             | pattern_list SEMI_SEMI_AND
420 |                             | case_clause_sequence pattern_list SEMI_SEMI_AND'''
421 |     if len(p) == 3:
422 |         p[0] = [p[1]]
423 |         p[0].append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
424 |     else:
425 |         p[0] = p[1]
426 |         p[0].append(p[2])
427 |         p[0].append(ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)))
428 | 
429 | def p_pattern(p):
430 |     '''pattern : WORD
431 |                | pattern BAR WORD'''
432 | 
433 |     parserobj = p.context
434 |     if len(p) == 2:
435 |         p[0] = [_expandword(parserobj, p.slice[1])]
436 |     else:
437 |         p[0] = p[1]
438 |         p[0].append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
439 |         p[0].append(_expandword(parserobj, p.slice[3]))
440 | 
441 | def p_list(p):
442 |     '''list : newline_list list0'''
443 |     p[0] = p[2]
444 | 
445 | def p_compound_list(p):
446 |     '''compound_list : list
447 |                      | newline_list list1'''
448 |     if len(p) == 2:
449 |         p[0] = p[1]
450 |     else:
451 |         parts = p[2]
452 |         if len(parts) > 1:
453 |             p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
454 |         else:
455 |             p[0] = parts[0]
456 | 
457 | def p_list0(p):
458 |     '''list0 : list1 NEWLINE newline_list
459 |              | list1 AMPERSAND newline_list
460 |              | list1 SEMICOLON newline_list'''
461 |     parts = p[1]
462 |     if len(parts) > 1 or p.slice[2].ttype != tokenizer.tokentype.NEWLINE:
463 |         parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
464 |         p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
465 |     else:
466 |         p[0] = parts[0]
467 | 
468 | def p_list1(p):
469 |     '''list1 : list1 AND_AND newline_list list1
470 |              | list1 OR_OR newline_list list1
471 |              | list1 AMPERSAND newline_list list1
472 |              | list1 SEMICOLON newline_list list1
473 |              | list1 NEWLINE newline_list list1
474 |              | pipeline_command'''
475 |     if len(p) == 2:
476 |         p[0] = [p[1]]
477 |     else:
478 |         p[0] = p[1]
479 |         # XXX newline
480 |         p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
481 |         p[0].extend(p[len(p) - 1])
482 | 
483 | def p_simple_list_terminator(p):
484 |     '''simple_list_terminator : NEWLINE
485 |                               | EOF'''
486 |     pass
487 | 
488 | def p_list_terminator(p):
489 |     '''list_terminator : NEWLINE
490 |                        | SEMICOLON
491 |                        | EOF'''
492 |     if p[1] == ';':
493 |         p[0] = ast.node(kind='operator', op=';', pos=p.lexspan(1))
494 | 
495 | def p_newline_list(p):
496 |     '''newline_list : empty
497 |                     | newline_list NEWLINE'''
498 |     pass
499 | 
500 | def p_simple_list(p):
501 |     '''simple_list : simple_list1
502 |                    | simple_list1 AMPERSAND
503 |                    | simple_list1 SEMICOLON'''
504 |     tok = p.lexer
505 |     heredoc.gatherheredocuments(tok)
506 | 
507 |     if len(p) == 3 or len(p[1]) > 1:
508 |         parts = p[1]
509 |         if len(p) == 3:
510 |             parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
511 |         p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
512 |     else:
513 |         assert len(p[1]) == 1
514 |         p[0] = p[1][0]
515 | 
516 |     if (len(p) == 2 and p.lexer._parserstate & flags.parser.CMDSUBST and
517 |             p.lexer._current_token.nopos() == p.lexer._shell_eof_token):
518 |         # accept the input
519 |         p.accept()
520 | 
521 | def p_simple_list1(p):
522 |     '''simple_list1 : simple_list1 AND_AND newline_list simple_list1
523 |                     | simple_list1 OR_OR newline_list simple_list1
524 |                     | simple_list1 AMPERSAND simple_list1
525 |                     | simple_list1 SEMICOLON simple_list1
526 |                     | pipeline_command'''
527 |     if len(p) == 2:
528 |         p[0] = [p[1]]
529 |     else:
530 |         p[0] = p[1]
531 |         p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
532 |         p[0].extend(p[len(p) - 1])
533 | 
534 | def p_pipeline_command(p):
535 |     '''pipeline_command : pipeline
536 |                         | BANG pipeline_command
537 |                         | timespec pipeline_command
538 |                         | timespec list_terminator
539 |                         | BANG list_terminator'''
540 |     if len(p) == 2:
541 |         if len(p[1]) == 1:
542 |             p[0] = p[1][0]
543 |         else:
544 |             p[0] = ast.node(kind='pipeline', parts=p[1],
545 |                             pos=(p[1][0].pos[0], p[1][-1].pos[1]))
546 |     else:
547 |         # XXX timespec
548 |         node = ast.node(kind='reservedword', word='!', pos=p.lexspan(1))
549 |         if p[2].kind == 'pipeline':
550 |             p[0] = p[2]
551 |             p[0].parts.insert(0, node)
552 |             p[0].pos = (p[0].parts[0].pos[0], p[0].parts[-1].pos[1])
553 |         else:
554 |             p[0] = ast.node(kind='pipeline', parts=[node, p[2]],
555 |                             pos=(node.pos[0], p[2].pos[1]))
556 | 
557 | def p_pipeline(p):
558 |     '''pipeline : pipeline BAR newline_list pipeline
559 |                 | pipeline BAR_AND newline_list pipeline
560 |                 | command'''
561 |     if len(p) == 2:
562 |         p[0] = [p[1]]
563 |     else:
564 |         p[0] = p[1]
565 |         p[0].append(ast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
566 |         p[0].extend(p[len(p) - 1])
567 | 
568 | def p_timespec(p):
569 |     '''timespec : TIME
570 |                 | TIME TIMEOPT
571 |                 | TIME TIMEOPT TIMEIGN'''
572 |     handleNotImplemented(p, 'time command')
573 | 
574 | def p_empty(p):
575 |     '''empty :'''
576 |     pass
577 | 
578 | def p_error(p):
579 |     assert isinstance(p, tokenizer.token)
580 | 
581 |     if p.ttype == tokenizer.tokentype.EOF:
582 |         raise errors.ParsingError('unexpected EOF',
583 |                                   p.lexer.source,
584 |                                   len(p.lexer.source))
585 |     else:
586 |         raise errors.ParsingError('unexpected token %r' % p.value,
587 |                                   p.lexer.source, p.lexpos)
588 | 
589 | yaccparser = yacc.yacc(outputdir=os.path.dirname(__file__),         
590 |                         debug=False)
591 | 
592 | # some hack to fix yacc's reduction on command substitutions:
593 | # which state to fix is derived from static transition tables
594 | # as states are changeable among python versions and architectures
595 | # the only state that is considered fixed is the initial state: 0
596 | def get_correction_states():
597 |     reduce = yaccparser.goto[0]['simple_list'] #~10
598 |     state2 = yaccparser.action[reduce]['NEWLINE'] #63
599 |     state1 = yaccparser.goto[reduce]['simple_list_terminator'] #~10
600 |     return state1, state2
601 | 
602 | def get_correction_rightparen_states():
603 |     state1 = yaccparser.goto[0]['pipeline_command']
604 |     state2 = yaccparser.goto[0]['simple_list1'] #11
605 |     state_temp = yaccparser.action[state2]['SEMICOLON'] #65
606 |     state3 = yaccparser.goto[state_temp]['simple_list1']
607 |     return state1, state2, state3
608 | 
609 | for tt in tokenizer.tokentype:
610 |     states = get_correction_states()
611 |     yaccparser.action[states[0]][tt.name] = -1
612 |     yaccparser.action[states[1]][tt.name] = -141
613 | 
614 | states = get_correction_rightparen_states()
615 | yaccparser.action[states[0]]['RIGHT_PAREN'] = -155
616 | yaccparser.action[states[1]]['RIGHT_PAREN'] = -148
617 | yaccparser.action[states[2]]['RIGHT_PAREN'] = -154
618 | 
619 | def parsesingle(s, strictmode=True, expansionlimit=None, convertpos=False, proceedonerror=False):
620 |     '''like parse, but only consumes a single top level node, e.g. parsing
621 |     'a\nb' will only return a node for 'a', leaving b unparsed'''
622 |     p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit, proceedonerror=proceedonerror)
623 |     tree = p.parse()
624 |     if convertpos:
625 |         ast.posconverter(s).visit(tree)
626 |     return tree
627 | 
628 | def parse(s, strictmode=True, expansionlimit=None, convertpos=False, proceedonerror=False):
629 |     '''parse the input string, returning a list of nodes
630 | 
631 |     top level node kinds are:
632 | 
633 |     - command - a simple command
634 |     - pipeline - a series of simple commands
635 |     - list - a series of one or more pipelines
636 |     - compound - contains constructs for { list; }, (list), if, for..
637 | 
638 |     leafs are word nodes (which in turn can also contain any of the
639 |     aforementioned nodes due to command substitutions).
640 | 
641 |     when strictmode is set to False, we will:
642 |     - skip reading a heredoc if we're at the end of the input
643 | 
644 |     expansionlimit is used to limit the amount of recursive parsing done due to
645 |     command substitutions found during word expansion.
646 | 
647 |     when proceedonerror set, the parser will return AST nodes for unimplemented features, etc. (e.g., rather than throwing a NotImplementedError)
648 |     '''
649 |     p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit, proceedonerror=proceedonerror)
650 |     parts = [p.parse()]
651 | 
652 |     class endfinder(ast.nodevisitor):
653 |         def __init__(self):
654 |             self.end = -1
655 |         def visitheredoc(self, node, value):
656 |             self.end = node.pos[1]
657 | 
658 |     # find the 'real' end incase we have a heredoc in there
659 |     ef = _endfinder()
660 |     ef.visit(parts[-1])
661 |     index = max(parts[-1].pos[1], ef.end) + 1
662 |     while index < len(s):
663 |         part = _parser(s[index:], strictmode=strictmode, proceedonerror=proceedonerror).parse()
664 | 
665 |         if not isinstance(part, ast.node):
666 |             break
667 | 
668 |         ast.posshifter(index).visit(part)
669 |         parts.append(part)
670 |         ef = _endfinder()
671 |         ef.visit(parts[-1])
672 |         index = max(parts[-1].pos[1], ef.end) + 1
673 | 
674 |     if convertpos:
675 |         for tree in parts:
676 |             ast.posconverter(s).visit(tree)
677 | 
678 |     return parts
679 | 
680 | def split(s):
681 |     '''a utility function that mimics shlex.split but handles more
682 |     complex shell constructs such as command substitutions inside words
683 | 
684 |     >>> list(split('a b"c"\\'d\\''))
685 |     ['a', 'bcd']
686 |     >>> list(split('a "b $(c)" $(d) \\'$(e)\\''))
687 |     ['a', 'b $(c)', '$(d)', '$(e)']
688 |     >>> list(split('a b\\n'))
689 |     ['a', 'b', '\\n']
690 |     '''
691 |     p = _parser(s)
692 |     for t in p.tok:
693 |         if t.ttype == tokenizer.tokentype.WORD:
694 |             quoted = bool(t.flags & flags.word.QUOTED)
695 |             doublequoted = quoted and t.value[0] == '"'
696 |             parts, expandedword = subst._expandwordinternal(p, t, 0,
697 |                                                             doublequoted, 0, 0)
698 |             yield expandedword
699 |         else:
700 |             yield s[t.lexpos:t.endlexpos]
701 | 
702 | class _parser(object):
703 |     '''
704 |     this class is mainly used to provide context to the productions
705 |     when we're in the middle of parsing. as a hack, we shove it into the
706 |     YaccProduction context attribute to make it accessible.
707 |     '''
708 |     def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None,
709 |                  proceedonerror=None):
710 |         assert expansionlimit is None or isinstance(expansionlimit, int)
711 | 
712 |         self.s = s
713 |         self._strictmode = strictmode
714 |         self._expansionlimit = expansionlimit
715 |         self._proceedonerror = proceedonerror
716 | 
717 |         if tokenizerargs is None:
718 |             tokenizerargs = {}
719 |         self.parserstate = tokenizerargs.pop('parserstate', state.parserstate())
720 | 
721 |         self.tok = tokenizer.tokenizer(s,
722 |                                        parserstate=self.parserstate,
723 |                                        strictmode=strictmode,
724 |                                        **tokenizerargs)
725 | 
726 |         self.redirstack = self.tok.redirstack
727 | 
728 |     def parse(self):
729 |         # yacc.yacc returns a parser object that is not reentrant, it has
730 |         # some mutable state. we make a shallow copy of it so no
731 |         # state spills over to the next call to parse on it
732 |         theparser = copy.copy(yaccparser)
733 |         tree = theparser.parse(lexer=self.tok, context=self)
734 | 
735 |         return tree
736 | 
737 | class _endfinder(ast.nodevisitor):
738 |     '''helper class to find the "real" end pos of a node that contains
739 |     a heredoc. this is a hack because heredoc aren't really part of any node
740 |     since they don't always follow the end of a node and might appear on
741 |     a different line'''
742 |     def __init__(self):
743 |         self.end = -1
744 |     def visitheredoc(self, node, value):
745 |         self.end = node.pos[1]
746 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/bashlex/tokenizer.py:
--------------------------------------------------------------------------------
   1 | import re, collections, enum
   2 | 
   3 | from bashlex import flags, shutils, utils, errors, heredoc, state
   4 | 
   5 | sh_syntaxtab = collections.defaultdict(set)
   6 | 
   7 | def _addsyntax(chars, symbol):
   8 |     for c in chars:
   9 |         sh_syntaxtab[c].add(symbol)
  10 | 
  11 | _addsyntax('\\`$"\n', 'dquote')
  12 | _addsyntax('()<>;&|', 'meta')
  13 | _addsyntax('"`\'', 'quote')
  14 | _addsyntax('$<>', 'exp')
  15 | _addsyntax("()<>;&| \t\n", 'break')
  16 | 
  17 | def _shellblank(c):
  18 |     return c in ' \t'
  19 | 
  20 | def _shellmeta(c):
  21 |     return 'meta' in sh_syntaxtab[c]
  22 | 
  23 | def _shellquote(c):
  24 |     return 'quote' in sh_syntaxtab[c]
  25 | 
  26 | def _shellexp(c):
  27 |     return 'exp' in sh_syntaxtab[c]
  28 | 
  29 | def _shellbreak(c):
  30 |     return 'break' in sh_syntaxtab[c]
  31 | 
  32 | class tokentype(enum.Enum):
  33 |     IF = 1
  34 |     THEN = 2
  35 |     ELSE = 3
  36 |     ELIF = 4
  37 |     FI = 5
  38 |     CASE = 6
  39 |     ESAC = 7
  40 |     FOR = 8
  41 |     SELECT = 9
  42 |     WHILE = 10
  43 |     UNTIL = 11
  44 |     DO = 12
  45 |     DONE = 13
  46 |     FUNCTION = 14
  47 |     COPROC = 15
  48 |     COND_START = 16
  49 |     COND_END = 17
  50 |     # https://github.com/idank/bashlex/issues/20
  51 |     # COND_ERROR = 18
  52 |     IN = 19
  53 |     BANG = '!'
  54 |     TIME = 21
  55 |     TIMEOPT = 22
  56 |     TIMEIGN = 23
  57 |     WORD = 24
  58 |     ASSIGNMENT_WORD = 25
  59 |     REDIR_WORD = 26
  60 |     NUMBER = 27
  61 |     ARITH_CMD = 28
  62 |     ARITH_FOR_EXPRS = 29
  63 |     COND_CMD = 30
  64 |     AND_AND = '&&'
  65 |     OR_OR = '||'
  66 |     GREATER_GREATER = '>>'
  67 |     LESS_LESS = '<<'
  68 |     LESS_AND = '<&'
  69 |     LESS_LESS_LESS = '<<<'
  70 |     GREATER_AND = '>&'
  71 |     SEMI_SEMI = ';;'
  72 |     SEMI_AND = ';&'
  73 |     SEMI_SEMI_AND = ';;&'
  74 |     LESS_LESS_MINUS = '<<-'
  75 |     AND_GREATER = '&>'
  76 |     AND_GREATER_GREATER = '&>>'
  77 |     LESS_GREATER = '<>'
  78 |     GREATER_BAR = '>|'
  79 |     BAR_AND = '|&'
  80 |     LEFT_CURLY = 47
  81 |     RIGHT_CURLY = 48
  82 |     EOF = '$end'
  83 |     LEFT_PAREN = '('
  84 |     RIGHT_PAREN = ')'
  85 |     BAR = '|'
  86 |     SEMICOLON = ';'
  87 |     DASH = '-'
  88 |     NEWLINE = '\n'
  89 |     LESS = '<'
  90 |     GREATER = '>'
  91 |     AMPERSAND = '&'
  92 | 
  93 | _reserved = set([
  94 |     tokentype.AND_AND, tokentype.BANG, tokentype.BAR_AND, tokentype.DO,
  95 |     tokentype.DONE, tokentype.ELIF, tokentype.ELSE, tokentype.ESAC,
  96 |     tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
  97 |     tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
  98 |     tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
  99 |     tokentype.UNTIL, tokentype.WHILE])
 100 | 
 101 | for c in '\n;()|&{}':
 102 |     _reserved.add(c)
 103 | 
 104 | # word_token_alist
 105 | valid_reserved_first_command = {
 106 |     "if" : tokentype.IF,
 107 |     "then" : tokentype.THEN,
 108 |     "else" : tokentype.ELSE,
 109 |     "elif" : tokentype.ELIF,
 110 |     "fi" : tokentype.FI,
 111 |     "case" : tokentype.CASE,
 112 |     "esac" : tokentype.ESAC,
 113 |     "for" : tokentype.FOR,
 114 |     "select" : tokentype.SELECT,
 115 |     "while" : tokentype.WHILE,
 116 |     "until" : tokentype.UNTIL,
 117 |     "do" : tokentype.DO,
 118 |     "done" : tokentype.DONE,
 119 |     "in" : tokentype.IN,
 120 |     "function" : tokentype.FUNCTION,
 121 |     "time" : tokentype.TIME,
 122 |     "{" : tokentype.LEFT_CURLY,
 123 |     "}" : tokentype.RIGHT_CURLY,
 124 |     "!" : tokentype.BANG,
 125 |     "[[" : tokentype.COND_START,
 126 |     "]]" : tokentype.COND_END,
 127 |     "coproc" : tokentype.COPROC
 128 | }
 129 | 
 130 | class MatchedPairError(errors.ParsingError):
 131 |     def __init__(self, startline, message, tokenizer):
 132 |         # TODO use startline?
 133 |         super(MatchedPairError, self).__init__(message,
 134 |                                                tokenizer.source,
 135 |                                                tokenizer._shell_input_line_index - 1)
 136 | 
 137 | wordflags = flags.word
 138 | parserflags = flags.parser
 139 | 
 140 | class token(object):
 141 |     def __init__(self, type_, value, pos=None, flags=None):
 142 |         if type_ is not None:
 143 |             assert isinstance(type_, tokentype)
 144 | 
 145 |         if flags is None:
 146 |             flags = set()
 147 | 
 148 |         self.ttype = type_
 149 | 
 150 |         self.value = value
 151 |         if pos is not None:
 152 |             self.lexpos = pos[0]
 153 |             self.endlexpos = pos[1]
 154 |             assert self.lexpos < self.endlexpos, (self.lexpos, self.endlexpos)
 155 |         else:
 156 |             self.lexpos = self.endlexpos = None
 157 | 
 158 |         self.flags = flags
 159 | 
 160 |     @property
 161 |     def type(self):
 162 |         if self.ttype:
 163 |             # make yacc see our EOF token as its own special one $end
 164 |             if self.ttype == tokentype.EOF:
 165 |                 return '$end'
 166 |             else:
 167 |                 return self.ttype.name
 168 | 
 169 |     def __nonzero__(self):
 170 |         return not (self.ttype is None and self.value is None)
 171 | 
 172 |     __bool__ = __nonzero__
 173 | 
 174 |     def __eq__(self, other):
 175 |         return isinstance(other, token) and (self.type == other.type and
 176 |                                              self.value == other.value and
 177 |                                              self.lexpos == other.lexpos and
 178 |                                              self.endlexpos == other.endlexpos and
 179 |                                              self.flags == other.flags)
 180 | 
 181 |     def __repr__(self):
 182 |         s = ['<', self.type]
 183 |         if self.lexpos is not None and self.endlexpos is not None:
 184 |             s.append('@%d:%d' % (self.lexpos, self.endlexpos))
 185 |         if self.value:
 186 |             s.append(' ')
 187 |             s.append(repr(self.value))
 188 | 
 189 |         if self.flags:
 190 |             prettyflags = ' '.join([e.name for e in self.flags])
 191 |             s.append(' (%s)' % prettyflags)
 192 |         s.append('>')
 193 |         return ''.join(s)
 194 | 
 195 |     def nopos(self):
 196 |         return self.__class__(self.ttype, self.value, flags=self.flags)
 197 | 
 198 | eoftoken = token(tokentype.EOF, None)
 199 | 
 200 | class tokenizer(object):
 201 |     def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
 202 |                  lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
 203 |         self._shell_eof_token = eoftoken
 204 |         self._shell_input_line = s
 205 |         self._added_newline = False
 206 |         if self._shell_input_line and self._shell_input_line[-1] != '\n':
 207 |             self._shell_input_line += '\n' # bash/parse.y L2431
 208 |             self._added_newline = True
 209 |         self._shell_input_line_index = 0
 210 |         # self._shell_input_line_terminator = None
 211 |         self._two_tokens_ago = twotokensago or token(None, None)
 212 |         self._token_before_that = tokenbeforethat or token(None, None)
 213 |         self._last_read_token = lastreadtoken or token(None, None)
 214 |         self._current_token = token(None, None)
 215 | 
 216 |         # This implements one-character lookahead/lookbehind across physical
 217 |         # input lines, to avoid something being lost because it's pushed back
 218 |         # with shell_ungetc when we're at the start of a line.
 219 |         self._eol_ungetc_lookahead = None
 220 | 
 221 |         # token waiting to be read
 222 |         self._token_to_read = None
 223 | 
 224 |         self._parserstate = parserstate
 225 |         self._line_number = 0
 226 |         self._open_brace_count = 0
 227 |         self._esacs_needed_count = 0
 228 | 
 229 |         self._dstack = []
 230 | 
 231 |         # a stack of positions to record the start and end of a token
 232 |         self._positions = []
 233 | 
 234 |         self._strictmode = strictmode
 235 | 
 236 |         # hack: the tokenizer needs access to the stack of redirection
 237 |         # nodes when it reads heredocs. this instance is shared between
 238 |         # the tokenizer and the parser, which also needs it
 239 |         self.redirstack = []
 240 | 
 241 |     @property
 242 |     def source(self):
 243 |         if self._added_newline:
 244 |             return self._shell_input_line[:-1]
 245 |         return self._shell_input_line
 246 | 
 247 |     def __iter__(self):
 248 |         while True:
 249 |             t = self.token()
 250 |             # we're finished when we see the eoftoken OR when we added a newline
 251 |             # to the input and we're there now
 252 |             if t is eoftoken or (self._added_newline and
 253 |                                  t.lexpos + 1 == len(self._shell_input_line)):
 254 |                 break
 255 |             yield t
 256 | 
 257 |     def _createtoken(self, type_, value, flags=None):
 258 |         '''create a token with position information'''
 259 |         pos = None
 260 |         assert len(self._positions) >= 2, (type_, value)
 261 |         p2 = self._positions.pop()
 262 |         p1 = self._positions.pop()
 263 |         pos = [p1, p2]
 264 |         return token(type_, value, pos, flags)
 265 | 
 266 |     def token(self):
 267 |         self._two_tokens_ago, self._token_before_that, self._last_read_token = \
 268 |             self._token_before_that, self._last_read_token, self._current_token
 269 | 
 270 |         self._current_token = self._readtoken()
 271 |         if isinstance(self._current_token, tokentype):
 272 |             self._recordpos()
 273 |             self._current_token = self._createtoken(self._current_token,
 274 |                                                     self._current_token.value)
 275 | 
 276 |         if (self._parserstate & parserflags.EOFTOKEN and
 277 |             self._current_token.ttype == self._shell_eof_token):
 278 |             self._current_token = eoftoken
 279 |             # bash/parse.y L2626
 280 |         self._parserstate.discard(parserflags.EOFTOKEN)
 281 | 
 282 |         return self._current_token
 283 | 
 284 |     def _readtoken(self):
 285 |         character = None
 286 |         peek_char = None
 287 | 
 288 |         if self._token_to_read is not None:
 289 |             t = self._token_to_read
 290 |             self._token_to_read = None
 291 |             return t
 292 | 
 293 |         # bashlex/parse.y L2989 COND_COMMAND
 294 |         character = self._getc(True)
 295 |         while character is not None and _shellblank(character):
 296 |             character = self._getc(True)
 297 | 
 298 |         if character is None:
 299 |             return eoftoken
 300 | 
 301 |         if character == '#':
 302 |             self._discard_until('\n')
 303 |             self._getc(False)
 304 |             character = '\n'
 305 | 
 306 |         self._recordpos(1)
 307 | 
 308 |         if character == '\n':
 309 |             # bashlex/parse.y L3034 ALIAS
 310 |             heredoc.gatherheredocuments(self)
 311 | 
 312 |             self._parserstate.discard(parserflags.ASSIGNOK)
 313 |             return tokentype(character)
 314 | 
 315 |         if self._parserstate & parserflags.REGEXP:
 316 |             return self._readtokenword(character)
 317 | 
 318 |         if _shellmeta(character) and not (self._parserstate & parserflags.DBLPAREN):
 319 |             self._parserstate.discard(parserflags.ASSIGNOK)
 320 |             peek_char = self._getc(True)
 321 | 
 322 |             both = character
 323 |             if peek_char:
 324 |                 both += peek_char
 325 |             if character == peek_char:
 326 |                 if character == '<':
 327 |                     peek_char = self._getc()
 328 |                     if peek_char == '-':
 329 |                         return tokentype.LESS_LESS_MINUS
 330 |                     elif peek_char == '<':
 331 |                         return tokentype.LESS_LESS_LESS
 332 |                     else:
 333 |                         self._ungetc(peek_char)
 334 |                         return tokentype.LESS_LESS
 335 |                 elif character == '>':
 336 |                     return tokentype.GREATER_GREATER
 337 |                 elif character == ';':
 338 |                     self._parserstate |= parserflags.CASEPAT
 339 |                     # bashlex/parse.y L3085 ALIAS
 340 |                     peek_char = self._getc()
 341 |                     if peek_char == '&':
 342 |                         return tokentype.SEMI_SEMI_AND
 343 |                     else:
 344 |                         self._ungetc(peek_char)
 345 |                         return tokentype.SEMI_SEMI
 346 |                 elif character == '&':
 347 |                     return tokentype.AND_AND
 348 |                 elif character == '|':
 349 |                     return tokentype.OR_OR
 350 |                 # bashlex/parse.y L3105
 351 |             elif both == '<&':
 352 |                 return tokentype.LESS_AND
 353 |             elif both == '>&':
 354 |                 return tokentype.GREATER_AND
 355 |             elif both == '<>':
 356 |                 return tokentype.LESS_GREATER
 357 |             elif both == '>|':
 358 |                 return tokentype.GREATER_BAR
 359 |             elif both == '&>':
 360 |                 peek_char = self._getc()
 361 |                 if peek_char == '>':
 362 |                     return tokentype.AND_GREATER_GREATER
 363 |                 else:
 364 |                     self._ungetc(peek_char)
 365 |                     return tokentype.AND_GREATER
 366 |             elif both == '|&':
 367 |                 return tokentype.BAR_AND
 368 |             elif both == ';&':
 369 |                 return tokentype.SEMI_AND
 370 | 
 371 |             self._ungetc(peek_char)
 372 |             if character == ')' and self._last_read_token.value == '(' and self._token_before_that.ttype == tokentype.WORD:
 373 |                 self._parserstate.add(parserflags.ALLOWOPNBRC)
 374 |                 # bashlex/parse.y L3155
 375 | 
 376 |             if character == '(' and not self._parserstate & parserflags.CASEPAT:
 377 |                 self._parserstate.add(parserflags.SUBSHELL)
 378 |             elif self._parserstate & parserflags.CASEPAT and character == ')':
 379 |                 self._parserstate.discard(parserflags.CASEPAT)
 380 |             elif self._parserstate & parserflags.SUBSHELL and character == ')':
 381 |                 self._parserstate.discard(parserflags.SUBSHELL)
 382 | 
 383 |             if character not in '<>' or peek_char != '(':
 384 |                 return tokentype(character)
 385 | 
 386 |         if character == '-' and (self._last_read_token.ttype == tokentype.LESS_AND or self._last_read_token.ttype == tokentype.GREATER_AND):
 387 |             return tokentype(character)
 388 | 
 389 |         return self._readtokenword(character)
 390 | 
 391 |     def _readtokenword(self, c):
 392 |         d = {}
 393 |         d['all_digit_token'] = c.isdigit()
 394 |         d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False
 395 | 
 396 |         tokenword = []
 397 | 
 398 |         def handleshellquote():
 399 |             self._push_delimiter(c)
 400 |             try:
 401 |                 ttok = self._parse_matched_pair(c, c, c, parsingcommand=(c == '`'))
 402 |             finally:
 403 |                 self._pop_delimiter()
 404 | 
 405 |             tokenword.append(c)
 406 |             tokenword.extend(ttok)
 407 |             d['all_digit_token'] = False
 408 |             d['quoted'] = True
 409 |             if not d['dollar_present']:
 410 |                 d['dollar_present'] = c == '"' and '$' in ttok
 411 | 
 412 |         def handleshellexp():
 413 |             peek_char = self._getc()
 414 |             if peek_char == '(' or (c == '$' and peek_char in '{['):
 415 |                 # try:
 416 |                 if peek_char == '{':
 417 |                     ttok = self._parse_matched_pair(cd, '{', '}', firstclose=True, dolbrace=True)
 418 |                 elif peek_char == '(':
 419 |                     self._push_delimiter(peek_char)
 420 |                     ttok = self._parse_comsub(cd, '(', ')', parsingcommand=True)
 421 |                     self._pop_delimiter()
 422 |                 else:
 423 |                     ttok = self._parse_matched_pair(cd, '[', ']')
 424 |                 # except MatchedPairError:
 425 |                 #   return -1
 426 | 
 427 |                 tokenword.append(c)
 428 |                 tokenword.append(peek_char)
 429 |                 tokenword.extend(ttok)
 430 |                 d['dollar_present'] = True
 431 |                 d['all_digit_token'] = False
 432 | 
 433 |                 # goto next_character
 434 |             elif c == '$' and peek_char in '\'"':
 435 |                 self._push_delimiter(peek_char)
 436 |                 try:
 437 |                     ttok = self._parse_matched_pair(peek_char, peek_char, peek_char,
 438 |                                                     allowesc=(peek_char == "'"))
 439 |                 # except MatchedPairError:
 440 |                 #    return -1
 441 |                 finally:
 442 |                     self._pop_delimiter()
 443 | 
 444 |                 #if peek_char == "'":
 445 |                 #    # XXX ansiexpand
 446 |                 #    ttok = shutils.single_quote(ttok)
 447 |                 #else:
 448 |                 #    ttok = shutils.double_quote(ttok)
 449 | 
 450 |                 tokenword.append(c)
 451 |                 tokenword.append(peek_char)
 452 |                 tokenword.extend(ttok)
 453 |                 d['quoted'] = True
 454 |                 d['all_digit_token'] = False
 455 | 
 456 |                 # goto next_character
 457 |             elif c == '$' and peek_char == '$':
 458 |                 tokenword.append('$')
 459 |                 tokenword.append('$')
 460 |                 d['dollar_present'] = True
 461 |                 d['all_digit_token'] = False
 462 | 
 463 |                 # goto next_character
 464 |             else:
 465 |                 self._ungetc(peek_char)
 466 |                 return True
 467 | 
 468 |             # bashlex/parse.y L4699 ARRAY_VARS
 469 | 
 470 |         def handleescapedchar():
 471 |             tokenword.append(c)
 472 |             d['all_digit_token'] &= c.isdigit()
 473 |             if not d['dollar_present']:
 474 |                 d['dollar_present'] = c == '$'
 475 | 
 476 |         while True:
 477 |             if c is None:
 478 |                 break
 479 | 
 480 |             if d['pass_next_character']:
 481 |                 d['pass_next_character'] = False
 482 |                 handleescapedchar()
 483 |                 # goto escaped_character
 484 |             else:
 485 |                 cd = self._current_delimiter()
 486 |                 gotonext = False
 487 |                 if c == '\\':
 488 |                     peek_char = self._getc(False)
 489 | 
 490 |                     if peek_char == '\n':
 491 |                         c = '\n'
 492 |                         gotonext = True
 493 |                         # goto next_character
 494 |                     else:
 495 |                         self._ungetc(peek_char)
 496 | 
 497 |                         if (cd is None or cd == '`' or
 498 |                             (cd == '"' and peek_char is not None and
 499 |                              'dquote' in sh_syntaxtab[peek_char])):
 500 |                             d['pass_next_character'] = True
 501 |                             d['quoted'] = True
 502 | 
 503 |                             handleescapedchar()
 504 |                             gotonext = True
 505 |                             # goto got_character
 506 |                 elif _shellquote(c):
 507 |                     handleshellquote()
 508 |                     gotonext = True
 509 |                     # goto next_character
 510 |                 # bashlex/parse.y L4542
 511 |                 # bashlex/parse.y L4567
 512 |                 elif _shellexp(c):
 513 |                     gotonext = not handleshellexp()
 514 |                     # bashlex/parse.y L4699
 515 |                 if not gotonext:
 516 |                     if _shellbreak(c):
 517 |                         self._ungetc(c)
 518 |                         break
 519 |                     else:
 520 |                         handleescapedchar()
 521 | 
 522 |             # got_character
 523 |             # got_escaped_character
 524 | 
 525 |             # tokenword.append(c)
 526 |             # all_digit_token &= c.isdigit()
 527 |             # if not dollar_present:
 528 |             #     dollar_present = c == '$'
 529 | 
 530 |             # next_character
 531 |             cd = self._current_delimiter()
 532 |             c = self._getc(cd != "'" and not d['pass_next_character'])
 533 | 
 534 |         # got_token
 535 |         self._recordpos()
 536 | 
 537 |         tokenword = ''.join(tokenword)
 538 | 
 539 |         if d['all_digit_token'] and (c in '<>' or self._last_read_token.ttype in (tokentype.LESS_AND, tokentype.GREATER_AND)) and shutils.legal_number(tokenword):
 540 |             return self._createtoken(tokentype.NUMBER, int(tokenword))
 541 | 
 542 |         # bashlex/parse.y L4811
 543 |         specialtokentype = self._specialcasetokens(tokenword)
 544 |         if specialtokentype:
 545 |             return self._createtoken(specialtokentype, tokenword)
 546 | 
 547 |         if not d['dollar_present'] and not d['quoted'] and self._reserved_word_acceptable(self._last_read_token):
 548 |             if tokenword in valid_reserved_first_command:
 549 |                 ttype = valid_reserved_first_command[tokenword]
 550 |                 ps = self._parserstate
 551 |                 if ps & parserflags.CASEPAT and ttype != tokentype.ESAC:
 552 |                     pass
 553 |                 elif ttype == tokentype.TIME and not self._time_command_acceptable():
 554 |                     pass
 555 |                 elif ttype == tokentype.ESAC:
 556 |                     ps.discard(parserflags.CASEPAT)
 557 |                     ps.discard(parserflags.CASESTMT)
 558 |                 elif ttype == tokentype.CASE:
 559 |                     ps.add(parserflags.CASESTMT)
 560 |                 elif ttype == tokentype.COND_END:
 561 |                     ps.discard(parserflags.CONDCMD)
 562 |                     ps.discard(parserflags.CONDEXPR)
 563 |                 elif ttype == tokentype.COND_START:
 564 |                     ps.add(parserflags.CONDCMD)
 565 |                 elif ttype == tokentype.LEFT_CURLY:
 566 |                     self._open_brace_count += 1
 567 |                 elif ttype == tokentype.RIGHT_CURLY and self._open_brace_count:
 568 |                     self._open_brace_count -= 1
 569 |                 return self._createtoken(ttype, tokenword)
 570 | 
 571 |         tokenword = self._createtoken(tokentype.WORD, tokenword, utils.typedset(wordflags))
 572 |         if d['dollar_present']:
 573 |             tokenword.flags.add(wordflags.HASDOLLAR)
 574 |         if d['quoted']:
 575 |             tokenword.flags.add(wordflags.QUOTED)
 576 |         if d['compound_assignment'] and tokenword[-1] == ')':
 577 |             tokenword.flags.add(wordflags.COMPASSIGN)
 578 |         if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)):
 579 |             tokenword.flags.add(wordflags.ASSIGNMENT)
 580 |             if self._assignment_acceptable(self._last_read_token):
 581 |                 tokenword.flags.add(wordflags.NOSPLIT)
 582 |                 if self._parserstate & parserflags.COMPASSIGN:
 583 |                     tokenword.flags.add(wordflags.NOGLOB)
 584 | 
 585 |         # bashlex/parse.y L4865
 586 |         if self._command_token_position(self._last_read_token):
 587 |             pass
 588 | 
 589 |         if tokenword.value[0] == '{' and tokenword.value[-1] == '}' and c in '<>':
 590 |             if shutils.legal_identifier(tokenword.value[1:]):
 591 |                 # XXX is this needed?
 592 |                 tokenword.value = tokenword.value[1:]
 593 |                 tokenword.ttype = tokentype.REDIR_WORD
 594 | 
 595 |             return tokenword
 596 | 
 597 |         if len(tokenword.flags & set([wordflags.ASSIGNMENT, wordflags.NOSPLIT])) == 2:
 598 |             tokenword.ttype = tokentype.ASSIGNMENT_WORD
 599 | 
 600 |         if self._last_read_token.ttype == tokentype.FUNCTION:
 601 |             self._parserstate.add(parserflags.ALLOWOPNBRC)
 602 |             self._function_dstart = self._line_number
 603 |         elif self._last_read_token.ttype in (tokentype.CASE, tokentype.SELECT, tokentype.FOR):
 604 |             pass # bashlex/parse.y L4907
 605 | 
 606 |         return tokenword
 607 | 
 608 |     def _parse_comsub(self, doublequotes, open, close, parsingcommand=False,
 609 |                       dquote=False, firstclose=False):
 610 |         peekc = self._getc(False)
 611 |         self._ungetc(peekc)
 612 | 
 613 |         if peekc == '(':
 614 |             return self._parse_matched_pair(doublequotes, open, close)
 615 | 
 616 |         count = 1
 617 |         dollarok = True
 618 | 
 619 |         checkcase = bool(parsingcommand and (doublequotes is None or doublequotes not in "'\"") and not dquote)
 620 |         checkcomment = checkcase
 621 | 
 622 |         startlineno = self._line_number
 623 |         heredelim = ''
 624 |         stripdoc = insideheredoc = insidecomment = insideword = insidecase = False
 625 |         readingheredocdelim = False
 626 |         wasdollar = passnextchar = False
 627 |         reservedwordok = True
 628 |         lexfirstind = -1
 629 |         lexrwlen = 0
 630 | 
 631 |         ret = ''
 632 | 
 633 |         while count:
 634 |             c = self._getc(doublequotes != "'" and not insidecomment and not passnextchar)
 635 | 
 636 |             if c is None:
 637 |                 raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 638 | 
 639 |             # bashlex/parse.y L3571
 640 |             if c == '\n':
 641 |                 if readingheredocdelim and heredelim:
 642 |                     readingheredocdelim = False
 643 |                     insideheredoc = True
 644 |                     lexfirstind = len(ret) + 1
 645 |                 elif insideheredoc:
 646 |                     tind = lexfirstind
 647 |                     while stripdoc and ret[tind] == '\t':
 648 |                         tind += 1
 649 |                     if ret[tind:] == heredelim:
 650 |                         stripdoc = insideheredoc = False
 651 |                         heredelim = ''
 652 |                         lexfirstind = -1
 653 |                     else:
 654 |                         lexfirstind = len(ret) + 1
 655 |             # bashlex/parse.y L3599
 656 |             if insideheredoc and c == close and count == 1:
 657 |                 tind = lexfirstind
 658 |                 while stripdoc and ret[tind] == '\t':
 659 |                     tind += 1
 660 |                 if ret[tind:] == heredelim:
 661 |                     stripdoc = insideheredoc = False
 662 |                     heredelim = ''
 663 |                     lexfirstind = -1
 664 | 
 665 |             if insidecomment or insideheredoc:
 666 |                 ret += c
 667 | 
 668 |                 if insidecomment and c == '\n':
 669 |                     insidecomment = False
 670 | 
 671 |                 continue
 672 | 
 673 |             if passnextchar:
 674 |                 passnextchar = False
 675 |                 # XXX is this needed?
 676 |                 # if doublequotes != "'" and c == '\n':
 677 |                 #     if ret:
 678 |                 #         ret = ret[:-1]
 679 |                 # else:
 680 |                 #     ret += c
 681 |                 ret += c
 682 |                 continue
 683 | 
 684 |             if _shellbreak(c):
 685 |                 insideword = False
 686 |             else:
 687 |                 if insideword:
 688 |                     lexwlen += 1
 689 |                 else:
 690 |                     insideword = True
 691 |                     lexwlen = 0
 692 | 
 693 |             if _shellblank(c) and not readingheredocdelim and not lexrwlen:
 694 |                 ret += c
 695 |                 continue
 696 | 
 697 |             # bashlex/parse.y L3686
 698 |             if readingheredocdelim:
 699 |                 if lexfirstind == -1 and not _shellbreak(c):
 700 |                     lexfirstind = len(ret)
 701 |                 elif lexfirstind >= 0 and not passnextchar and _shellbreak(c):
 702 |                     if not heredelim:
 703 |                         nestret = ret[lexfirstind:]
 704 |                         heredelim = shutils.removequotes(nestret)
 705 |                     if c == '\n':
 706 |                         insideheredoc = True
 707 |                         readingheredocdelim = False
 708 |                         lexfirstind = len(ret) + 1
 709 |                     else:
 710 |                         lexfirstind = -1
 711 | 
 712 |             if not reservedwordok and checkcase and not insidecomment and (_shellmeta(c) or c == '\n'):
 713 |                 ret += c
 714 |                 peekc = self._getc(True)
 715 |                 if c == peekc and c in '&|;':
 716 |                     ret += peekc
 717 |                     reservedwordok = True
 718 |                     lexrwlen = 0
 719 |                     continue
 720 |                 elif c == '\n' or c in '&|;':
 721 |                     self._ungetc(peekc)
 722 |                     reservedwordok = True
 723 |                     lexrwlen = 0
 724 |                     continue
 725 |                 elif c is None:
 726 |                     raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) # pragma: no coverage
 727 |                 else:
 728 |                     ret = ret[:-1]
 729 |                     self._ungetc(peekc)
 730 | 
 731 |             # bashlex/parse.y L3761
 732 |             if reservedwordok:
 733 |                 if c.islower():
 734 |                     ret += c
 735 |                     lexrwlen += 1
 736 |                     continue
 737 |                 elif lexrwlen == 4 and _shellbreak(c):
 738 |                     if ret[-4:] == 'case':
 739 |                         insidecase = True
 740 |                     elif ret[-4:] == 'esac':
 741 |                         insidecase = False
 742 |                     reservedwordok = False
 743 |                 elif (checkcomment and c == '#' and (lexrwlen == 0 or
 744 |                         (insideword and lexwlen == 0))):
 745 |                     pass
 746 |                 elif (not insidecase and (_shellblank(c) or c == '\n') and
 747 |                     lexrwlen == 2 and ret[-2:] == 'do'):
 748 |                     lexrwlen = 0
 749 |                 elif insidecase and c != '\n':
 750 |                     reservedwordok = False
 751 |                 elif not _shellbreak(c):
 752 |                     reservedwordok = False
 753 | 
 754 |             if not insidecomment and checkcase and c == '<':
 755 |                 ret += c
 756 |                 peekc = self._getc(True)
 757 |                 if peekc is None:
 758 |                     raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 759 |                 if peekc == c:
 760 |                     ret += peekc
 761 |                     peekc = self._getc(True)
 762 |                     if peekc is None:
 763 |                         raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 764 |                     elif peekc == '-':
 765 |                         ret += peekc
 766 |                         stripdoc = True
 767 |                     else:
 768 |                         self._ungetc(peekc)
 769 | 
 770 |                     if peekc != '<':
 771 |                         readingheredocdelim = True
 772 |                         lexfirstind = -1
 773 | 
 774 |                     continue
 775 |                 else:
 776 |                     c = peekc
 777 |             elif checkcomment and not insidecomment and c == '#' and ((reservedwordok
 778 |                     and lexrwlen == 0) or insideword or lexwlen == 0):
 779 |                 insidecomment = True
 780 | 
 781 |             if c == close and not insidecase:
 782 |                 count -= 1
 783 |             elif not firstclose and not insidecase and c == open:
 784 |                 count += 1
 785 | 
 786 |             ret += c
 787 | 
 788 |             if count == 0:
 789 |                 break
 790 | 
 791 |             if c == '\\':
 792 |                 passnextchar = True
 793 | 
 794 |             # bashlex/parse.y L3897
 795 |             if _shellquote(c):
 796 |                 self._push_delimiter(c)
 797 |                 try:
 798 |                     if wasdollar and c == "'":
 799 |                         nestret = self._parse_matched_pair(c, c, c,
 800 |                                                            allowesc=True,
 801 |                                                            dquote=True)
 802 |                     else:
 803 |                         nestret = self._parse_matched_pair(c, c, c,
 804 |                                                            dquote=True)
 805 |                 finally:
 806 |                     self._pop_delimiter()
 807 | 
 808 |                 # XXX is this necessary?
 809 |                 # if wasdollar and c == "'" and not rdquote:
 810 |                 #     if not rdquote:
 811 |                 #         nestret = shutils.single_quote(nestret)
 812 |                 #     ret = ret[:-2]
 813 |                 # elif wasdollar and c == '"' and not rdquote:
 814 |                 #     nestret = shutils.double_quote(nestret)
 815 |                 #     ret = ret[:-2]
 816 | 
 817 |                 ret += nestret
 818 |             # check for $(), $[], or ${} inside command substitution
 819 |             elif wasdollar and c in '({[':
 820 |                 if not insidecase and open == c:
 821 |                     count -= 1
 822 |                 if c == '(':
 823 |                     nestret = self._parse_comsub(None, '(', ')',
 824 |                                                  parsingcommand=True,
 825 |                                                  dquote=False)
 826 |                 elif c == '{':
 827 |                     nestret = self._parse_matched_pair(None, '{', '}',
 828 |                                                        firstclose=True,
 829 |                                                        dolbrace=True,
 830 |                                                        dquote=True)
 831 |                 elif c == '[':
 832 |                     nestret = self._parse_matched_pair(None, '[', ']',
 833 |                                                        dquote=True)
 834 | 
 835 |                 ret += nestret
 836 | 
 837 |             wasdollar = c == '$'
 838 | 
 839 |         return ret
 840 | 
 841 |     def _parse_matched_pair(self, doublequotes, open, close, parsingcommand=False, allowesc=False, dquote=False, firstclose=False, dolbrace=False, arraysub=False):
 842 |         count = 1
 843 |         dolbracestate = ''
 844 |         if dolbrace:
 845 |             dolbracestate = 'param'
 846 | 
 847 |         insidecomment = False
 848 |         lookforcomments = False
 849 |         sawdollar = False
 850 | 
 851 |         if parsingcommand and doublequotes not in "`'\"" and dquote:
 852 |             lookforcomments = True
 853 | 
 854 |         rdquote = True if doublequotes == '"' else dquote
 855 |         passnextchar = False
 856 |         startlineno = self._line_number
 857 | 
 858 |         ret = ''
 859 | 
 860 |         def handledollarword():
 861 |             if open == c:
 862 |                 count -= 1
 863 | 
 864 |             # bashlex/parse.y L3486
 865 |             if c == '(':
 866 |                 return self._parse_comsub(None, '(', ')',
 867 |                                           parsingcommand=True,
 868 |                                           dquote=False)
 869 |             elif c == '{':
 870 |                 return self._parse_matched_pair(None, '{', '}',
 871 |                                                 firstclose=True,
 872 |                                                 dquote=rdquote,
 873 |                                                 dolbrace=True)
 874 |             elif c == '[':
 875 |                 return self._parse_matched_pair(None, '[', ']', dquote=rdquote)
 876 |             else:
 877 |                 assert False # pragma: no cover
 878 | 
 879 |         while count:
 880 |             c = self._getc(doublequotes != "'" and not passnextchar)
 881 |             if c is None:
 882 |                 raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 883 | 
 884 |             # bashlex/parse.y L3285
 885 |             # if c == '\n':
 886 |             #    continue
 887 | 
 888 |             if insidecomment:
 889 |                 ret += c
 890 |                 if c == '\n':
 891 |                     insidecomment = False
 892 |                 continue
 893 |             elif lookforcomments and not insidecomment and c == '#' and (not ret
 894 |                     or ret[-1] == '\n' or _shellblank(ret[-1])):
 895 |                 insidecomment = True
 896 | 
 897 |             # last char was backslash
 898 |             if passnextchar:
 899 |                 passnextchar = False
 900 |                 #if doublequotes != "'" and c == '\n':
 901 |                 #    if ret:
 902 |                 #        ret = ret[:-1]
 903 |                 #    continue
 904 |                 ret += c
 905 |                 continue
 906 |             elif c == close:
 907 |                 count -= 1
 908 |             elif open != close and sawdollar and open == '{' and c == open:
 909 |                 count += 1
 910 |             elif not firstclose and c == open:
 911 |                 count += 1
 912 | 
 913 |             ret += c
 914 |             if count == 0:
 915 |                 break
 916 | 
 917 |             if open == "'":
 918 |                 if allowesc and c == "\\":
 919 |                     passnextchar = True
 920 |                 continue
 921 |             if c == "\\":
 922 |                 passnextchar = True
 923 |             if dolbrace:
 924 |                 if dolbracestate == 'param':
 925 |                     if len(ret) > 1:
 926 |                         dd = {'%' : 'quote', '#' : 'quote', '/' : 'quote2', '^' : 'quote',
 927 |                                 ',' : 'quote'}
 928 |                         if c in dd:
 929 |                             dolbracestate = dd[c]
 930 |                     elif c in '#%^,~:-=?+/':
 931 |                         dolbracestate = 'op'
 932 |                 if dolbracestate == 'op' and c in '#%^,~:-=?+/':
 933 |                     dolbracestate = 'word'
 934 | 
 935 |             if dolbracestate not in 'quote2' and dquote and dolbrace and c == "'":
 936 |                 continue
 937 | 
 938 |             if open != close:
 939 |                 if _shellquote(c):
 940 |                     self._push_delimiter(c)
 941 |                     try:
 942 |                         if sawdollar and "'":
 943 |                             nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=True, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
 944 |                         else:
 945 |                             nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
 946 |                     finally:
 947 |                         self._pop_delimiter()
 948 | 
 949 |                     # bashlex/parse.y L3419
 950 |                     if sawdollar and c == "'":
 951 |                         pass
 952 |                     elif sawdollar and c == '"':
 953 |                         ret = ret[:-2] # back up before the $"
 954 | 
 955 |                     ret += nestret
 956 |                 elif arraysub and sawdollar and c in '({[':
 957 |                     # goto parse_dollar_word
 958 |                     ret += handledollarword()
 959 |             elif open == '"' and c == '`':
 960 |                 ret += self._parse_matched_pair(None, '`', '`', parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
 961 |             elif open != '`' and sawdollar and c in '({[':
 962 |                 ret += handledollarword()
 963 | 
 964 |             sawdollar = c == '$'
 965 | 
 966 |         return ret
 967 | 
 968 | 
 969 |     def _is_assignment(self, value, iscompassign):
 970 |         c = value[0]
 971 | 
 972 |         def legalvariablechar(x):
 973 |             return x.isalnum() or x == '_'
 974 | 
 975 |         if not c.isalpha() and c != '_':
 976 |             return
 977 | 
 978 |         for i, c in enumerate(value):
 979 |             if c == '=':
 980 |                 return i
 981 | 
 982 |             # bash/general.c L289
 983 |             if c == '+' and i + 1 < len(value) and value[i+1] == '=':
 984 |                 return i+1
 985 | 
 986 |             if not legalvariablechar(c):
 987 |                 return False
 988 | 
 989 |     def _command_token_position(self, token):
 990 |         return (token.ttype == tokentype.ASSIGNMENT_WORD or
 991 |                 self._parserstate & parserflags.REDIRLIST or
 992 |                 (token.ttype not in (tokentype.SEMI_SEMI, tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND) and self._reserved_word_acceptable(token)))
 993 | 
 994 |     def _assignment_acceptable(self, token):
 995 |         return self._command_token_position(token) and not self._parserstate & parserflags.CASEPAT
 996 | 
 997 |     def _time_command_acceptable(self):
 998 |         pass
 999 | 
1000 |     def _reserved_word_acceptable(self, tok):
1001 |         if not tok or (tok.ttype in _reserved or tok.value in _reserved):
1002 |             return True
1003 |         # bash/parse.y L4955 cOPROCESS_SUPPORT
1004 | 
1005 |         if (self._last_read_token.ttype == tokentype.WORD and
1006 |             self._token_before_that.ttype == tokentype.FUNCTION):
1007 |             return True
1008 | 
1009 |         return False
1010 | 
1011 |     def _pop_delimiter(self):
1012 |         self._dstack.pop()
1013 | 
1014 |     def _push_delimiter(self, c):
1015 |         self._dstack.append(c)
1016 | 
1017 |     def _current_delimiter(self):
1018 |         if self._dstack:
1019 |             return self._dstack[-1]
1020 | 
1021 |     def _ungetc(self, c):
1022 |         if (self._shell_input_line and self._shell_input_line_index
1023 |             and self._shell_input_line_index <= len(self._shell_input_line)):
1024 |             self._shell_input_line_index -= 1
1025 |         else:
1026 |             self._eol_ungetc_lookahead = c
1027 | 
1028 |     def _getc(self, remove_quoted_newline=True):
1029 |         if self._eol_ungetc_lookahead is not None:
1030 |             c = self._eol_ungetc_lookahead
1031 |             self._eol_ungetc_lookahead = None
1032 |             return c
1033 | 
1034 |         # bash/parse.y L2220
1035 | 
1036 |         while True:
1037 |             if self._shell_input_line_index < len(self._shell_input_line):
1038 |                 c = self._shell_input_line[self._shell_input_line_index]
1039 |                 self._shell_input_line_index += 1
1040 |             else:
1041 |                 c = None
1042 | 
1043 |             if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
1044 |                 self._line_number += 1
1045 |                 # skip past the newline
1046 |                 self._shell_input_line_index += 1
1047 |                 continue
1048 |             else:
1049 |                 return c
1050 | 
1051 |             #if c is None and self._shell_input_line_terminator is None:
1052 |             #    if self._shell_input_line_index != 0:
1053 |             #        return '\n'
1054 |             #    else:
1055 |             #        return None
1056 | 
1057 |             #return c
1058 | 
1059 |     def _discard_until(self, character):
1060 |         c = self._getc(False)
1061 |         while c is not None and c != character:
1062 |             c = self._getc(False)
1063 |         if c is not None:
1064 |             self._ungetc(c)
1065 | 
1066 |     def _recordpos(self, relativeoffset=0):
1067 |         '''record the current index of the tokenizer into the positions stack
1068 |         while adding relativeoffset from it'''
1069 |         self._positions.append(self._shell_input_line_index - relativeoffset)
1070 | 
1071 |     def readline(self, removequotenewline):
1072 |         linebuffer = []
1073 |         passnext = indx = 0
1074 |         while True:
1075 |             c = self._getc()
1076 |             if c is None:
1077 |                 if indx == 0:
1078 |                     return None
1079 |                 c = '\n'
1080 | 
1081 |             if passnext:
1082 |                 linebuffer.append(c)
1083 |                 indx += 1
1084 |                 passnext = False
1085 |             elif c == '\\' and removequotenewline:
1086 |                 peekc = self._getc()
1087 |                 if peekc == '\n':
1088 |                     self._line_number += 1
1089 |                     continue
1090 |                 else:
1091 |                     self._ungetc(peekc)
1092 |                     passnext = True
1093 |                     linebuffer.append(c)
1094 |                     indx += 1
1095 |             else:
1096 |                 linebuffer.append(c)
1097 |                 indx += 1
1098 | 
1099 |             if c == '\n':
1100 |                 return ''.join(linebuffer)
1101 | 
1102 |     def _peekc(self, *args):
1103 |         peek_char = self._getc(*args)
1104 |         # only unget if we actually read something
1105 |         if peek_char is not None:
1106 |             self._ungetc(peek_char)
1107 |         return peek_char
1108 | 
1109 |     def _specialcasetokens(self, tokstr):
1110 |         if (self._last_read_token.ttype == tokentype.WORD and
1111 |             self._token_before_that.ttype in (tokentype.FOR,
1112 |                                               tokentype.CASE,
1113 |                                               tokentype.SELECT) and
1114 |             tokstr == 'in'):
1115 |                 if self._token_before_that.ttype == tokentype.CASE:
1116 |                     self._parserstate.add(parserflags.CASEPAT)
1117 |                     self._esacs_needed_count += 1
1118 |                 return tokentype.IN
1119 | 
1120 |         if (self._last_read_token.ttype == tokentype.WORD and
1121 |             self._token_before_that.ttype in (tokentype.FOR, tokentype.SELECT) and
1122 |             tokstr == 'do'):
1123 |             return tokentype.DO
1124 | 
1125 |         if self._esacs_needed_count:
1126 |             self._esacs_needed_count -= 1
1127 |             if tokstr == 'esac':
1128 |                 self._parserstate.discard(parserflags.CASEPAT)
1129 |                 return tokentype.ESAC
1130 | 
1131 |         if self._parserstate & parserflags.ALLOWOPNBRC:
1132 |             self._parserstate.discard(parserflags.ALLOWOPNBRC)
1133 |             if tokstr == '{':
1134 |                 self._open_brace_count += 1
1135 |                 # bash/parse.y L2887
1136 |                 return tokentype.LEFT_CURLY
1137 | 
1138 |         if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
1139 |             tokstr == 'do'):
1140 |             return tokentype.DO
1141 | 
1142 |         if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
1143 |             tokstr == '{'):
1144 |             self._open_brace_count += 1
1145 |             return tokentype.LEFT_CURLY
1146 | 
1147 |         if (self._open_brace_count and
1148 |             self._reserved_word_acceptable(self._last_read_token) and
1149 |             tokstr == '}'):
1150 |             self._open_brace_count -= 1
1151 |             return tokentype.RIGHT_CURLY
1152 | 
1153 |         if self._last_read_token.ttype == tokentype.TIME and tokstr == '-p':
1154 |             return tokentype.TIMEOPT
1155 | 
1156 |         if self._last_read_token.ttype == tokentype.TIMEOPT and tokstr == '--':
1157 |             return tokentype.TIMEIGN
1158 | 
1159 |         if self._parserstate & parserflags.CONDEXPR and tokstr == ']]':
1160 |             return tokentype.COND_END
1161 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
   1 | import unittest, functools
   2 | 
   3 | from bashlex import parser, state, flags, ast, errors
   4 | 
   5 | parse = functools.partial(parser.parse, convertpos=True)
   6 | 
   7 | def reservedwordnode(word, s):
   8 |     return ast.node(kind='reservedword', word=word, s=s)
   9 | 
  10 | def commandnode(s, *parts):
  11 |     return ast.node(kind='command', s=s, parts=list(parts))
  12 | 
  13 | def wordnode(word, s=None, parts=None):
  14 |     if s is None:
  15 |         s = word
  16 |     if parts is None:
  17 |         parts = []
  18 |     return ast.node(kind='word', word=word, s=s, parts=list(parts))
  19 | 
  20 | def assignmentnode(word, s=None, parts=None):
  21 |     node = wordnode(word, s, parts)
  22 |     node.kind = 'assignment'
  23 |     return node
  24 | 
  25 | def parameternode(value, s):
  26 |     return ast.node(kind='parameter', value=value, s=s)
  27 | 
  28 | def heredocnode(value, s=None):
  29 |     if s is None:
  30 |         s = value
  31 |     return ast.node(kind='heredoc', value=value, s=s)
  32 | 
  33 | def tildenode(value, s):
  34 |     return ast.node(kind='tilde', value=value, s=s)
  35 | 
  36 | def redirectnode(s, input, type, output, heredoc=None):
  37 |     return ast.node(kind='redirect', input=input, type=type, output=output,
  38 |                     heredoc=heredoc, s=s)
  39 | 
  40 | def pipenode(pipe, s):
  41 |     return ast.node(kind='pipe', pipe=pipe, s=s)
  42 | 
  43 | def pipelinenode(s, *parts):
  44 |     oldparts = parts
  45 |     if parts[0].kind == 'reservedword' and parts[0].word == '!':
  46 |         parts = parts[1:]
  47 |     for i in range(len(parts)):
  48 |         if i % 2 == 0:
  49 |             assert parts[i].kind in ('command', 'compound'), parts[i].kind
  50 |         else:
  51 |             assert parts[i].kind == 'pipe', parts[i].kind
  52 |     return ast.node(kind='pipeline', s=s, parts=list(oldparts))
  53 | 
  54 | def operatornode(op, s):
  55 |     return ast.node(kind='operator', op=op, s=s)
  56 | 
  57 | def listnode(s, *parts):
  58 |     for i in range(len(parts)):
  59 |         if i % 2 == 0:
  60 |             assert parts[i].kind in ('command', 'pipeline', 'compound'), parts[i].kind
  61 |         else:
  62 |             assert parts[i].kind == 'operator', parts[i].kind
  63 |     return ast.node(kind='list', parts=list(parts), s=s)
  64 | 
  65 | def compoundnode(s, *parts, **kwargs):
  66 |     redirects = kwargs.pop('redirects', [])
  67 |     assert not kwargs
  68 |     return ast.node(kind='compound', s=s, list=list(parts), redirects=redirects)
  69 | 
  70 | def procsubnode(s, command):
  71 |     return ast.node(kind='processsubstitution', s=s, command=command)
  72 | 
  73 | def comsubnode(s, command):
  74 |     return ast.node(kind='commandsubstitution', s=s, command=command)
  75 | 
  76 | def ifnode(s, *parts):
  77 |     return ast.node(kind='if', parts=list(parts), s=s)
  78 | 
  79 | def fornode(s, *parts):
  80 |     return ast.node(kind='for', parts=list(parts), s=s)
  81 | 
  82 | def whilenode(s, *parts):
  83 |     return ast.node(kind='while', parts=list(parts), s=s)
  84 | 
  85 | def casenode(s, *parts):
  86 |     return ast.node(kind='case', parts=list(parts), s=s)
  87 | 
  88 | def patternnode(s, *parts):
  89 |     return ast.node(kind='pattern', parts=list(parts), s=s)
  90 | 
  91 | def functionnode(s, name, body, *parts):
  92 |     return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s)
  93 | 
  94 | def unimplementednode(s, *parts):
  95 |     return ast.node(kind='unimplemented', parts=list(parts), s=s)
  96 | 
  97 | class test_parser(unittest.TestCase):
  98 | 
  99 |     def setUp(self):
 100 |         if not hasattr(self, 'assertRaisesRegex'):
 101 |             self.assertRaisesRegex = self.assertRaisesRegexp
 102 | 
 103 |     def assertASTEquals(self, s, expected, **parserargs):
 104 |         results = parse(s, **parserargs)
 105 |         self.assertTrue(len(results) == 1, 'expected one ast from parse(), '
 106 |                         'got %d' % len(results))
 107 |         result = results[0]
 108 | 
 109 |         # make sure our words are not empty
 110 |         class nullopvisitor(ast.nodevisitor):
 111 |             def visitword(_, node, word):
 112 |                 self.assertTrue(word, 'node %r has no word' % node)
 113 | 
 114 |         nullopvisitor().visit(result)
 115 | 
 116 |         msg = 'ASTs not equal for %r\n\nresult:\n\n%s\n\n!=\n\nexpected:\n\n%s' % (s, result.dump(), expected.dump())
 117 |         self.assertEqual(result, expected, msg)
 118 | 
 119 |     def assertASTsEquals(self, s, expectedlist, **parserargs):
 120 |         results = parse(s, **parserargs)
 121 |         self.assertEqual(len(results), len(expectedlist),
 122 |                           'mismatch on ASTs length')
 123 | 
 124 |         for result, expected in zip(results, expectedlist):
 125 |             msg = 'ASTs not equal for %r\n\nresult:\n\n%s\n\n!=\n\nexpected:\n\n%s' % (s, result.dump(), expected.dump())
 126 |             self.assertEqual(result, expected, msg)
 127 | 
 128 |     def test_command(self):
 129 |         s = 'a b c'
 130 |         self.assertASTEquals(s,
 131 |                 commandnode(s,
 132 |                   wordnode('a'),
 133 |                   wordnode('b'),
 134 |                   wordnode('c')))
 135 | 
 136 |         s = 'a b "c"'
 137 |         self.assertASTEquals(s,
 138 |                 commandnode(s,
 139 |                   wordnode('a'),
 140 |                   wordnode('b'),
 141 |                   wordnode('c', '"c"')))
 142 | 
 143 |         s = '2>/dev/null a b "c"'
 144 |         self.assertASTEquals(s,
 145 |                 commandnode(s,
 146 |                   redirectnode('2>/dev/null', 2, '>', wordnode('/dev/null')),
 147 |                   wordnode('a'),
 148 |                   wordnode('b'),
 149 |                   wordnode('c', '"c"')))
 150 | 
 151 |         s = 'a b>&1 2>&1'
 152 |         self.assertASTEquals(s,
 153 |                 commandnode(s,
 154 |                   wordnode('a'),
 155 |                   wordnode('b'),
 156 |                   redirectnode('>&1', None, '>&', 1),
 157 |                   redirectnode('2>&1', 2, '>&', 1)))
 158 | 
 159 |     def test_multiline(self):
 160 |         s = 'a\nb'
 161 |         self.assertASTsEquals(s, [
 162 |                               commandnode('a',
 163 |                                 wordnode('a')),
 164 |                               commandnode('b',
 165 |                                 wordnode('b'))
 166 |                               ])
 167 | 
 168 |     def test_pipeline(self):
 169 |         s = 'a | b'
 170 |         self.assertASTEquals(s,
 171 |                           pipelinenode(s,
 172 |                             commandnode('a', wordnode('a')),
 173 |                             pipenode('|', '|'),
 174 |                             commandnode('b', wordnode('b'))))
 175 | 
 176 |         s = '! a | b'
 177 |         self.assertASTEquals(s,
 178 |                           pipelinenode(s,
 179 |                             reservedwordnode('!', '!'),
 180 |                             commandnode('a', wordnode('a')),
 181 |                             pipenode('|', '|'),
 182 |                             commandnode('b', wordnode('b'))
 183 |                           ))
 184 | 
 185 |     def test_list(self):
 186 |         s = 'a;'
 187 |         self.assertASTEquals(s,
 188 |                           listnode(s,
 189 |                             commandnode('a', wordnode('a')),
 190 |                             operatornode(';', ';'),
 191 |                           ))
 192 | 
 193 |         s = 'a && b'
 194 |         self.assertASTEquals(s,
 195 |                           listnode(s,
 196 |                             commandnode('a', wordnode('a')),
 197 |                             operatornode('&&', '&&'),
 198 |                             commandnode('b', wordnode('b'))
 199 |                           ))
 200 | 
 201 |         s = 'a; b; c& d'
 202 |         self.assertASTEquals(s,
 203 |                           listnode(s,
 204 |                             commandnode('a', wordnode('a')),
 205 |                             operatornode(';', ';'),
 206 |                             commandnode('b', wordnode('b')),
 207 |                             operatornode(';', ';'),
 208 |                             commandnode('c', wordnode('c')),
 209 |                             operatornode('&', '&'),
 210 |                             commandnode('d', wordnode('d'))
 211 |                           ))
 212 | 
 213 |         s = 'a | b && c'
 214 |         self.assertASTEquals(s,
 215 |                           listnode(s,
 216 |                             pipelinenode('a | b',
 217 |                               commandnode('a', wordnode('a')),
 218 |                               pipenode('|', '|'),
 219 |                               commandnode('b', wordnode('b'))),
 220 |                             operatornode('&&', '&&'),
 221 |                             commandnode('c', wordnode('c'))
 222 |                           ))
 223 | 
 224 |     def test_nestedsubs(self):
 225 |         s = '$($<$(a) b)'
 226 |         self.assertASTEquals(s,
 227 |             commandnode(s,
 228 |               wordnode(s, s, [
 229 |                 comsubnode(s,
 230 |                     commandnode('$<$(a) b',
 231 |                         wordnode('$'),
 232 |                         redirectnode('<$(a)', None, '<',
 233 |                           wordnode('$(a)', '$(a)', [
 234 |                             comsubnode('$(a)',
 235 |                                 commandnode('a',
 236 |                                     wordnode('a'))
 237 |                             )
 238 |                           ])
 239 |                         ),
 240 |                         wordnode('b'),
 241 |                     )
 242 |                 )
 243 |               ])
 244 |             )
 245 |         )
 246 | 
 247 |     def test_paramexpand(self):
 248 |         s = 'a $1 $foo_bar "$@ $#" ~foo " ~bar" ${a} "${}"'
 249 |         self.assertASTEquals(s,
 250 |                 commandnode(s,
 251 |                   wordnode('a'),
 252 |                   wordnode('$1', '$1', [
 253 |                     parameternode('1', '$1'),
 254 |                   ]),
 255 |                   wordnode('$foo_bar', '$foo_bar', [
 256 |                     parameternode('foo_bar', '$foo_bar'),
 257 |                   ]),
 258 |                   wordnode('$@ $#', '"$@ $#"', [
 259 |                       parameternode('@', '$@'),
 260 |                       parameternode('#', '$#')
 261 |                   ]),
 262 |                   wordnode('~foo', '~foo', [
 263 |                     tildenode('~foo', '~foo'),
 264 |                   ]),
 265 |                   wordnode(' ~bar', '" ~bar"'),
 266 |                   wordnode('${a}', '${a}', [
 267 |                     parameternode('a', '${a}'),
 268 |                   ]),
 269 |                   wordnode('${}', '"${}"', [
 270 |                       parameternode('', '${}'),
 271 |                   ]),
 272 |                 )
 273 |               )
 274 | 
 275 |     def test_processsub(self):
 276 |         s = 'a <(b $(c))'
 277 |         self.assertASTEquals(s,
 278 |             commandnode(s,
 279 |               wordnode('a'),
 280 |               wordnode('<(b $(c))', '<(b $(c))', [
 281 |                 procsubnode('<(b $(c))',
 282 |                   commandnode('b $(c)',
 283 |                     wordnode('b'),
 284 |                     wordnode('$(c)', '$(c)', [
 285 |                       comsubnode('$(c)',
 286 |                           commandnode('c',
 287 |                               wordnode('c'))
 288 |                       )]
 289 |                     )
 290 |                   )
 291 |                 )
 292 |               ])
 293 |             )
 294 |         )
 295 | 
 296 |         s = 'a `b` "`c`" \'`c`\''
 297 |         self.assertASTEquals(s,
 298 |             commandnode(s,
 299 |                 wordnode('a'),
 300 |                 wordnode('`b`', '`b`', [
 301 |                     comsubnode('`b`',
 302 |                         commandnode('b',
 303 |                             wordnode('b'))
 304 |                     ),
 305 |                 ]),
 306 |                 wordnode('`c`', '"`c`"', [
 307 |                     comsubnode('`c`',
 308 |                         commandnode('c',
 309 |                             wordnode('c'))
 310 |                     ),
 311 |                 ]),
 312 |                 wordnode('`c`', "'`c`'")
 313 |             )
 314 |         )
 315 | 
 316 |     def test_error(self):
 317 |         self.assertRaises(errors.ParsingError, parse, 'a))')
 318 | 
 319 |     def test_redirection_input(self):
 320 |         s = 'a <f'
 321 |         self.assertASTEquals(s,
 322 |                 commandnode(s,
 323 |                   wordnode('a'),
 324 |                   redirectnode('<f', None, '<', wordnode('f'))))
 325 | 
 326 |         s = 'a <1'
 327 |         self.assertASTEquals(s,
 328 |                 commandnode(s,
 329 |                   wordnode('a'),
 330 |                   redirectnode('<1', None, '<', wordnode('1'))))
 331 | 
 332 |         s = 'a 1<f'
 333 |         self.assertASTEquals(s,
 334 |                 commandnode(s,
 335 |                   wordnode('a'),
 336 |                   redirectnode('1<f', 1, '<', wordnode('f'))))
 337 | 
 338 |         s = 'a 1 <f'
 339 |         self.assertASTEquals(s,
 340 |                 commandnode(s,
 341 |                   wordnode('a'),
 342 |                   wordnode('1'),
 343 |                   redirectnode('<f', None, '<', wordnode('f'))))
 344 | 
 345 |         s = 'a b<f'
 346 |         self.assertASTEquals(s,
 347 |                 commandnode(s,
 348 |                   wordnode('a'),
 349 |                   wordnode('b'),
 350 |                   redirectnode('<f', None, '<', wordnode('f'))))
 351 | 
 352 |         s = 'a 0<&3'
 353 |         self.assertASTEquals(s,
 354 |                 commandnode(s,
 355 |                   wordnode('a'),
 356 |                   redirectnode('0<&3', 0, '<&', 3)))
 357 | 
 358 |     def test_compound(self):
 359 |         s = '(a) && (b)'
 360 |         self.assertASTEquals(s,
 361 |                           listnode('(a) && (b)',
 362 |                             compoundnode('(a)',
 363 |                               reservedwordnode('(', '('),
 364 |                               commandnode('a',
 365 |                                 wordnode('a')),
 366 |                               reservedwordnode(')', ')'),
 367 |                               ),
 368 |                             operatornode('&&', '&&'),
 369 |                             compoundnode('(b)',
 370 |                               reservedwordnode('(', '('),
 371 |                               commandnode('b',
 372 |                                 wordnode('b')),
 373 |                               reservedwordnode(')', ')'),
 374 |                               ),
 375 |                           ))
 376 | 
 377 |         s = '(a) | (b)'
 378 |         self.assertASTEquals(s,
 379 |                           pipelinenode(s,
 380 |                             compoundnode('(a)',
 381 |                               reservedwordnode('(', '('),
 382 |                               commandnode('a',
 383 |                                 wordnode('a')),
 384 |                               reservedwordnode(')', ')')),
 385 |                             pipenode('|', '|'),
 386 |                             compoundnode('(b)',
 387 |                               reservedwordnode('(', '('),
 388 |                               commandnode('b',
 389 |                                 wordnode('b')),
 390 |                               reservedwordnode(')', ')')),
 391 |                           ))
 392 | 
 393 |         s = '(a) | (b) > /dev/null'
 394 |         self.assertASTEquals(s,
 395 |                           pipelinenode(s,
 396 |                             compoundnode('(a)',
 397 |                               reservedwordnode('(', '('),
 398 |                               commandnode('a',
 399 |                                 wordnode('a')),
 400 |                               reservedwordnode(')', ')')),
 401 |                             pipenode('|', '|'),
 402 |                             compoundnode('(b) > /dev/null',
 403 |                               reservedwordnode('(', '('),
 404 |                               commandnode('b',
 405 |                                 wordnode('b')),
 406 |                               reservedwordnode(')', ')'),
 407 |                               redirects=[
 408 |                                 redirectnode('> /dev/null', None, '>',
 409 |                                   wordnode('/dev/null'))]),
 410 |                           ))
 411 | 
 412 |         s = '(a && (b; c&)) || d'
 413 |         self.assertASTEquals(s,
 414 |                 listnode(s,
 415 |                   compoundnode('(a && (b; c&))',
 416 |                     reservedwordnode('(', '('),
 417 |                     listnode('a && (b; c&)',
 418 |                       commandnode('a',
 419 |                         wordnode('a')),
 420 |                       operatornode('&&', '&&'),
 421 |                       compoundnode('(b; c&)',
 422 |                         reservedwordnode('(', '('),
 423 |                         listnode('b; c&',
 424 |                           commandnode('b',
 425 |                             wordnode('b')),
 426 |                           operatornode(';', ';'),
 427 |                           commandnode('c',
 428 |                             wordnode('c')),
 429 |                           operatornode('&', '&')
 430 |                         ),
 431 |                         reservedwordnode(')', ')'),
 432 |                       ),
 433 |                     ),
 434 |                     reservedwordnode(')', ')'),
 435 |                   ),
 436 |                   operatornode('||', '||'),
 437 |                   commandnode('d',
 438 |                     wordnode('d')),
 439 |                 ))
 440 | 
 441 |     def test_compound_redirection(self):
 442 |         s = '(a) > /dev/null'
 443 |         self.assertASTEquals(s,
 444 |                 compoundnode(s,
 445 |                   reservedwordnode('(', '('),
 446 |                   commandnode('a',
 447 |                     wordnode('a')),
 448 |                   reservedwordnode(')', ')'),
 449 |                   redirects=[redirectnode('> /dev/null', None, '>', wordnode('/dev/null'))]
 450 |                 ))
 451 | 
 452 |     def test_compound_pipe(self):
 453 |         s = '(a) | b'
 454 |         self.assertASTEquals(s,
 455 |                 pipelinenode(s,
 456 |                   compoundnode('(a)',
 457 |                     reservedwordnode('(', '('),
 458 |                     commandnode('a',
 459 |                       wordnode('a')),
 460 |                     reservedwordnode(')', ')'),
 461 |                   ),
 462 |                   pipenode('|', '|'),
 463 |                   commandnode('b',
 464 |                     wordnode('b'))
 465 |                 ))
 466 | 
 467 |     def test_group(self):
 468 |         # reserved words are recognized only at the start of a simple command
 469 |         s = 'echo {}'
 470 |         self.assertASTEquals(s,
 471 |                           commandnode(s,
 472 |                             wordnode('echo'), wordnode('{}'))
 473 |                           )
 474 | 
 475 |         # reserved word at beginning isn't reserved if quoted
 476 |         s = "'{' foo"
 477 |         self.assertASTEquals(s,
 478 |                           commandnode(s,
 479 |                             wordnode('{', "'{'"), wordnode('foo'))
 480 |                           )
 481 | 
 482 |         s = '{ a; }'
 483 |         self.assertASTEquals(s,
 484 |                           compoundnode(s,
 485 |                             reservedwordnode('{', '{'),
 486 |                             listnode('a;',
 487 |                               commandnode('a', wordnode('a')),
 488 |                               operatornode(';', ';'),
 489 |                             ),
 490 |                             reservedwordnode('}', '}'),
 491 |                           ))
 492 | 
 493 |         s = '{ a; b; }'
 494 |         self.assertASTEquals(s,
 495 |                           compoundnode(s,
 496 |                             reservedwordnode('{', '{'),
 497 |                             listnode('a; b;',
 498 |                               commandnode('a', wordnode('a')),
 499 |                               operatornode(';', ';'),
 500 |                               commandnode('b', wordnode('b')),
 501 |                               operatornode(';', ';')
 502 |                             ),
 503 |                             reservedwordnode('}', '}'),
 504 |                           ))
 505 | 
 506 |         s = '(a) && { b; }'
 507 |         self.assertASTEquals(s,
 508 |                           listnode('(a) && { b; }',
 509 |                             compoundnode('(a)',
 510 |                               reservedwordnode('(', '('),
 511 |                               commandnode('a',
 512 |                                 wordnode('a')),
 513 |                               reservedwordnode(')', ')')),
 514 |                             operatornode('&&', '&&'),
 515 |                             compoundnode('{ b; }',
 516 |                               reservedwordnode('{', '{'),
 517 |                               listnode('b;',
 518 |                                 commandnode('b',
 519 |                                   wordnode('b')),
 520 |                                 operatornode(';', ';')),
 521 |                               reservedwordnode('}', '}'),
 522 |                               )
 523 |                           ))
 524 | 
 525 |         s = 'a; ! { b; }'
 526 |         self.assertASTEquals(s,
 527 |                           listnode(s,
 528 |                             commandnode('a', wordnode('a')),
 529 |                             operatornode(';', ';'),
 530 |                               pipelinenode('! { b; }',
 531 |                                 reservedwordnode('!', '!'),
 532 |                                 compoundnode('{ b; }',
 533 |                                   reservedwordnode('{', '{'),
 534 |                                   listnode('b;',
 535 |                                     commandnode('b', wordnode('b')),
 536 |                                     operatornode(';', ';'),
 537 |                                   ),
 538 |                                   reservedwordnode('}', '}'),
 539 |                                 )
 540 |                               )
 541 |                           ))
 542 | 
 543 |     def test_invalid_control(self):
 544 |         s = 'a &| b'
 545 |         self.assertRaisesRegex(errors.ParsingError, "unexpected token '|'.*position 3", parse, s)
 546 | 
 547 |     def test_invalid_redirect(self):
 548 |         s = 'a 2>'
 549 |         self.assertRaisesRegex(errors.ParsingError, r"unexpected token '\\n'.*position 4", parse, s)
 550 | 
 551 |         s = 'ssh -p 2222 <user>@<host>'
 552 |         self.assertRaisesRegex(errors.ParsingError, r"unexpected token '\\n'.*position %d" % len(s), parse, s)
 553 | 
 554 |     def test_if_redirection(self):
 555 |         s = 'if foo; then bar; fi >/dev/null'
 556 |         self.assertASTEquals(s,
 557 |                           compoundnode(s,
 558 |                             ifnode('if foo; then bar; fi',
 559 |                               reservedwordnode('if', 'if'),
 560 |                               listnode('foo;',
 561 |                                 commandnode('foo', wordnode('foo')),
 562 |                                 operatornode(';', ';')),
 563 |                               reservedwordnode('then', 'then'),
 564 |                               listnode('bar;',
 565 |                                 commandnode('bar', wordnode('bar')),
 566 |                                 operatornode(';', ';')),
 567 |                               reservedwordnode('fi', 'fi'),
 568 |                             ),
 569 |                             redirects=[
 570 |                               redirectnode('>/dev/null', None, '>',
 571 |                                 wordnode('/dev/null'))
 572 |                           ])
 573 |                         )
 574 | 
 575 |     def test_if(self):
 576 |         s = 'if foo; then bar; fi'
 577 |         self.assertASTEquals(s,
 578 |                           compoundnode(s,
 579 |                             ifnode(s,
 580 |                               reservedwordnode('if', 'if'),
 581 |                               listnode('foo;',
 582 |                                 commandnode('foo', wordnode('foo')),
 583 |                                 operatornode(';', ';')),
 584 |                               reservedwordnode('then', 'then'),
 585 |                               listnode('bar;',
 586 |                                 commandnode('bar', wordnode('bar')),
 587 |                                 operatornode(';', ';')),
 588 |                               reservedwordnode('fi', 'fi'),
 589 |                             ))
 590 |                           )
 591 | 
 592 |         s = 'if foo; bar; then baz; fi'
 593 |         self.assertASTEquals(s,
 594 |                           compoundnode(s,
 595 |                             ifnode(s,
 596 |                               reservedwordnode('if', 'if'),
 597 |                               listnode('foo; bar;',
 598 |                                 commandnode('foo', wordnode('foo')),
 599 |                                 operatornode(';', ';'),
 600 |                                 commandnode('bar', wordnode('bar')),
 601 |                                 operatornode(';', ';')),
 602 |                               reservedwordnode('then', 'then'),
 603 |                               listnode('baz;',
 604 |                                 commandnode('baz', wordnode('baz')),
 605 |                                 operatornode(';', ';')),
 606 |                               reservedwordnode('fi', 'fi'),
 607 |                             ))
 608 |                           )
 609 | 
 610 |         s = 'if foo; then bar; else baz; fi'
 611 |         self.assertASTEquals(s,
 612 |                           compoundnode(s,
 613 |                             ifnode(s,
 614 |                               reservedwordnode('if', 'if'),
 615 |                               listnode('foo;',
 616 |                                 commandnode('foo', wordnode('foo')),
 617 |                                 operatornode(';', ';')),
 618 |                               reservedwordnode('then', 'then'),
 619 |                               listnode('bar;',
 620 |                                 commandnode('bar', wordnode('bar')),
 621 |                                 operatornode(';', ';')),
 622 |                               reservedwordnode('else', 'else'),
 623 |                               listnode('baz;',
 624 |                                 commandnode('baz', wordnode('baz')),
 625 |                                 operatornode(';', ';')),
 626 |                               reservedwordnode('fi', 'fi'),
 627 |                               ))
 628 |                           )
 629 | 
 630 |         s = 'if foo; then bar; elif baz; then barbaz; fi'
 631 |         self.assertASTEquals(s,
 632 |                           compoundnode(s,
 633 |                             ifnode(s,
 634 |                               reservedwordnode('if', 'if'),
 635 |                               listnode('foo;',
 636 |                                 commandnode('foo', wordnode('foo')),
 637 |                                 operatornode(';', ';')),
 638 |                               reservedwordnode('then', 'then'),
 639 |                               listnode('bar;',
 640 |                                 commandnode('bar', wordnode('bar')),
 641 |                                 operatornode(';', ';')),
 642 |                               reservedwordnode('elif', 'elif'),
 643 |                               listnode('baz;',
 644 |                                 commandnode('baz', wordnode('baz')),
 645 |                                 operatornode(';', ';')),
 646 |                               reservedwordnode('then', 'then'),
 647 |                               listnode('barbaz;',
 648 |                                 commandnode('barbaz', wordnode('barbaz')),
 649 |                                 operatornode(';', ';')),
 650 |                               reservedwordnode('fi', 'fi'),
 651 |                               ))
 652 |                           )
 653 | 
 654 |         s = 'if foo; then bar; elif baz; then barbaz; else foobar; fi'
 655 |         self.assertASTEquals(s,
 656 |                           compoundnode(s,
 657 |                             ifnode(s,
 658 |                               reservedwordnode('if', 'if'),
 659 |                               listnode('foo;',
 660 |                                 commandnode('foo', wordnode('foo')),
 661 |                                 operatornode(';', ';')),
 662 |                               reservedwordnode('then', 'then'),
 663 |                               listnode('bar;',
 664 |                                 commandnode('bar', wordnode('bar')),
 665 |                                 operatornode(';', ';')),
 666 |                               reservedwordnode('elif', 'elif'),
 667 |                               listnode('baz;',
 668 |                                 commandnode('baz', wordnode('baz')),
 669 |                                 operatornode(';', ';')),
 670 |                               reservedwordnode('then', 'then'),
 671 |                               listnode('barbaz;',
 672 |                                 commandnode('barbaz', wordnode('barbaz')),
 673 |                                 operatornode(';', ';')),
 674 |                               reservedwordnode('else', 'else'),
 675 |                               listnode('foobar;',
 676 |                                 commandnode('foobar', wordnode('foobar')),
 677 |                                 operatornode(';', ';')),
 678 |                               reservedwordnode('fi', 'fi'),
 679 |                               ))
 680 |                           )
 681 | 
 682 |     def test_malformed_if(self):
 683 |         s = 'if foo; bar; fi'
 684 |         self.assertRaisesRegex(errors.ParsingError, "unexpected token 'fi'.*position 13", parse, s)
 685 | 
 686 |         s = 'if foo; then bar;'
 687 |         self.assertRaisesRegex(errors.ParsingError, "unexpected EOF.*position 17", parse, s)
 688 | 
 689 |         s = 'if foo; then bar; elif baz; fi'
 690 |         self.assertRaisesRegex(errors.ParsingError, "unexpected token 'fi'.*position 28", parse, s)
 691 | 
 692 |     def test_word_expansion(self):
 693 |         s = "'a' ' b' \"'c'\""
 694 |         self.assertASTEquals(s,
 695 |                 commandnode(s,
 696 |                   wordnode('a', "'a'"),
 697 |                   wordnode(' b', "' b'"),
 698 |                   wordnode("'c'", "\"'c'\"")))
 699 | 
 700 |         s = '"a\'b"'
 701 |         self.assertASTEquals(s,
 702 |                 commandnode(s,
 703 |                   wordnode("a'b", s)))
 704 | 
 705 |         s = 'a"b"\'c\'d'
 706 |         self.assertASTEquals(s,
 707 |                 commandnode(s,
 708 |                   wordnode("abcd", s)))
 709 | 
 710 |         s = "'$(a)' \"$(b)\""
 711 |         self.assertASTEquals(s,
 712 |                 commandnode(s,
 713 |                   wordnode("$(a)", "'$(a)'"),
 714 |                   wordnode("$(b)", '"$(b)"', [
 715 |                       comsubnode("$(b)",
 716 |                         commandnode("b", wordnode("b"))
 717 |                       )
 718 |                   ])))
 719 | 
 720 |         s = "\"$(a \"b\" 'c')\" '$(a \"b\" 'c')'"
 721 |         self.assertASTEquals(s,
 722 |                 commandnode(s,
 723 |                   wordnode("$(a \"b\" 'c')", "\"$(a \"b\" 'c')\"", [
 724 |                       comsubnode("$(a \"b\" 'c')",
 725 |                           commandnode("a \"b\" 'c'",
 726 |                               wordnode('a'),
 727 |                               wordnode('b', '"b"'),
 728 |                               wordnode('c', "'c'")
 729 |                           )
 730 |                       )
 731 |                   ]),
 732 |                   wordnode("$(a \"b\" 'c')", "'$(a \"b\" 'c')'")
 733 |                 ))
 734 | 
 735 |     def test_escape_not_part_of_word(self):
 736 |         s = "a \\;"
 737 |         self.assertASTEquals(s,
 738 |                 commandnode(s,
 739 |                   wordnode('a'),
 740 |                   wordnode(';', '\\;')))
 741 | 
 742 |     def test_heredoc_spec(self):
 743 |         for redirect_kind in ('<<', '<<<'):
 744 |             s = 'a %sEOF | b' % redirect_kind
 745 |             self.assertASTEquals(s,
 746 |                   pipelinenode(s,
 747 |                     commandnode('a %sEOF' % redirect_kind,
 748 |                       wordnode('a', 'a'),
 749 |                       redirectnode('%sEOF' % redirect_kind, None,
 750 |                                    redirect_kind, wordnode('EOF'))),
 751 |                     pipenode('|', '|'),
 752 |                     commandnode('b', wordnode('b', 'b'))),
 753 |                   strictmode=False)
 754 | 
 755 |         s = 'a <<-b'
 756 |         self.assertASTEquals(s,
 757 |                 commandnode(s,
 758 |                   wordnode('a', 'a'),
 759 |                   redirectnode('<<-b', None, '<<-', wordnode('b'))),
 760 |                 strictmode=False)
 761 | 
 762 |         s = 'a <<<<b'
 763 |         self.assertRaisesRegex(errors.ParsingError, "unexpected token '<'.*5", parse, s)
 764 | 
 765 |     def test_heredoc_with_actual_doc(self):
 766 |         doc = 'foo\nbar\nEOF'
 767 |         s = '''a <<EOF
 768 | %s''' % doc
 769 | 
 770 |         self.assertASTEquals(s,
 771 |                 commandnode('a <<EOF',
 772 |                   wordnode('a'),
 773 |                   redirectnode('<<EOF\n%s' % doc, None, '<<', wordnode('EOF'),
 774 |                       heredocnode(doc))
 775 |                 ))
 776 | 
 777 |         s = 'a <<EOF\nb'
 778 |         self.assertRaisesRegex(errors.ParsingError,
 779 |                                 "delimited by end-of-file \\(wanted 'EOF'",
 780 |                                 parse, s)
 781 | 
 782 |     def test_herestring(self):
 783 |         s = 'a <<<"b\nc"'
 784 |         self.assertASTEquals(s,
 785 |                 commandnode(s,
 786 |                   wordnode('a', 'a'),
 787 |                   redirectnode('<<<"b\nc"', None, '<<<',
 788 |                                wordnode('b\nc', '"b\nc"'))))
 789 | 
 790 |         s = 'a <<<$(b)'
 791 |         self.assertASTEquals(s,
 792 |                 commandnode(s,
 793 |                   wordnode('a', 'a'),
 794 |                   redirectnode('<<<$(b)', None, '<<<',
 795 |                     wordnode('$(b)', '$(b)', [
 796 |                       comsubnode('$(b)',
 797 |                         commandnode('b',
 798 |                           wordnode('b'))
 799 |                       )
 800 |                     ])
 801 |                   )
 802 |                 )
 803 |             )
 804 | 
 805 |     def test_for_expansion(self):
 806 |         s = 'for a in $(b)"c"; do d; done'
 807 |         self.assertASTEquals(s,
 808 |                           compoundnode(s,
 809 |                             fornode(s,
 810 |                               reservedwordnode('for', 'for'),
 811 |                               wordnode('a'),
 812 |                               reservedwordnode('in', 'in'),
 813 |                               wordnode('$(b)c', '$(b)"c"', [
 814 |                                 comsubnode('$(b)',
 815 |                                   commandnode('b', wordnode('b'))
 816 |                                 )
 817 |                               ]),
 818 |                               reservedwordnode(';', ';'),
 819 |                               reservedwordnode('do', 'do'),
 820 |                               listnode('d;',
 821 |                                 commandnode('d', wordnode('d')),
 822 |                                 operatornode(';', ';')),
 823 |                               reservedwordnode('done', 'done'),
 824 |                             ))
 825 |                           )
 826 | 
 827 |     def test_for(self):
 828 |         s = 'for a; do b; done'
 829 |         self.assertASTEquals(s,
 830 |                           compoundnode(s,
 831 |                             fornode(s,
 832 |                               reservedwordnode('for', 'for'),
 833 |                               wordnode('a'),
 834 |                               reservedwordnode(';', ';'),
 835 |                               reservedwordnode('do', 'do'),
 836 |                               listnode('b;',
 837 |                                 commandnode('b', wordnode('b')),
 838 |                                 operatornode(';', ';')),
 839 |                               reservedwordnode('done', 'done'),
 840 |                             ))
 841 |                           )
 842 | 
 843 |         s = 'for a in b c d; do b; done'
 844 |         self.assertASTEquals(s,
 845 |                           compoundnode(s,
 846 |                             fornode(s,
 847 |                               reservedwordnode('for', 'for'),
 848 |                               wordnode('a'),
 849 |                               reservedwordnode('in', 'in'),
 850 |                               wordnode('b'),
 851 |                               wordnode('c'),
 852 |                               wordnode('d'),
 853 |                               reservedwordnode(';', ';'),
 854 |                               reservedwordnode('do', 'do'),
 855 |                               listnode('b;',
 856 |                                 commandnode('b', wordnode('b')),
 857 |                                 operatornode(';', ';')),
 858 |                               reservedwordnode('done', 'done'),
 859 |                             ))
 860 |                           )
 861 | 
 862 |     def test_assignments(self):
 863 |         # assignments must appear before the first word
 864 |         s = 'a=b c e=d'
 865 |         self.assertASTEquals(s,
 866 |                              commandnode(s,
 867 |                                assignmentnode('a=b'),
 868 |                                wordnode('c'),
 869 |                                wordnode('e=d'),
 870 |                              )
 871 |                             )
 872 | 
 873 |         s = 'a=b c="d"e\'f\'g h'
 874 |         self.assertASTEquals(s,
 875 |                              commandnode(s,
 876 |                                assignmentnode('a=b'),
 877 |                                assignmentnode('c=defg', 'c="d"e\'f\'g'),
 878 |                                wordnode('h'),
 879 |                              )
 880 |                             )
 881 | 
 882 |         s = 'a=$(b) c'
 883 |         self.assertASTEquals(s,
 884 |                              commandnode(s,
 885 |                                assignmentnode('a=$(b)', 'a=$(b)', [
 886 |                                 comsubnode('$(b)',
 887 |                                   commandnode('b',
 888 |                                     wordnode('b'),
 889 |                                   )
 890 |                                 )
 891 |                                ]),
 892 |                                wordnode('c'),
 893 |                              )
 894 |                             )
 895 | 
 896 |         s = 'a="$(b) $c" d'
 897 |         self.assertASTEquals(s,
 898 |                              commandnode(s,
 899 |                                assignmentnode('a=$(b) $c', 'a="$(b) $c"', [
 900 |                                 comsubnode('$(b)',
 901 |                                   commandnode('b',
 902 |                                     wordnode('b'),
 903 |                                   )
 904 |                                 ),
 905 |                                 parameternode('c', '$c')
 906 |                                ]),
 907 |                                wordnode('d'),
 908 |                              )
 909 |                             )
 910 | 
 911 |     def test_while(self):
 912 |         s = 'while a; do b; done'
 913 |         self.assertASTEquals(s,
 914 |                           compoundnode(s,
 915 |                             whilenode(s,
 916 |                               reservedwordnode('while', 'while'),
 917 |                               listnode('a;',
 918 |                                 commandnode('a', wordnode('a')),
 919 |                                 operatornode(';', ';')),
 920 |                               reservedwordnode('do', 'do'),
 921 |                               listnode('b;',
 922 |                                 commandnode('b', wordnode('b')),
 923 |                                 operatornode(';', ';')),
 924 |                               reservedwordnode('done', 'done'),
 925 |                             ))
 926 |                           )
 927 | 
 928 |     def test_expansion_limit(self):
 929 |         '''make sure the expansion limit is working by tracking recursive
 930 |         parsing count, and also checking that the word isn't expanded'''
 931 |         counter = [0]
 932 |         class countingparser(parser._parser):
 933 |             def __init__(self, *args, **kwargs):
 934 |                 super(countingparser, self).__init__(*args, **kwargs)
 935 |                 counter[0] += 1
 936 | 
 937 |         old = parser._parser
 938 |         parser._parser = countingparser
 939 | 
 940 |         try:
 941 |             s = 'a $(b $(c $(d $(e))))'
 942 |             self.assertASTEquals(s,
 943 |                 commandnode(s,
 944 |                   wordnode('a'),
 945 |                   wordnode('$(b $(c $(d $(e))))', '$(b $(c $(d $(e))))', [
 946 |                     comsubnode('$(b $(c $(d $(e))))',
 947 |                       commandnode('b $(c $(d $(e)))',
 948 |                         wordnode('b'),
 949 |                         wordnode('$(c $(d $(e)))')
 950 |                       )
 951 |                     )
 952 |                   ])
 953 |                 ),
 954 |                 expansionlimit=1
 955 |             )
 956 | 
 957 |             self.assertEqual(counter[0], 3)
 958 |         finally:
 959 |             parser._parser = old
 960 | 
 961 |         s = 'a $(b $(c))'
 962 |         for i in [None] + list(range(2, 5)):
 963 |             self.assertASTEquals(s,
 964 |                 commandnode(s,
 965 |                   wordnode('a'),
 966 |                   wordnode('$(b $(c))', '$(b $(c))', [
 967 |                     comsubnode('$(b $(c))',
 968 |                       commandnode('b $(c)',
 969 |                         wordnode('b'),
 970 |                         wordnode('$(c)', '$(c)', [
 971 |                           comsubnode('$(c)',
 972 |                             commandnode('c',
 973 |                               wordnode('c')
 974 |                             )
 975 |                           )
 976 |                         ])
 977 |                       )
 978 |                     )
 979 |                   ])
 980 |                 ),
 981 |                 expansionlimit=i
 982 |             )
 983 | 
 984 |     def test_expansion_limit_word(self):
 985 |         s = 'a "$(b)"c" $1"'
 986 | 
 987 |         self.assertASTEquals(s,
 988 |             commandnode(s,
 989 |               wordnode('a'),
 990 |               wordnode('$(b)c $1', '"$(b)"c" $1"', [
 991 |                 comsubnode('$(b)',
 992 |                   commandnode('b',
 993 |                     wordnode('b'),
 994 |                   )
 995 |                 ),
 996 |                 parameternode('1', '$1'),
 997 |               ])
 998 |             ),
 999 |         )
1000 | 
1001 |         self.assertASTEquals(s,
1002 |             commandnode(s,
1003 |               wordnode('a'),
1004 |               wordnode('$(b)c $1', '"$(b)"c" $1"', [
1005 |                 parameternode('1', '$1'),
1006 |               ])
1007 |             ),
1008 |             expansionlimit=0
1009 |         )
1010 | 
1011 |     def test_command_arithmetic(self):
1012 |         self.assertRaisesRegex(NotImplementedError, 'arithmetic expansion',
1013 |                                 parse, 'a "$((2 + 2))"')
1014 | 
1015 |     def test_function_no_function_keyword(self):
1016 |         s = 'a() { b; }'
1017 |         name = wordnode('a')
1018 |         body = compoundnode('{ b; }',
1019 |                  reservedwordnode('{', '{'),
1020 |                  listnode('b;',
1021 |                    commandnode('b', wordnode('b')),
1022 |                    operatornode(';', ';'),
1023 |                  ),
1024 |                  reservedwordnode('}', '}'),
1025 |                )
1026 | 
1027 |         self.assertASTEquals(s,
1028 |                               functionnode(s, name, body,
1029 |                                 name,
1030 |                                 reservedwordnode('(', '('),
1031 |                                 reservedwordnode(')', ')'),
1032 |                                 body
1033 |                               )
1034 |                             )
1035 | 
1036 |     def test_function_with_keyword(self):
1037 |         s = 'function a() { b; }'
1038 |         name = wordnode('a')
1039 |         body = compoundnode('{ b; }',
1040 |                  reservedwordnode('{', '{'),
1041 |                  listnode('b;',
1042 |                    commandnode('b', wordnode('b')),
1043 |                    operatornode(';', ';'),
1044 |                  ),
1045 |                  reservedwordnode('}', '}'),
1046 |                )
1047 | 
1048 |         self.assertASTEquals(s,
1049 |                               functionnode(s, name, body,
1050 |                                 reservedwordnode('function', 'function'),
1051 |                                 name,
1052 |                                 reservedwordnode('(', '('),
1053 |                                 reservedwordnode(')', ')'),
1054 |                                 body
1055 |                               )
1056 |                             )
1057 | 
1058 |     def test_function_parenthesis_optional(self):
1059 |         s = 'function a { b; }'
1060 |         name = wordnode('a')
1061 |         body = compoundnode('{ b; }',
1062 |                  reservedwordnode('{', '{'),
1063 |                  listnode('b;',
1064 |                    commandnode('b', wordnode('b')),
1065 |                    operatornode(';', ';'),
1066 |                  ),
1067 |                  reservedwordnode('}', '}'),
1068 |                )
1069 |         self.assertASTEquals(s,
1070 |                               functionnode(s, name, body,
1071 |                                 reservedwordnode('function', 'function'),
1072 |                                 name,
1073 |                                 body
1074 |                               )
1075 |                             )
1076 | 
1077 |         s = 'a { b; }'
1078 |         self.assertRaisesRegex(errors.ParsingError, "unexpected token '}'.*7",
1079 |                                 parse, s)
1080 | 
1081 |     def test_command_substitution_dollar_semicolon(self):
1082 |         s = '$(a;b)'
1083 |         self.assertASTEquals(s,
1084 |                           commandnode('$(a;b)',
1085 |                           wordnode('$(a;b)', '$(a;b)', [
1086 |                             comsubnode('$(a;b)',
1087 |                               listnode('a;b',
1088 |                                   commandnode('a', wordnode('a'),),
1089 |                                   operatornode(';', ';'),
1090 |                                   commandnode('b', wordnode('b'),),
1091 |                                 )),
1092 |                           ]),
1093 |                         )
1094 |                         )
1095 | 
1096 |     def test_parameter_braces(self):
1097 |         return
1098 | 
1099 |         # FIXME
1100 |         s = 'a ${b\\}c}'
1101 | 
1102 |         self.assertASTEquals(s,
1103 |             commandnode(s,
1104 |               wordnode('a'),
1105 |               wordnode('$(b)c $1', '"$(b)"c" $1"', [
1106 |                 comsubnode('$(b)',
1107 |                   commandnode('b',
1108 |                     wordnode('b'),
1109 |                   )
1110 |                 ),
1111 |                 parameternode('1', '$1'),
1112 |               ])
1113 |             ),
1114 |         )
1115 | 
1116 |     def test_backslash_newline(self):
1117 |       s = """for hook in \\
1118 |         /etc/* \\
1119 |         /lib/* \\
1120 |         /asdf/*
1121 |       do
1122 |         echo hook
1123 |       done"""
1124 |       self.assertASTEquals(s,
1125 |                   compoundnode(s, fornode(s,
1126 |                     reservedwordnode('for', 'for'),
1127 |                     wordnode('hook','hook'),
1128 |                     reservedwordnode('in','in'),
1129 |                     wordnode('/etc/*','/etc/*'),
1130 |                     wordnode('/lib/*','/lib/*'),
1131 |                     wordnode('/asdf/*','/asdf/*'),
1132 |                     reservedwordnode('do', 'do'),
1133 |                     commandnode('echo hook',
1134 |                                 wordnode('echo'),
1135 |                                 wordnode('hook')),
1136 |                     reservedwordnode('done', 'done'),
1137 |                                           )))
1138 | 
1139 |     def test_ending_newlines(self):
1140 |       s = 'echo hello world\n\n\n'
1141 |       self.assertASTEquals(s, commandnode(s.strip(),
1142 |                                                   wordnode('echo'),
1143 |                                                   wordnode('hello'),
1144 |                                                   wordnode('world')))
1145 | 
1146 |     def test_case_simplest(self):
1147 |       s = 'case ${1} in esac'
1148 |       self.assertASTEquals(s,
1149 |         compoundnode(s,
1150 |           casenode(s,
1151 |             reservedwordnode('case', 'case'),
1152 |             wordnode('${1}', '${1}', [
1153 |                 parameternode('1', '${1}'),
1154 |             ]),
1155 |             reservedwordnode('in', 'in'),
1156 |             reservedwordnode('esac', 'esac'),
1157 |       )))
1158 | 
1159 |     def test_case_clause(self):
1160 |       s = 'case ${1} in (pattern) echo pattern; esac'
1161 |       self.assertASTEquals(s,
1162 |         compoundnode(s,
1163 |           casenode(s,
1164 |             reservedwordnode('case', 'case'),
1165 |             wordnode('${1}', '${1}', [
1166 |                 parameternode('1', '${1}'),
1167 |             ]),
1168 |             reservedwordnode('in', 'in'),
1169 |             compoundnode('(pattern) echo pattern;',
1170 |               reservedwordnode('(', '('),
1171 |               patternnode('pattern',
1172 |                 wordnode('pattern', 'pattern'),
1173 |               ),
1174 |               reservedwordnode(')', ')'),
1175 |               listnode('echo pattern;',
1176 |                 commandnode('echo pattern',
1177 |                   wordnode('echo', 'echo'),
1178 |                   wordnode('pattern', 'pattern'),
1179 |                 ),
1180 |                 operatornode(';', ';'),
1181 |               ),
1182 |             ),
1183 |             reservedwordnode('esac', 'esac'),
1184 |       )))
1185 | 
1186 |     def test_case_clause_sequence(self):
1187 |       s = """case ${1} in
1188 |         pattern1) echo pattern1;;
1189 |         pattern2|pattern3)
1190 |           echo pattern 2; echo pattern 3
1191 |           ;;
1192 |         *);;
1193 |       esac"""
1194 |       self.assertASTEquals(s,
1195 |         compoundnode(s,
1196 |           casenode(s,
1197 |             reservedwordnode('case', 'case'),
1198 |             wordnode('${1}', '${1}', [
1199 |               parameternode('1', '${1}'),
1200 |             ]),
1201 |             reservedwordnode('in', 'in'),
1202 |             compoundnode('pattern1) echo pattern1',
1203 |               patternnode('pattern1', wordnode('pattern1', 'pattern1')),
1204 |               reservedwordnode(')', ')'),
1205 |               commandnode('echo pattern1',
1206 |                 wordnode('echo', 'echo'),
1207 |                 wordnode('pattern1', 'pattern1'),
1208 |               ),
1209 |             ),
1210 |             reservedwordnode(';;', ';;'),
1211 |             compoundnode('pattern2|pattern3)\n          echo pattern 2; echo pattern 3\n',
1212 |               patternnode('pattern2|pattern3',
1213 |                 wordnode('pattern2', 'pattern2'),
1214 |                 reservedwordnode('|', '|'),
1215 |                 wordnode('pattern3', 'pattern3'),
1216 |               ),
1217 |               reservedwordnode(')', ')'),
1218 |               listnode('echo pattern 2; echo pattern 3\n',
1219 |                 commandnode('echo pattern 2',
1220 |                   wordnode('echo', 'echo'),
1221 |                   wordnode('pattern', 'pattern'),
1222 |                   wordnode('2', '2')
1223 |                 ),
1224 |                 operatornode(';', ';'),
1225 |                 commandnode('echo pattern 3',
1226 |                   wordnode('echo', 'echo'),
1227 |                   wordnode('pattern', 'pattern'),
1228 |                   wordnode('3', '3')
1229 |                 ),
1230 |                 operatornode('\n', '\n'),
1231 |               ),
1232 |             ),
1233 |             reservedwordnode(';;', ';;'),
1234 |             compoundnode('*)',
1235 |               patternnode('*', wordnode('*', '*')),
1236 |               reservedwordnode(')', ')'),
1237 |             ),
1238 |             reservedwordnode(';;', ';;'),
1239 |             reservedwordnode('esac', 'esac'),
1240 |           )
1241 |         )
1242 |       )
1243 | 
1244 |     def test_case_default_without_semicolon(self):
1245 |       s = """case ${1} in
1246 |         pattern1) echo pattern1;;
1247 |         *) echo pattern2
1248 |       esac"""
1249 |       self.assertASTEquals(s,
1250 |         compoundnode(s,
1251 |           casenode(s,
1252 |             reservedwordnode('case', 'case'),
1253 |             wordnode('${1}', '${1}', [
1254 |               parameternode('1', '${1}'),
1255 |             ]),
1256 |             reservedwordnode('in', 'in'),
1257 |             compoundnode('pattern1) echo pattern1',
1258 |               patternnode('pattern1', wordnode('pattern1', 'pattern1')),
1259 |               reservedwordnode(')', ')'),
1260 |               commandnode('echo pattern1',
1261 |                 wordnode('echo', 'echo'),
1262 |                 wordnode('pattern1', 'pattern1'),
1263 |               ),
1264 |             ),
1265 |             reservedwordnode(';;', ';;'),
1266 |             compoundnode('*) echo pattern2',
1267 |               patternnode('*', wordnode('*', '*')),
1268 |               reservedwordnode(')', ')'),
1269 |               commandnode('echo pattern2',
1270 |                 wordnode('echo', 'echo'),
1271 |                 wordnode('pattern2', 'pattern2'),
1272 |               ),
1273 |             ),
1274 |             reservedwordnode('esac', 'esac'),
1275 |           )
1276 |         )
1277 |       )
1278 | 
1279 |     def test_unimplemented(self):
1280 |       s = 'coproc echo'
1281 |       self.assertASTEquals(s,
1282 |               unimplementednode(s,
1283 |                   reservedwordnode('coproc', 'coproc'),
1284 |                   wordnode('echo', 'echo')),
1285 |               proceedonerror=True)
1286 |       with self.assertRaises(NotImplementedError):
1287 |           parse(s, proceedonerror=False)
1288 | 


--------------------------------------------------------------------------------