├── MANIFEST.in ├── requirements.txt ├── setup.cfg ├── bashlex ├── state.py ├── __init__.py ├── errors.py ├── shutils.py ├── heredoc.py ├── utils.py ├── flags.py ├── ast.py ├── subst.py ├── parser.py └── tokenizer.py ├── pyproject.toml ├── .gitignore ├── Makefile ├── setup.py ├── .github └── workflows │ └── test.yml ├── examples └── commandsubstitution-remover.py ├── README.md ├── tests ├── test_tokenizer.py └── test_parser.py └── LICENSE /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | enum34; python_version < "3.4" 2 | build 3 | twine 4 | pytest 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [tool:pytest] 5 | addopts = --doctest-modules -ra 6 | -------------------------------------------------------------------------------- /bashlex/state.py: -------------------------------------------------------------------------------- 1 | from bashlex import flags, utils 2 | 3 | parserstate = lambda: utils.typedset(flags.parser) 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | 7 | build-backend = "setuptools.build_meta" 8 | -------------------------------------------------------------------------------- /bashlex/__init__.py: -------------------------------------------------------------------------------- 1 | from bashlex import parser, tokenizer 2 | 3 | parse = parser.parse 4 | parsesingle = parser.parsesingle 5 | split = parser.split 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | .coverage 4 | .vagrant 5 | bashlex/parser.out 6 | bashlex/parsetab.py 7 | 8 | build/ 9 | dist/ 10 | bashlex.egg-info/ 11 | *env*/ 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | tests: 2 | @python -c "import pytest" >/dev/null 2>&1 || (echo "error: pytest missing, run 'pip install pytest'\n" && false) 3 | python -m pytest 4 | 5 | .PHONY: tests 6 | -------------------------------------------------------------------------------- /bashlex/errors.py: -------------------------------------------------------------------------------- 1 | class ParsingError(Exception): 2 | def __init__(self, message, s, position): 3 | self.message = message 4 | self.s = s 5 | self.position = position 6 | 7 | assert position <= len(s) 8 | super(ParsingError, self).__init__('%s (position %d)' % (message, position)) 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name='bashlex', 6 | version='0.18', 7 | url='https://github.com/idank/bashlex.git', 8 | license='GPLv3+', 9 | author='Idan Kamara', 10 | author_email='i@idank.me', 11 | description='Python parser 
for bash', 12 | long_description='''bashlex is a Python port of the parser used internally by GNU bash. 13 | 14 | For the most part it's transliterated from C, the major differences are: 15 | 16 | 1. it does not execute anything 17 | 2. it is reentrant 18 | 3. it generates a complete AST 19 | 20 | See https://github.com/idank/bashlex/blob/master/README.md for more info.''', 21 | classifiers=[ 22 | 'Development Status :: 4 - Beta', 23 | 'Environment :: Console', 24 | 'Intended Audience :: Developers', 25 | 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', 26 | 'Operating System :: OS Independent', 27 | 'Programming Language :: Python', 28 | 'Topic :: Software Development :: Libraries :: Python Modules', 29 | 'Topic :: System :: System Shells', 30 | 'Topic :: Text Processing', 31 | ], 32 | python_requires=">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4", 33 | install_requires=['enum34; python_version < "3.4"'], 34 | packages=['bashlex'], 35 | ) 36 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | env: 6 | FORCE_COLOR: 1 7 | 8 | jobs: 9 | test: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Get pip cache dir 26 | id: pip-cache 27 | run: | 28 | echo "::set-output name=dir::$(pip cache dir)" 29 | 30 | - name: Cache 31 | uses: actions/cache@v2 32 | with: 33 | path: ${{ steps.pip-cache.outputs.dir }} 34 | key: 35 | ${{ matrix.os }}-${{ matrix.python-version }}-v1-${{ hashFiles('**/requirements.txt') }} 36 | restore-keys: | 37 | ${{ matrix.os }}-${{ matrix.python-version }}-v1- 38 | 39 | - name: Install dependencies 40 | run: | 41 | python -m pip install -U pip 42 | python -m pip install -U wheel 43 | python -m pip install -Ur requirements.txt 44 | python setup.py install 45 | 46 | - name: Test 47 | run: | 48 | make tests 49 | -------------------------------------------------------------------------------- /bashlex/shutils.py: -------------------------------------------------------------------------------- 1 | def single_quote(s): 2 | if s[0] == "'" and len(s) == 1: 3 | return "\\'" 4 | 5 | l = ["'"] 6 | 7 | for c in s: 8 | l.append(c) 9 | if c == "'": 10 | l.extend(["\\''"]) 11 | 12 | l.append("'") 13 | 14 | return ''.join(l) 15 | 16 | def double_quote(s): 17 | return s 18 | 19 | def legal_number(s): 20 | try: 21 | x = int(s) 22 | return True 23 | except ValueError: 24 | return False 25 | 26 | def legal_identifier(name): 27 | pass 28 | 29 | def removequotes(s, heredoc=False, doublequotes=False): 30 | r = '' 31 | sindex = 0 32 | dquote = False 33 | while sindex < len(s): 34 | c = s[sindex] 35 | if c == '\\': 36 | sindex += 1 37 | if sindex == len(s): 38 | r += '\\' 39 | return r 40 | c = s[sindex] 41 | if ((heredoc and doublequotes) or dquote) and not _shellquote(c): 42 | r += '\\' 43 | r += c 44 | elif c == "'": 45 | if (heredoc and doublequotes) or dquote: 46 | r += c 47 | sindex += 1 48 | else: 49 | t = s.find("'", sindex + 1) 50 | if t == -1: 51 | t = len(s) 52 | else: 53 | t += 1 54 | 55 | r 
+= s[sindex + 1:t-1] 56 | sindex = t 57 | elif c == '"': 58 | dquote = not dquote 59 | sindex += 1 60 | else: 61 | r += c 62 | sindex += 1 63 | return r 64 | -------------------------------------------------------------------------------- /bashlex/heredoc.py: -------------------------------------------------------------------------------- 1 | from bashlex import ast, errors 2 | 3 | def gatherheredocuments(tokenizer): 4 | # if we're at the end of the input and we're not strict, allow skipping 5 | # reading the heredoc 6 | while tokenizer.redirstack: 7 | if tokenizer._peekc() is None and not tokenizer._strictmode: 8 | tokenizer._shell_input_line_index += 1 9 | return 10 | 11 | redirnode, killleading = tokenizer.redirstack.pop(0) 12 | makeheredoc(tokenizer, redirnode, 0, killleading) 13 | 14 | def makeheredoc(tokenizer, redirnode, lineno, killleading): 15 | # redirword = string_quote_removal(redirectnode.word) 16 | redirword = redirnode.output.word 17 | document = [] 18 | 19 | startpos = tokenizer._shell_input_line_index 20 | 21 | #fullline = self.tok.readline(bool(redirword.output.flags & flags.word.QUOTED)) 22 | fullline = tokenizer.readline(False) 23 | while fullline: 24 | if killleading: 25 | while fullline[0] == '\t': 26 | fullline = fullline[1:] 27 | 28 | if not fullline: 29 | continue 30 | 31 | if fullline[:-1] == redirword and fullline[len(redirword)] == '\n': 32 | document.append(fullline[:-1]) 33 | # document_done 34 | break 35 | 36 | document.append(fullline) 37 | #fullline = self.readline(bool(redirnode.flags & flags.word.QUOTED)) 38 | fullline = tokenizer.readline(False) 39 | 40 | if not fullline: 41 | raise errors.ParsingError("here-document at line %d delimited by end-of-file (wanted %r)" % (lineno, redirword), tokenizer._shell_input_line, tokenizer._shell_input_line_index) 42 | 43 | document = ''.join(document) 44 | endpos = tokenizer._shell_input_line_index - 1 45 | 46 | assert hasattr(redirnode, 'heredoc') 47 | redirnode.heredoc = ast.node(kind='heredoc', value=document, 48 | pos=(startpos, endpos)) 49 | 50 | # if the heredoc immediately follows this node, fix its end pos 51 | if redirnode.pos[1] + 1 == startpos: 52 | redirnode.pos = (redirnode.pos[0], endpos) 53 | 54 | return document 55 | -------------------------------------------------------------------------------- /bashlex/utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from collections.abc import MutableSet, Mapping 3 | except ImportError: 4 | # Python 2 fallback 5 | from collections import MutableSet, Mapping 6 | 7 | 8 | class typedset(MutableSet): 9 | def __init__(self, type_, iterable=[]): 10 | self._s = set() 11 | self._type = type_ 12 | for v in iterable: 13 | self.add(v) 14 | 15 | def add(self, value): 16 | if not isinstance(value, self._type): 17 | raise ValueError('can only add items of type %s to this set' % self._type) 18 | self._s.add(value) 19 | 20 | def discard(self, value): 21 | self._s.discard(value) 22 | 23 | def __contains__(self, value): 24 | return self._s.__contains__(value) 25 | 26 | def __iter__(self): 27 | return self._s.__iter__() 28 | 29 | def __len__(self): 30 | return len(self._s) 31 | 32 | def __and__(self, value): 33 | if isinstance(value, self._type): 34 | value = set([value]) 35 | return self._s.__and__(value) 36 | 37 | def __or__(self, value): 38 | if isinstance(value, self._type): 39 | value = set([value]) 40 | return self._s.__or__(value) 41 | 42 | def __ior__(self, value): 43 | if isinstance(value, self._type): 44 | value = 
set([value]) 45 | self._s.__ior__(value) 46 | return self 47 | 48 | #def __sub__(self, value): 49 | # if isinstance(value, self._type): 50 | # value = set([value]) 51 | # return self._s.__sub__(value) 52 | 53 | def __repr__(self): 54 | return self._s.__repr__() 55 | 56 | class frozendict(Mapping): 57 | def __init__(self, *args, **kwargs): 58 | self.__dict = dict(*args, **kwargs) 59 | self.__hash = None 60 | 61 | def __getitem__(self, key): 62 | return self.__dict[key] 63 | 64 | def copy(self, **add_or_replace): 65 | return frozendict(self, **add_or_replace) 66 | 67 | def __iter__(self): 68 | return iter(self.__dict) 69 | 70 | def __len__(self): 71 | return len(self.__dict) 72 | 73 | def __repr__(self): 74 | return '<frozendict %s>' % repr(self.__dict) 75 | -------------------------------------------------------------------------------- /examples/commandsubstitution-remover.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | 5 | import argparse 6 | from argparse import RawTextHelpFormatter 7 | 8 | from bashlex import parser, ast 9 | 10 | class nodevisitor(ast.nodevisitor): 11 | def __init__(self, positions): 12 | self.positions = positions 13 | 14 | def visitcommandsubstitution(self, n, command): 15 | # log the start and end positions of this command substitution 16 | self.positions.append(n.pos) 17 | 18 | # do not recurse into child nodes 19 | return False 20 | 21 | desc = '''replace all occurrences of $() and `` with the string given in -s 22 | 23 | $ commandsubstitution-remover.py -s nope -c 'foo $(bar)' 24 | foo nope 25 | 26 | within words: 27 | 28 | $ commandsubstitution-remover.py -c '"foo $(bar) baz"' 29 | "foo XXX baz" 30 | 31 | but not within single quotes, since they cancel special meaning: 32 | 33 | $ commandsubstitution-remover.py -c "foo '"'$(bar)'"'" 34 | foo '$(bar)' 35 | 36 | (this is a simple script to demonstrate how to traverse the ast produced 37 | by bashlex) 38 | ''' 39 | 40 | if __name__ == '__main__': 41 | argparser = argparse.ArgumentParser(description=desc, 42 | formatter_class=RawTextHelpFormatter) 43 | argparser.add_argument('-s', dest='replacement', metavar='S', default='XXX', 44 | help='replace occurrences with S (default: XXX)') 45 | 46 | group = argparser.add_mutually_exclusive_group() 47 | group.add_argument('file', metavar='file', type=argparse.FileType('r'), nargs='?', 48 | help='file to parse') 49 | group.add_argument('-c', dest='expression', 50 | help='string to parse') 51 | 52 | args = argparser.parse_args() 53 | 54 | if args.expression: 55 | s = args.expression 56 | elif args.file: 57 | s = args.file.read() 58 | else: 59 | s = sys.stdin.read() 60 | 61 | trees = parser.parse(s) 62 | positions = [] 63 | for tree in trees: 64 | visitor = nodevisitor(positions) 65 | visitor.visit(tree) 66 | 67 | # do replacements from the end so the indices will be correct 68 | positions.reverse() 69 | 70 | postprocessed = list(s) 71 | 72 | for start, end in positions: 73 | # replace the portion of the input where the substitution occurred 74 | # with the replacement string 75 | postprocessed[start:end] = args.replacement 76 | 77 | print(''.join(postprocessed)) 78 | -------------------------------------------------------------------------------- /bashlex/flags.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | parser = enum.Enum('parserflags', [ 4 | 'CASEPAT', # in a case pattern list 5 | 'ALEXPNEXT', # expand next word for aliases 6 | 'ALLOWOPNBRC', # 
allow open brace for function def 7 | 'NEEDCLOSBRC', # need close brace 8 | 'DBLPAREN', # double-paren parsing 9 | 'SUBSHELL', # ( ... ) subshell 10 | 'CMDSUBST', # $( ... ) command substitution 11 | 'CASESTMT', # parsing a case statement 12 | 'CONDCMD', # parsing a [[...]] command 13 | 'CONDEXPR', # parsing the guts of [[...]] 14 | 'ARITHFOR', # parsing an arithmetic for command - unused 15 | 'ALEXPAND', # OK to expand aliases - unused 16 | 'EXTPAT', # parsing an extended shell pattern 17 | 'COMPASSIGN', # parsing x=(...) compound assignment 18 | 'ASSIGNOK', # assignment statement ok in this context 19 | 'EOFTOKEN', # yylex checks against shell_eof_token 20 | 'REGEXP', # parsing an ERE/BRE as a single word 21 | 'HEREDOC', # reading body of here-document 22 | 'REPARSE', # re-parsing in parse_string_to_word_list 23 | 'REDIRLIST', # parsing a list of redirections preceding a simple command name 24 | ]) 25 | 26 | word = enum.Enum('wordflags', [ 27 | 'HASDOLLAR', # Dollar sign present 28 | 'QUOTED', # Some form of quote character is present 29 | 'ASSIGNMENT', # This word is a variable assignment 30 | 'SPLITSPACE', # Split this word on " " regardless of IFS 31 | 'NOSPLIT', # Do not perform word splitting on this word because ifs is empty string 32 | 'NOGLOB', # Do not perform globbing on this word 33 | 'NOSPLIT2', # Don't split word except for $@ expansion (using spaces) because context does not allow it 34 | 'TILDEEXP', # Tilde expand this assignment word 35 | 'DOLLARAT', # $@ and its special handling 36 | 'DOLLARSTAR', # $* and its special handling 37 | 'NOCOMSUB', # Don't perform command substitution on this word 38 | 'ASSIGNRHS', # Word is rhs of an assignment statement 39 | 'NOTILDE', # Don't perform tilde expansion on this word 40 | 'ITILDE', # Internal flag for word expansion 41 | 'NOEXPAND', # Don't expand at all -- do quote removal 42 | 'COMPASSIGN', # Compound assignment 43 | 'ASSNBLTIN', # word is a builtin command that takes assignments 44 | 'ASSIGNARG', # word is assignment argument to command 45 | 'HASQUOTEDNULL', # word contains a quoted null character 46 | 'DQUOTE', # word should be treated as if double-quoted 47 | 'NOPROCSUB', # don't perform process substitution 48 | 'HASCTLESC', # word contains literal CTLESC characters 49 | 'ASSIGNASSOC', # word looks like associative array assignment 50 | 'ASSIGNARRAY', # word looks like a compound indexed array assignment 51 | 'ARRAYIND', # word is an array index being expanded 52 | 'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g) 53 | 'NOBRACE', # Don't perform brace expansion 54 | 'ASSIGNINT', # word is an integer assignment to declare 55 | ]) 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bashlex - Python parser for bash 2 | 3 | [![GitHub Actions status](https://github.com/idank/bashlex/workflows/Test/badge.svg)](https://github.com/idank/bashlex/actions) 4 | 5 | bashlex is a Python port of the parser used internally by GNU bash. 6 | 7 | For the most part it's transliterated from C, the major differences are: 8 | 9 | 1. it does not execute anything 10 | 2. it is reentrant 11 | 3. it generates a complete AST 12 | 13 | ## Installation: 14 | 15 | $ pip install bashlex 16 | 17 | ## Usage 18 | 19 | $ python 20 | >>> import bashlex 21 | >>> parts = bashlex.parse('true && cat <(echo $(echo foo))') 22 | >>> for ast in parts: 23 | ... 
print(ast.dump()) 24 | ListNode(pos=(0, 31), parts=[ 25 | CommandNode(pos=(0, 4), parts=[ 26 | WordNode(pos=(0, 4), word='true'), 27 | ]), 28 | OperatorNode(op='&&', pos=(5, 7)), 29 | CommandNode(pos=(8, 31), parts=[ 30 | WordNode(pos=(8, 11), word='cat'), 31 | WordNode(pos=(12, 31), word='<(echo $(echo foo))', parts=[ 32 | ProcesssubstitutionNode(command= 33 | CommandNode(pos=(14, 30), parts=[ 34 | WordNode(pos=(14, 18), word='echo'), 35 | WordNode(pos=(19, 30), word='$(echo foo)', parts=[ 36 | CommandsubstitutionNode(command= 37 | CommandNode(pos=(21, 29), parts=[ 38 | WordNode(pos=(21, 25), word='echo'), 39 | WordNode(pos=(26, 29), word='foo'), 40 | ]), pos=(19, 30)), 41 | ]), 42 | ]), pos=(12, 31)), 43 | ]), 44 | ]), 45 | ]) 46 | 47 | It is also possible to only use the tokenizer and get similar behaviour to 48 | shlex.split, but bashlex understands more complex constructs such as command 49 | and process substitutions: 50 | 51 | >>> list(bashlex.split('cat <(echo "a $(echo b)") | tee')) 52 | ['cat', '<(echo "a $(echo b)")', '|', 'tee'] 53 | 54 | ...compared to shlex: 55 | 56 | >>> shlex.split('cat <(echo "a $(echo b)") | tee') 57 | ['cat', '<(echo', 'a $(echo b))', '|', 'tee'] 58 | 59 | The examples/ directory contains a sample script that demonstrates how to 60 | traverse the ast to do more complicated things. 61 | 62 | ## Limitations 63 | 64 | Currently the parser has no support for: 65 | 66 | - arithmetic expressions $((..)) 67 | - the more complicated parameter expansions such as ${parameter#word} are taken 68 | literally and do not produce child nodes 69 | 70 | ## Debugging 71 | 72 | It can be useful to debug bashlex in conjunction with GNU bash, since it's mostly 73 | a transliteration. Comments in the code sometimes contain line references to 74 | bash's source code, e.g. `# bash/parse.y L2626`. 75 | 76 | $ git clone git://git.sv.gnu.org/bash.git 77 | $ cd bash 78 | $ git checkout df2c55de9c87c2ee8904280d26e80f5c48dd6434 # commit used in 79 | translating the code 80 | $ ./configure 81 | $ make CFLAGS=-g CFLAGS_FOR_BUILD=-g # debug info and don't optimize 82 | $ gdb --args ./bash -c 'echo foo' 83 | 84 | Useful things to look at when debugging bash: 85 | 86 | - variables yylval, shell_input_line, shell_input_line_index 87 | - breakpoint at `yylex` (the mapping of token numbers to names is in the file parser-built) 88 | - breakpoint at `read_token_word` (corresponds to `bashlex/tokenizer._readtokenword`) 89 | - `xparse_dolparen, expand_word_internal` (called when parsing $()) 90 | 91 | ## Motivation 92 | 93 | I wrote this library for another project of mine, [explainshell](http://www.explainshell.com), 94 | which needed a new parsing backend to support complex constructs such as 95 | process/command substitutions. 96 | 97 | ## Releasing a new version 98 | 99 | Suggested setup for a release environment: 100 | 101 | ```bash 102 | python3 -m venv venv 103 | source venv/bin/activate 104 | pip install -r requirements.txt 105 | ``` 106 | 107 | - `make tests` 108 | - bump version in `setup.py` 109 | - git tag the new commit 110 | - run `python -m build` 111 | - run `twine upload dist/*` 112 | 113 | ## License 114 | 115 | The license for this is the same as that used by GNU bash, GNU GPL v3+. 116 | -------------------------------------------------------------------------------- /bashlex/ast.py: -------------------------------------------------------------------------------- 1 | class node(object): 2 | """ 3 | This class represents a node in the AST built while parsing command lines. 
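Every node carries a 'kind' attribute ('command', 'word', 'operator', 'pipeline', ...) along with kind-specific attributes such as 'word', 'parts' or 'pos'; a word node, for instance, reprs roughly as WordNode(parts=[] pos=(0, 4) word='true').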
4 | It's basically an object container for various attributes, with a slightly 5 | specialised representation to make it a little easier to debug the parser. 6 | """ 7 | 8 | def __init__(self, **kwargs): 9 | assert 'kind' in kwargs 10 | self.__dict__.update(kwargs) 11 | 12 | def dump(self, indent=' '): 13 | return _dump(self, indent) 14 | 15 | def __repr__(self): 16 | chunks = [] 17 | d = dict(self.__dict__) 18 | kind = d.pop('kind') 19 | for k, v in sorted(d.items()): 20 | chunks.append('%s=%r' % (k, v)) 21 | return '%sNode(%s)' % (kind.title(), ' '.join(chunks)) 22 | 23 | def __eq__(self, other): 24 | if not isinstance(other, node): 25 | return False 26 | return self.__dict__ == other.__dict__ 27 | 28 | def __hash__(self): 29 | return hash(tuple(sorted(self.__dict__))) 30 | 31 | class nodevisitor(object): 32 | def _visitnode(self, n, *args, **kwargs): 33 | k = n.kind 34 | self.visitnode(n) 35 | return getattr(self, 'visit%s' % k)(n, *args, **kwargs) 36 | 37 | def visit(self, n): 38 | k = n.kind 39 | if k == 'operator': 40 | self._visitnode(n, n.op) 41 | elif k == 'list': 42 | dochild = self._visitnode(n, n.parts) 43 | if dochild is None or dochild: 44 | for child in n.parts: 45 | self.visit(child) 46 | elif k == 'reservedword': 47 | self._visitnode(n, n.word) 48 | elif k == 'pipe': 49 | self._visitnode(n, n.pipe) 50 | elif k == 'pipeline': 51 | dochild = self._visitnode(n, n.parts) 52 | if dochild is None or dochild: 53 | for child in n.parts: 54 | self.visit(child) 55 | elif k == 'compound': 56 | dochild = self._visitnode(n, n.list, n.redirects) 57 | if dochild is None or dochild: 58 | for child in n.list: 59 | self.visit(child) 60 | for child in n.redirects: 61 | self.visit(child) 62 | elif k in ('if', 'for', 'while', 'until', 'case', 'pattern'): 63 | dochild = self._visitnode(n, n.parts) 64 | if dochild is None or dochild: 65 | for child in n.parts: 66 | self.visit(child) 67 | elif k == 'command': 68 | dochild = self._visitnode(n, n.parts) 69 | if dochild is None or dochild: 70 | for child in n.parts: 71 | self.visit(child) 72 | elif k == 'function': 73 | dochild = self._visitnode(n, n.name, n.body, n.parts) 74 | if dochild is None or dochild: 75 | for child in n.parts: 76 | self.visit(child) 77 | elif k == 'redirect': 78 | dochild = self._visitnode(n, n.input, n.type, n.output, n.heredoc) 79 | if dochild is None or dochild: 80 | if isinstance(n.output, node): 81 | self.visit(n.output) 82 | if n.heredoc: 83 | self.visit(n.heredoc) 84 | elif k in ('word', 'assignment'): 85 | dochild = self._visitnode(n, n.word) 86 | if dochild is None or dochild: 87 | for child in n.parts: 88 | self.visit(child) 89 | elif k in ('parameter', 'tilde', 'heredoc'): 90 | self._visitnode(n, n.value) 91 | elif k in ('commandsubstitution', 'processsubstitution'): 92 | dochild = self._visitnode(n, n.command) 93 | if dochild is None or dochild: 94 | self.visit(n.command) 95 | elif k == 'unimplemented': 96 | dochild = self._visitnode(n, n.parts) 97 | if dochild is None or dochild: 98 | for child in n.parts: 99 | self.visit(child) 100 | else: 101 | raise ValueError('unknown node kind %r' % k) 102 | self.visitnodeend(n) 103 | 104 | def visitnode(self, n): 105 | pass 106 | def visitnodeend(self, n): 107 | pass 108 | def visitoperator(self, n, op): 109 | pass 110 | def visitlist(self, n, parts): 111 | pass 112 | def visitpipe(self, n, pipe): 113 | pass 114 | def visitpipeline(self, n, parts): 115 | pass 116 | def visitcompound(self, n, list, redirects): 117 | pass 118 | def visitif(self, node, parts): 119 | pass 120 
| def visitfor(self, node, parts): 121 | pass 122 | def visitwhile(self, node, parts): 123 | pass 124 | def visituntil(self, node, parts): 125 | pass 126 | def visitcommand(self, n, parts): 127 | pass 128 | def visitfunction(self, n, name, body, parts): 129 | pass 130 | def visitword(self, n, word): 131 | pass 132 | def visitassignment(self, n, word): 133 | pass 134 | def visitreservedword(self, n, word): 135 | pass 136 | def visitparameter(self, n, value): 137 | pass 138 | def visittilde(self, n, value): 139 | pass 140 | def visitredirect(self, n, input, type, output, heredoc): 141 | pass 142 | def visitheredoc(self, n, value): 143 | pass 144 | def visitprocesssubstitution(self, n, command): 145 | pass 146 | def visitcommandsubstitution(self, n, command): 147 | pass 148 | def visitcase(self, node, parts): 149 | pass 150 | def visitpattern(self, node, parts): 151 | pass 152 | def visitunimplemented(self, node, parts): 153 | pass 154 | 155 | 156 | def _dump(tree, indent=' '): 157 | def _format(n, level=0): 158 | if isinstance(n, node): 159 | d = dict(n.__dict__) 160 | kind = d.pop('kind') 161 | if kind == 'list' and level > 0: 162 | level = level + 1 163 | fields = [] 164 | v = d.pop('s', None) 165 | if v: 166 | fields.append(('s', _format(v, level))) 167 | for k, v in sorted(d.items()): 168 | if not v or k == 'parts': 169 | continue 170 | llevel = level 171 | if isinstance(v, node): 172 | llevel += 1 173 | fields.append((k, '\n' + (indent * llevel) + _format(v, llevel))) 174 | else: 175 | fields.append((k, _format(v, level))) 176 | if kind == 'function': 177 | fields = [f for f in fields if f[0] not in ('name', 'body')] 178 | v = d.pop('parts', None) 179 | if v: 180 | fields.append(('parts', _format(v, level))) 181 | return ''.join([ 182 | '%sNode' % kind.title(), 183 | '(', 184 | ', '.join(('%s=%s' % field for field in fields)), 185 | ')']) 186 | elif isinstance(n, list): 187 | lines = ['['] 188 | lines.extend((indent * (level + 1) + _format(x, level + 1) + ',' 189 | for x in n)) 190 | if len(lines) > 1: 191 | lines.append(indent * (level) + ']') 192 | else: 193 | lines[-1] += ']' 194 | return '\n'.join(lines) 195 | return repr(n) 196 | 197 | if not isinstance(tree, node): 198 | raise TypeError('expected node, got %r' % tree.__class__.__name__) 199 | return _format(tree) 200 | 201 | def findfirstkind(parts, kind): 202 | for i, node in enumerate(parts): 203 | if node.kind == kind: 204 | return i 205 | return -1 206 | 207 | class posconverter(nodevisitor): 208 | def __init__(self, string): 209 | self.string = string 210 | 211 | def visitnode(self, node): 212 | assert hasattr(node, 'pos'), 'node %r is missing pos attr' % node 213 | start, end = node.__dict__.pop('pos') 214 | node.s = self.string[start:end] 215 | 216 | class posshifter(nodevisitor): 217 | def __init__(self, count): 218 | self.count = count 219 | 220 | def visitnode(self, node): 221 | #assert node.pos[1] + base <= endlimit 222 | node.pos = (node.pos[0] + self.count, node.pos[1] + self.count) 223 | -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from bashlex import tokenizer, state, flags, errors 4 | 5 | from bashlex.tokenizer import token as t 6 | from bashlex.tokenizer import tokentype as tt 7 | 8 | tokenize = lambda s: list(tokenizer.tokenizer(s, state.parserstate())) 9 | 10 | hasdollarset = set([flags.word.HASDOLLAR]) 11 | 12 | class 
test_tokenizer(unittest.TestCase): 13 | 14 | def setUp(self): 15 | if not hasattr(self, 'assertRaisesRegex'): 16 | self.assertRaisesRegex = self.assertRaisesRegexp 17 | 18 | def assertTokens(self, s, tokens): 19 | result = tokenize(s) 20 | 21 | # pop the last token if it's a new line since that gets appended 22 | # to the input string by default and we don't really care about 23 | # that here 24 | if result[-1].value == '\n': 25 | result.pop() 26 | 27 | self.assertEqual(result, tokens) 28 | 29 | for t in tokens: 30 | self.assertEqual(str(t.value), s[t.lexpos:t.endlexpos]) 31 | 32 | def test_empty_string(self): 33 | self.assertEqual(len(tokenize('')), 0) 34 | 35 | def test_simple(self): 36 | s = 'a b' 37 | self.assertTokens(s, [ 38 | t(tt.WORD, 'a', [0, 1]), 39 | t(tt.WORD, 'b', [2, 3])]) 40 | 41 | def test_meta(self): 42 | s = '!&()<>;&;;&;; |<<-<< <<<>>&&||<&>&<>>|&> &>>|&' 43 | self.assertTokens(s, [ 44 | t(tt.BANG, '!', [0, 1]), 45 | t(tt.AMPERSAND, '&', [1, 2]), 46 | t(tt.LEFT_PAREN, '(', [2, 3]), 47 | t(tt.RIGHT_PAREN, ')', [3, 4]), 48 | t(tt.LESS_GREATER, '<>', [4, 6]), 49 | t(tt.SEMI_AND, ';&', [6, 8]), 50 | t(tt.SEMI_SEMI_AND, ';;&', [8, 11]), 51 | t(tt.SEMI_SEMI, ';;', [11, 13]), 52 | t(tt.BAR, '|', [14, 15]), 53 | t(tt.LESS_LESS_MINUS, '<<-', [15, 18]), 54 | t(tt.LESS_LESS, '<<', [18, 20]), 55 | t(tt.LESS_LESS_LESS, '<<<', [21, 24]), 56 | t(tt.GREATER_GREATER, '>>', [24, 26]), 57 | t(tt.AND_AND, '&&', [26, 28]), 58 | t(tt.OR_OR, '||', [28, 30]), 59 | t(tt.LESS_AND, '<&', [30, 32]), 60 | t(tt.GREATER_AND, '>&', [32, 34]), 61 | t(tt.LESS_GREATER, '<>', [34, 36]), 62 | t(tt.GREATER_BAR, '>|', [36, 38]), 63 | t(tt.AND_GREATER, '&>', [38, 40]), 64 | t(tt.AND_GREATER_GREATER, '&>>', [41, 44]), 65 | t(tt.BAR_AND, '|&', [44, 46])]) 66 | 67 | s = '<&-' 68 | self.assertTokens(s, [ 69 | t(tt.LESS_AND, '<&', [0, 2]), 70 | t(tt.DASH, '-', [2, 3])]) 71 | 72 | def test_comment(self): 73 | s = '|# foo bar\n' 74 | self.assertTokens(s, [ 75 | t(tt.BAR, '|', [0, 1])]) 76 | 77 | def test_shellquote(self): 78 | s = '"foo"' 79 | self.assertTokens(s, [ 80 | t(tt.WORD, '"foo"', [0, 5], set([flags.word.QUOTED]))]) 81 | 82 | s = '"foo"bar\'baz\'' 83 | self.assertTokens(s, [ 84 | t(tt.WORD, s, [0, len(s)], set([flags.word.QUOTED]))]) 85 | 86 | self.assertRaises(tokenizer.MatchedPairError, 87 | tokenize, 88 | "'a") 89 | 90 | def test_shellexp(self): 91 | s = '<(foo) bar $(baz) ${a}' 92 | self.assertTokens(s, [ 93 | t(tt.WORD, '<(foo)', [0, 6], hasdollarset), 94 | t(tt.WORD, 'bar', [7, 10]), 95 | t(tt.WORD, '$(baz)', [11, 17], hasdollarset), 96 | t(tt.WORD, '${a}', [18, 22], hasdollarset)]) 97 | 98 | s = '$"foo" $1' 99 | self.assertTokens(s, [ 100 | t(tt.WORD, '$"foo"', [0, 6], set([flags.word.QUOTED])), 101 | t(tt.WORD, '$1', [7, 9], hasdollarset)]) 102 | 103 | def test_readtokenword(self): 104 | s = 'a\\"' 105 | self.assertTokens(s, [ 106 | t(tt.WORD, 'a\\"', [0, len(s)], set([flags.word.QUOTED]))]) 107 | 108 | def test_parameter_expansion(self): 109 | # s = 'a $"foo"' 110 | # tok = tokenizer.tokenizer(s, state.parserstate()) 111 | # self.assertEqual(list(tok), [t(tt.WORD, 'a'), 112 | # t(tt.WORD, '"foo"', flags=set([flags.word.QUOTED]))]) 113 | 114 | s = 'a $$' 115 | self.assertTokens(s, [ 116 | t(tt.WORD, 'a', [0, 1]), 117 | t(tt.WORD, '$$', [2, 4], hasdollarset)]) 118 | 119 | def test_comsub(self): 120 | s = 'a $(b)' 121 | self.assertTokens(s, [ 122 | t(tt.WORD, 'a', [0, 1]), 123 | t(tt.WORD, '$(b)', [2, 6], hasdollarset)]) 124 | 125 | s = '$("a")' 126 | self.assertTokens(s, [ 127 | t(tt.WORD, '$("a")', 
[0, 6], hasdollarset)]) 128 | 129 | s = "$($'a')" 130 | self.assertTokens(s, [ 131 | t(tt.WORD, "$($'a')", [0, 7], hasdollarset)]) 132 | 133 | s = '$(a $(b))' 134 | self.assertTokens(s, [ 135 | t(tt.WORD, '$(a $(b))', [0, 9], hasdollarset)]) 136 | 137 | s = '$(a ${b})' 138 | self.assertTokens(s, [ 139 | t(tt.WORD, '$(a ${b})', [0, 9], hasdollarset)]) 140 | 141 | s = '$(a $[b])' 142 | self.assertTokens(s, [ 143 | t(tt.WORD, '$(a $[b])', [0, 9], hasdollarset)]) 144 | 145 | s = '"$(a)"' 146 | self.assertTokens(s, [ 147 | t(tt.WORD, '"$(a)"', [0, 6], set([flags.word.HASDOLLAR, 148 | flags.word.QUOTED]))]) 149 | 150 | s = 'a $(! b)' 151 | self.assertTokens(s, [ 152 | t(tt.WORD, 'a', [0, 1]), 153 | t(tt.WORD, '$(! b)', [2, 8], hasdollarset)]) 154 | 155 | s = '$(!|!||)' 156 | self.assertTokens(s, [ 157 | t(tt.WORD, '$(!|!||)', [0, 8], hasdollarset)]) 158 | 159 | s = '$(a <', [1, 2])]) 307 | s = '$<$(b)' 308 | self.assertTokens(s, [ 309 | t(tt.WORD, '$', [0, 1], hasdollarset), 310 | t(tt.LESS, '<', [1, 2]), 311 | t(tt.WORD, '$(b)', [2, 6], hasdollarset)]) 312 | 313 | def test_quote_error(self): 314 | s = "a 'b" 315 | msg = "EOF.*matching \"'\" \\(position 4" 316 | self.assertRaisesRegex(errors.ParsingError, msg, tokenize, s) 317 | 318 | def test_escape_error(self): 319 | return # TODO 320 | 321 | s = "a b\\" 322 | 323 | self.assertRaisesRegex(errors.ParsingError, "No escaped character.*position 2", tokenize, s) 324 | 325 | def test_tokenize(self): 326 | s = 'bar -x' 327 | self.assertTokens(s, [ 328 | t(tt.WORD, 'bar', [0, 3]), 329 | t(tt.WORD, '-x', [4, 6])]) 330 | 331 | s = 'wx y =z ' 332 | self.assertTokens(s, [ 333 | t(tt.WORD, 'wx', [0, 2]), 334 | t(tt.WORD, 'y', [6, 7]), 335 | t(tt.WORD, '=z', [8, 10])]) 336 | 337 | s = "a 'b' c" 338 | self.assertTokens(s, [ 339 | t(tt.WORD, 'a', [0, 1]), 340 | t(tt.WORD, "'b'", [2, 5], set([flags.word.QUOTED])), 341 | t(tt.WORD, 'c', [6, 7])]) 342 | 343 | s = "a 'b ' c" 344 | self.assertTokens(s, [ 345 | t(tt.WORD, 'a', [0, 1]), 346 | t(tt.WORD, "'b '", [2, 7], set([flags.word.QUOTED])), 347 | t(tt.WORD, 'c', [8, 9])]) 348 | 349 | def test_escaped_newline(self): 350 | s= """a \\\nb""" 351 | self.assertTokens(s, [ 352 | t(tt.WORD, 'a', [0, 1]), 353 | t(tt.WORD, 'b', [4, 5]) 354 | ]) 355 | -------------------------------------------------------------------------------- /bashlex/subst.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from bashlex import ast, flags, tokenizer, errors 4 | 5 | def _recursiveparse(parserobj, base, sindex, tokenizerargs=None): 6 | # TODO: fix this hack that prevents mutual import 7 | from bashlex import parser 8 | 9 | tok = parserobj.tok 10 | 11 | if tokenizerargs is None: 12 | tokenizerargs = {'parserstate' : copy.copy(tok._parserstate), 13 | 'lastreadtoken' : tok._last_read_token, 14 | 'tokenbeforethat' : tok._token_before_that, 15 | 'twotokensago' : tok._two_tokens_ago} 16 | 17 | string = base[sindex:] 18 | newlimit = parserobj._expansionlimit 19 | if newlimit is not None: 20 | newlimit -= 1 21 | p = parser._parser(string, tokenizerargs=tokenizerargs, 22 | expansionlimit=newlimit) 23 | node = p.parse() 24 | 25 | endp = node.pos[1] 26 | _adjustpositions(node, sindex, len(base)) 27 | 28 | return node, endp 29 | 30 | def _parsedolparen(parserobj, base, sindex): 31 | copiedps = copy.copy(parserobj.parserstate) 32 | copiedps.add(flags.parser.CMDSUBST) 33 | copiedps.add(flags.parser.EOFTOKEN) 34 | string = base[sindex:] 35 | 36 | tokenizerargs = {'eoftoken' : 
tokenizer.token(tokenizer.tokentype.RIGHT_PAREN, ')'), 37 | 'parserstate' : copiedps, 38 | 'lastreadtoken' : parserobj.tok._last_read_token, 39 | 'tokenbeforethat' : parserobj.tok._token_before_that, 40 | 'twotokensago' : parserobj.tok._two_tokens_ago} 41 | 42 | node, endp = _recursiveparse(parserobj, base, sindex, tokenizerargs) 43 | 44 | if string[endp] != ')': 45 | while endp > 0 and string[endp-1] == '\n': 46 | endp -= 1 47 | 48 | return node, sindex + endp 49 | 50 | def _extractcommandsubst(parserobj, string, sindex, sxcommand=False): 51 | if string[sindex] == '(': 52 | raise NotImplementedError('arithmetic expansion') 53 | #return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True) 54 | else: 55 | node, si = _parsedolparen(parserobj, string, sindex) 56 | si += 1 57 | return ast.node(kind='commandsubstitution', command=node, pos=(sindex-2, si)), si 58 | 59 | def _extractprocesssubst(parserobj, string, sindex): 60 | #return _extractdelimitedstring(tok, string, sindex, starter, '(', ')', sxcommand=True) 61 | node, si = _parsedolparen(parserobj, string, sindex) 62 | return node, si + 1 63 | 64 | #def _extractdelimitedstring(parserobj, string, sindex, opener, altopener, closer, 65 | # sxcommand=False): 66 | # parts = [] 67 | # incomment = False 68 | # passchar = False 69 | # nestinglevel = 1 70 | # i = sindex 71 | 72 | # while nestinglevel: 73 | # if i >= len(string): 74 | # break 75 | # c = string[i] 76 | # if incomment: 77 | # if c == '\n': 78 | # incomment = False 79 | # i += 1 80 | # continue 81 | # elif passchar: 82 | # passchar = False 83 | # i += 1 84 | # continue 85 | 86 | # if sxcommand and c == '#' and (i == 0 or string[i-1] == '\n' or 87 | # tokenizer._shellblank(string[i-1])): 88 | # incomment = True 89 | # i += 1 90 | # continue 91 | 92 | # if c == '\\': 93 | # passchar = True 94 | # i += 1 95 | # continue 96 | 97 | # if sxcommand and string[i:i+2] == '$(': 98 | # si = i + 2 99 | # node, si = _extractcommandsubst(parserobj, string, si, sxcommand=sxcommand) 100 | # parts.append(node) 101 | # i = si + 1 102 | # continue 103 | 104 | # if string.startswith(opener, i): 105 | # si = i + len(opener) 106 | # nodes, si = _extractdelimitedstring(parserobj, string, si, opener, altopener, 107 | # closer, sxcommand=sxcommand) 108 | # parts.extend(nodes) 109 | # i = si + 1 110 | # continue 111 | 112 | # if string.startswith(altopener, i): 113 | # si = i + len(altopener) 114 | # nodes, si = _extractdelimitedstring(parserobj, string, si, altopener, altopener, 115 | # closer, sxcommand=sxcommand) 116 | # parts.extend(nodes) 117 | # i = si + 1 118 | # continue 119 | 120 | # # 1327 121 | # if string.startswith(closer, i): 122 | # i += len(closer) - 1 123 | # nestinglevel -= 1 124 | # if nestinglevel == 0: 125 | # break 126 | 127 | # if c == '`': 128 | # si = i + 1 129 | # t = _stringextract(string, si, '`', sxcommand=sxcommand) 130 | # i = si + 1 131 | # continue 132 | 133 | # if c in "'\"": 134 | # si = i +1 135 | # if c == '"': 136 | # i = _skipsinglequoted(string, si) 137 | # else: 138 | # i = _skipdoublequoted(string, si) 139 | # continue 140 | 141 | # i += 1 142 | 143 | # if i == len(string) and nestinglevel: 144 | # raise errors.ParsingError('bad substitution: no closing %r in %s' % (closer, string)) 145 | 146 | # return parts, i 147 | 148 | def _paramexpand(parserobj, string, sindex): 149 | node = None 150 | zindex = sindex + 1 151 | c = string[zindex] if zindex < len(string) else None 152 | if c and c in '0123456789$#?-!*@': 153 | # XXX 7685 154 | node = 
ast.node(kind='parameter', value=c, 155 | pos=(sindex, zindex+1)) 156 | elif c == '{': 157 | # XXX 7863 158 | # TODO not start enough, doesn't consider escaping 159 | zindex = string.find('}', zindex + 1) 160 | node = ast.node(kind='parameter', value=string[sindex+2:zindex], 161 | pos=(sindex, zindex+1)) 162 | # TODO 163 | # return _parameterbraceexpand(string, zindex) 164 | elif c == '(': 165 | return _extractcommandsubst(parserobj, string, zindex + 1) 166 | elif c == '[': 167 | raise NotImplementedError('arithmetic substitution') 168 | #return _extractarithmeticsubst(string, zindex + 1) 169 | else: 170 | tindex = zindex 171 | for zindex in range(tindex, len(string) + 1): 172 | if zindex == len(string): 173 | break 174 | if not string[zindex].isalnum() and not string[zindex] == '_': 175 | break 176 | temp1 = string[sindex:zindex] 177 | if temp1: 178 | return (ast.node(kind='parameter', value=temp1[1:], pos=(sindex, zindex)), 179 | zindex) 180 | 181 | if zindex < len(string): 182 | zindex += 1 183 | 184 | return node, zindex 185 | 186 | def _adjustpositions(node_, base, endlimit): 187 | class v(ast.nodevisitor): 188 | def visitnode(self, node): 189 | assert node.pos[1] + base <= endlimit 190 | node.pos = (node.pos[0] + base, node.pos[1] + base) 191 | visitor = v() 192 | visitor.visit(node_) 193 | 194 | def _expandwordinternal(parserobj, wordtoken, qheredocument, qdoublequotes, quoted, isexp): 195 | # bash/subst.c L8132 196 | istring = '' 197 | parts = [] 198 | tindex = [0] 199 | sindex = [0] 200 | string = wordtoken.value 201 | def nextchar(): 202 | sindex[0] += 1 203 | if sindex[0] < len(string): 204 | return string[sindex[0]] 205 | def peekchar(): 206 | if sindex[0]+1 < len(string): 207 | return string[sindex[0]+1] 208 | 209 | while True: 210 | if sindex[0] == len(string): 211 | break 212 | # goto finished_with_string 213 | c = string[sindex[0]] 214 | if c in '<>': 215 | if (nextchar() != '(' or qheredocument or qdoublequotes or 216 | (wordtoken.flags & set([flags.word.DQUOTE, flags.word.NOPROCSUB]))): 217 | sindex[0] -= 1 218 | 219 | # goto add_character 220 | sindex[0] += 1 221 | istring += c 222 | else: 223 | tindex = sindex[0] + 1 224 | 225 | node, sindex[0] = _extractprocesssubst(parserobj, string, tindex) 226 | 227 | parts.append(ast.node(kind='processsubstitution', command=node, 228 | pos=(tindex - 2, sindex[0]))) 229 | istring += string[tindex - 2:sindex[0]] 230 | # goto dollar_add_string 231 | # TODO 232 | # elif c == '=': 233 | # pass 234 | # elif c == ':': 235 | # pass 236 | elif c == '~': 237 | if (wordtoken.flags & set([flags.word.NOTILDE, flags.word.DQUOTE]) or 238 | (sindex[0] > 0 and not (wordtoken.flags & flags.word.NOTILDE)) or 239 | qdoublequotes or qheredocument): 240 | wordtoken.flags.clear() 241 | wordtoken.flags.add(flags.word.ITILDE) 242 | sindex[0] += 1 243 | istring += c 244 | else: 245 | stopatcolon = wordtoken.flags & set([flags.word.ASSIGNRHS, 246 | flags.word.ASSIGNMENT, 247 | flags.word.TILDEEXP]) 248 | expand = True 249 | for i in range(sindex[0], len(string)): 250 | r = string[i] 251 | if r == '/': 252 | break 253 | if r in "\\'\"": 254 | expand = False 255 | break 256 | if stopatcolon and r == ':': 257 | break 258 | else: 259 | # go one past the end if we didn't exit early 260 | i += 1 261 | 262 | if i > sindex[0] and expand: 263 | node = ast.node(kind='tilde', value=string[sindex[0]:i], 264 | pos=(sindex[0], i)) 265 | parts.append(node) 266 | istring += string[sindex[0]:i] 267 | sindex[0] = i 268 | 269 | elif c == '$' and len(string) > 1: 270 | tindex = 
sindex[0] 271 | node, sindex[0] = _paramexpand(parserobj, string, sindex[0]) 272 | if node: 273 | parts.append(node) 274 | istring += string[tindex:sindex[0]] 275 | elif c == '`': 276 | tindex = sindex[0] 277 | # bare instance of `` 278 | if nextchar() == '`': 279 | sindex[0] += 1 280 | istring += '``' 281 | else: 282 | x = _stringextract(string, sindex[0], "`") 283 | if x == -1: 284 | raise errors.ParsingError('bad substitution: no closing "`" ' 285 | 'in %s' % string) 286 | else: 287 | if wordtoken.flags & flags.word.NOCOMSUB: 288 | pass 289 | else: 290 | sindex[0] = x 291 | 292 | word = string[tindex+1:sindex[0]] 293 | command, ttindex = _recursiveparse(parserobj, word, 0) 294 | _adjustpositions(command, tindex+1, len(string)) 295 | ttindex += 1 # ttindex is on the closing char 296 | 297 | # assert sindex[0] == ttindex 298 | # go one past the closing ` 299 | sindex[0] += 1 300 | 301 | node = ast.node(kind='commandsubstitution', 302 | command=command, 303 | pos=(tindex, sindex[0])) 304 | parts.append(node) 305 | istring += string[tindex:sindex[0]] 306 | 307 | elif c == '\\': 308 | istring += string[sindex[0]+1:sindex[0]+2] 309 | sindex[0] += 2 310 | elif c == '"': 311 | sindex[0] += 1 312 | continue 313 | 314 | # 8513 315 | #if qdoublequotes or qheredocument: 316 | # sindex[0] += 1 317 | #else: 318 | # tindex = sindex[0] + 1 319 | # parts, sindex[0] = _stringextractdoublequoted(string, sindex[0]) 320 | # if tindex == 1 and sindex[0] == len(string): 321 | # quotedstate = 'wholly' 322 | # else: 323 | # quotedstate = 'partially' 324 | 325 | elif c == "'": 326 | # entire string surronded by single quotes, no expansion is 327 | # going to happen 328 | if sindex[0] == 0 and string[-1] == "'": 329 | return [], string[1:-1] 330 | 331 | # check if we're inside double quotes 332 | if not qdoublequotes: 333 | # look for the closing ', we know we have one or otherwise 334 | # this wouldn't tokenize due to unmatched ' 335 | tindex = sindex[0] 336 | sindex[0] = string.find("'", sindex[0]) + 1 337 | 338 | istring += string[tindex+1:sindex[0]-1] 339 | else: 340 | # this is a single quote inside double quotes, add it 341 | istring += c 342 | sindex[0] += 1 343 | else: 344 | istring += string[sindex[0]:sindex[0]+1] 345 | sindex[0] += 1 346 | 347 | if parts: 348 | class v(ast.nodevisitor): 349 | def visitnode(self, node): 350 | assert node.pos[1] + wordtoken.lexpos <= wordtoken.endlexpos 351 | node.pos = (node.pos[0] + wordtoken.lexpos, 352 | node.pos[1] + wordtoken.lexpos) 353 | visitor = v() 354 | for node in parts: 355 | visitor.visit(node) 356 | 357 | return parts, istring 358 | 359 | def _stringextract(string, sindex, charlist, sxvarname=False): 360 | found = False 361 | i = sindex 362 | while i < len(string): 363 | c = string[i] 364 | if c == '\\': 365 | if i + 1 < len(string): 366 | i += 1 367 | else: 368 | break 369 | elif sxvarname and c == '[': 370 | ni = _skipsubscript(string, i, 0) 371 | if string[ni] == ']': 372 | i = ni 373 | elif c in charlist: 374 | found = True 375 | break 376 | else: 377 | i += 1 378 | if found: 379 | return i 380 | else: 381 | return -1 382 | -------------------------------------------------------------------------------- /bashlex/parser.py: -------------------------------------------------------------------------------- 1 | import os, copy 2 | 3 | from bashlex import yacc, tokenizer, state, ast, subst, flags, errors, heredoc 4 | 5 | def _partsspan(parts): 6 | return parts[0].pos[0], parts[-1].pos[1] 7 | 8 | tokens = [e.name for e in tokenizer.tokentype] 9 | precedence = 
( 10 | ('left', 'AMPERSAND', 'SEMICOLON', 'NEWLINE', 'EOF'), 11 | ('left', 'AND_AND', 'OR_OR'), 12 | ('right', 'BAR', 'BAR_AND') 13 | ) 14 | 15 | def handleNotImplemented(p, type): 16 | if p.context._proceedonerror: 17 | parts = _makeparts(p) 18 | p[0] = ast.node(kind='unimplemented', parts=parts, pos=_partsspan(parts)) 19 | return 20 | if len(p) == 2: 21 | raise NotImplementedError('type = {%s}, token = {%s}' % (type, p[1])) 22 | else: 23 | raise NotImplementedError('type = {%s}, token = {%s}, parts = {%s}' % (type, p[1], p[2])) 24 | 25 | def handleAssert(p, test): 26 | if not test: 27 | raise AssertionError('token = {%s}' % p[1]) 28 | 29 | def p_inputunit(p): 30 | '''inputunit : simple_list simple_list_terminator 31 | | NEWLINE 32 | | error NEWLINE 33 | | EOF''' 34 | # XXX 35 | if p.lexer._parserstate & flags.parser.CMDSUBST: 36 | p.lexer._parserstate.add(flags.parser.EOFTOKEN) 37 | 38 | if isinstance(p[1], ast.node): 39 | p[0] = p[1] 40 | # accept right here in case the input contains more lines that are 41 | # not part of the current command 42 | p.accept() 43 | 44 | def p_word_list(p): 45 | '''word_list : WORD 46 | | word_list WORD''' 47 | parserobj = p.context 48 | if len(p) == 2: 49 | p[0] = [_expandword(parserobj, p.slice[1])] 50 | else: 51 | p[0] = p[1] 52 | p[0].append(_expandword(parserobj, p.slice[2])) 53 | 54 | def p_redirection_heredoc(p): 55 | '''redirection : LESS_LESS WORD 56 | | NUMBER LESS_LESS WORD 57 | | REDIR_WORD LESS_LESS WORD 58 | | LESS_LESS_MINUS WORD 59 | | NUMBER LESS_LESS_MINUS WORD 60 | | REDIR_WORD LESS_LESS_MINUS WORD''' 61 | parserobj = p.context 62 | assert isinstance(parserobj, _parser) 63 | 64 | output = ast.node(kind='word', word=p[len(p)-1], parts=[], 65 | pos=p.lexspan(len(p)-1)) 66 | if len(p) == 3: 67 | p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None, 68 | output=output, pos=(p.lexpos(1), p.endlexpos(2))) 69 | else: 70 | p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None, 71 | output=output, pos=(p.lexpos(1), p.endlexpos(3))) 72 | 73 | if p.slice[len(p)-2].ttype == tokenizer.tokentype.LESS_LESS: 74 | parserobj.redirstack.append((p[0], False)) 75 | else: 76 | parserobj.redirstack.append((p[0], True)) 77 | 78 | def p_redirection(p): 79 | '''redirection : GREATER WORD 80 | | LESS WORD 81 | | NUMBER GREATER WORD 82 | | NUMBER LESS WORD 83 | | REDIR_WORD GREATER WORD 84 | | REDIR_WORD LESS WORD 85 | | GREATER_GREATER WORD 86 | | NUMBER GREATER_GREATER WORD 87 | | REDIR_WORD GREATER_GREATER WORD 88 | | GREATER_BAR WORD 89 | | NUMBER GREATER_BAR WORD 90 | | REDIR_WORD GREATER_BAR WORD 91 | | LESS_GREATER WORD 92 | | NUMBER LESS_GREATER WORD 93 | | REDIR_WORD LESS_GREATER WORD 94 | | LESS_LESS_LESS WORD 95 | | NUMBER LESS_LESS_LESS WORD 96 | | REDIR_WORD LESS_LESS_LESS WORD 97 | | LESS_AND NUMBER 98 | | NUMBER LESS_AND NUMBER 99 | | REDIR_WORD LESS_AND NUMBER 100 | | GREATER_AND NUMBER 101 | | NUMBER GREATER_AND NUMBER 102 | | REDIR_WORD GREATER_AND NUMBER 103 | | LESS_AND WORD 104 | | NUMBER LESS_AND WORD 105 | | REDIR_WORD LESS_AND WORD 106 | | GREATER_AND WORD 107 | | NUMBER GREATER_AND WORD 108 | | REDIR_WORD GREATER_AND WORD 109 | | GREATER_AND DASH 110 | | NUMBER GREATER_AND DASH 111 | | REDIR_WORD GREATER_AND DASH 112 | | LESS_AND DASH 113 | | NUMBER LESS_AND DASH 114 | | REDIR_WORD LESS_AND DASH 115 | | AND_GREATER WORD 116 | | AND_GREATER_GREATER WORD''' 117 | parserobj = p.context 118 | if len(p) == 3: 119 | output = p[2] 120 | if p.slice[2].ttype == tokenizer.tokentype.WORD: 121 | output = 
_expandword(parserobj, p.slice[2]) 122 | p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None, 123 | output=output, pos=(p.lexpos(1), p.endlexpos(2))) 124 | else: 125 | output = p[3] 126 | if p.slice[3].ttype == tokenizer.tokentype.WORD: 127 | output = _expandword(parserobj, p.slice[3]) 128 | p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None, 129 | output=output, pos=(p.lexpos(1), p.endlexpos(3))) 130 | 131 | def _expandword(parser, tokenword): 132 | if parser._expansionlimit == -1: 133 | # we enter this branch in the following conditions: 134 | # - currently parsing a substitution as a result of an expansion 135 | # - the previous expansion had limit == 0 136 | # 137 | # this means that this node is a descendant of a substitution in an 138 | # unexpanded word and will be filtered in the limit == 0 condition below 139 | # 140 | # (the reason we even expand when limit == 0 is to get quote removal) 141 | node = ast.node(kind='word', word=tokenword, 142 | pos=(tokenword.lexpos, tokenword.endlexpos), parts=[]) 143 | return node 144 | else: 145 | quoted = bool(tokenword.flags & flags.word.QUOTED) 146 | doublequoted = quoted and tokenword.value[0] == '"' 147 | 148 | # TODO set qheredocument 149 | parts, expandedword = subst._expandwordinternal(parser, 150 | tokenword, 0, 151 | doublequoted, 0, 0) 152 | 153 | # limit reached, don't include substitutions (still expanded to get 154 | # quote removal though) 155 | if parser._expansionlimit == 0: 156 | parts = [node for node in parts if 'substitution' not in node.kind] 157 | 158 | node = ast.node(kind='word', word=expandedword, 159 | pos=(tokenword.lexpos, tokenword.endlexpos), parts=parts) 160 | return node 161 | 162 | def p_simple_command_element(p): 163 | '''simple_command_element : WORD 164 | | ASSIGNMENT_WORD 165 | | redirection''' 166 | if isinstance(p[1], ast.node): 167 | p[0] = [p[1]] 168 | return 169 | 170 | parserobj = p.context 171 | p[0] = [_expandword(parserobj, p.slice[1])] 172 | 173 | # change the word node to an assignment if necessary 174 | if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD: 175 | p[0][0].kind = 'assignment' 176 | 177 | def p_redirection_list(p): 178 | '''redirection_list : redirection 179 | | redirection_list redirection''' 180 | if len(p) == 2: 181 | p[0] = [p[1]] 182 | else: 183 | p[0] = p[1] 184 | p[0].append(p[2]) 185 | 186 | def p_simple_command(p): 187 | '''simple_command : simple_command_element 188 | | simple_command simple_command_element''' 189 | 190 | p[0] = p[1] 191 | if len(p) == 3: 192 | p[0].extend(p[2]) 193 | 194 | def p_command(p): 195 | '''command : simple_command 196 | | shell_command 197 | | shell_command redirection_list 198 | | function_def 199 | | coproc''' 200 | if isinstance(p[1], ast.node): 201 | p[0] = p[1] 202 | if len(p) == 3: 203 | handleAssert(p, p[0].kind == 'compound') 204 | p[0].redirects.extend(p[2]) 205 | handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) 206 | p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) 207 | else: 208 | p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1])) 209 | 210 | def p_shell_command(p): 211 | '''shell_command : for_command 212 | | case_command 213 | | WHILE compound_list DO compound_list DONE 214 | | UNTIL compound_list DO compound_list DONE 215 | | select_command 216 | | if_command 217 | | subshell 218 | | group_command 219 | | arith_command 220 | | cond_command 221 | | arith_for_command''' 222 | if len(p) == 2: 223 | p[0] = p[1] 224 | else: 225 | # while or until 226 | handleAssert(p, 
p[2].kind == 'list') 227 | 228 | parts = _makeparts(p) 229 | kind = parts[0].word 230 | assert kind in ('while', 'until') 231 | p[0] = ast.node(kind='compound', 232 | redirects=[], 233 | list=[ast.node(kind=kind, parts=parts, pos=_partsspan(parts))], 234 | pos=_partsspan(parts)) 235 | 236 | handleAssert(p, p[0].kind == 'compound') 237 | 238 | def _makeparts(p): 239 | parts = [] 240 | for i in range(1, len(p)): 241 | if isinstance(p[i], ast.node): 242 | parts.append(p[i]) 243 | elif isinstance(p[i], list): 244 | parts.extend(p[i]) 245 | elif isinstance(p.slice[i], tokenizer.token): 246 | if p.slice[i].ttype == tokenizer.tokentype.WORD: 247 | parserobj = p.context 248 | parts.append(_expandword(parserobj, p.slice[i])) 249 | else: 250 | parts.append(ast.node(kind='reservedword', word=p[i], 251 | pos=p.lexspan(i))) 252 | else: 253 | pass 254 | 255 | return parts 256 | 257 | def p_for_command(p): 258 | '''for_command : FOR WORD newline_list DO compound_list DONE 259 | | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY 260 | | FOR WORD SEMICOLON newline_list DO compound_list DONE 261 | | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY 262 | | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE 263 | | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY 264 | | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE 265 | | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY''' 266 | parts = _makeparts(p) 267 | # find the operatornode that we might have there due to 268 | # list_terminator/newline_list and convert it to a reservedword so its 269 | # considered as part of the for loop 270 | for i, part in enumerate(parts): 271 | if part.kind == 'operator' and part.op == ';': 272 | parts[i] = ast.node(kind='reservedword', word=';', pos=part.pos) 273 | break # there could be only one in there... 
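# illustrative note (not part of the original source): for input such as
#   for x in a b; do echo $x; done
# the parts list built above ends up holding reservedword nodes for
# 'for', 'in', ';', 'do' and 'done', word nodes for 'x', 'a' and 'b',
# and the node for the loop body; it is then wrapped in the
# 'for'/'compound' nodes constructed below.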
274 | 275 | p[0] = ast.node(kind='compound', 276 | redirects=[], 277 | list=[ast.node(kind='for', parts=parts, pos=_partsspan(parts))], 278 | pos=_partsspan(parts)) 279 | 280 | def p_arith_for_command(p): 281 | '''arith_for_command : FOR ARITH_FOR_EXPRS list_terminator newline_list DO compound_list DONE 282 | | FOR ARITH_FOR_EXPRS list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY 283 | | FOR ARITH_FOR_EXPRS DO compound_list DONE 284 | | FOR ARITH_FOR_EXPRS LEFT_CURLY compound_list RIGHT_CURLY''' 285 | handleNotImplemented(p, 'arithmetic for') 286 | 287 | def p_select_command(p): 288 | '''select_command : SELECT WORD newline_list DO list DONE 289 | | SELECT WORD newline_list LEFT_CURLY list RIGHT_CURLY 290 | | SELECT WORD SEMICOLON newline_list DO list DONE 291 | | SELECT WORD SEMICOLON newline_list LEFT_CURLY list RIGHT_CURLY 292 | | SELECT WORD newline_list IN word_list list_terminator newline_list DO list DONE 293 | | SELECT WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY list RIGHT_CURLY''' 294 | handleNotImplemented(p, 'select command') 295 | 296 | def p_case_command(p): 297 | '''case_command : CASE WORD newline_list IN newline_list ESAC 298 | | CASE WORD newline_list IN case_clause_sequence newline_list ESAC 299 | | CASE WORD newline_list IN case_clause ESAC''' 300 | parts = _makeparts(p) 301 | p[0] = ast.node(kind='compound', 302 | redirects=[], 303 | list=[ast.node(kind='case', parts=parts, pos=_partsspan(parts))], 304 | pos=_partsspan(parts)) 305 | 306 | def p_function_def(p): 307 | '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body 308 | | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body 309 | | FUNCTION WORD newline_list function_body''' 310 | parts = _makeparts(p) 311 | body = parts[-1] 312 | name = parts[ast.findfirstkind(parts, 'word')] 313 | 314 | p[0] = ast.node(kind='function', name=name, body=body, parts=parts, 315 | pos=_partsspan(parts)) 316 | 317 | def p_function_body(p): 318 | '''function_body : shell_command 319 | | shell_command redirection_list''' 320 | handleAssert(p, p[1].kind == 'compound') 321 | 322 | p[0] = p[1] 323 | if len(p) == 3: 324 | p[0].redirects.extend(p[2]) 325 | handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) 326 | p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) 327 | 328 | def p_subshell(p): 329 | '''subshell : LEFT_PAREN compound_list RIGHT_PAREN''' 330 | lparen = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1)) 331 | rparen = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)) 332 | parts = [lparen, p[2], rparen] 333 | p[0] = ast.node(kind='compound', list=parts, redirects=[], 334 | pos=_partsspan(parts)) 335 | 336 | def p_coproc(p): 337 | '''coproc : COPROC shell_command 338 | | COPROC shell_command redirection_list 339 | | COPROC WORD shell_command 340 | | COPROC WORD shell_command redirection_list 341 | | COPROC simple_command''' 342 | handleNotImplemented(p, 'coproc') 343 | 344 | def p_if_command(p): 345 | '''if_command : IF compound_list THEN compound_list FI 346 | | IF compound_list THEN compound_list ELSE compound_list FI 347 | | IF compound_list THEN compound_list elif_clause FI''' 348 | # we currently don't distinguish the various lists that make up the 349 | # command, because it's not needed later on. if there will be a need 350 | # we can always add different nodes for elif/else. 
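# illustrative note (not part of the original source): for
#   if a; then b; fi
# parts holds reservedword nodes for 'if', 'then' and 'fi' plus the
# compound_list-derived nodes for the condition and the body (each of
# which keeps its trailing ';' operator inside a list node), all
# wrapped in a single 'if' node inside a 'compound' node below.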
351 |     parts = _makeparts(p)
352 |     p[0] = ast.node(kind='compound',
353 |                     redirects=[],
354 |                     list=[ast.node(kind='if', parts=parts, pos=_partsspan(parts))],
355 |                     pos=_partsspan(parts))
356 | 
357 | def p_group_command(p):
358 |     '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
359 |     lcurly = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
360 |     rcurly = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
361 |     parts = [lcurly, p[2], rcurly]
362 |     p[0] = ast.node(kind='compound', list=parts, redirects=[],
363 |                     pos=_partsspan(parts))
364 | 
365 | def p_arith_command(p):
366 |     '''arith_command : ARITH_CMD'''
367 |     handleNotImplemented(p, 'arithmetic command')
368 | 
369 | def p_cond_command(p):
370 |     '''cond_command : COND_START COND_CMD COND_END'''
371 |     handleNotImplemented(p, 'cond command')
372 | 
373 | def p_elif_clause(p):
374 |     '''elif_clause : ELIF compound_list THEN compound_list
375 |                    | ELIF compound_list THEN compound_list ELSE compound_list
376 |                    | ELIF compound_list THEN compound_list elif_clause'''
377 |     parts = []
378 |     for i in range(1, len(p)):
379 |         if isinstance(p[i], ast.node):
380 |             parts.append(p[i])
381 |         else:
382 |             parts.append(ast.node(kind='reservedword', word=p[i], pos=p.lexspan(i)))
383 |     p[0] = parts
384 | 
385 | def p_case_clause(p):
386 |     '''case_clause : pattern_list
387 |                    | case_clause_sequence pattern_list'''
388 |     if len(p) == 2:
389 |         p[0] = [p[1]]
390 |     else:
391 |         p[0] = p[1]
392 |         p[0].append(p[2])
393 | 
394 | def p_pattern_list(p):
395 |     '''pattern_list : newline_list pattern RIGHT_PAREN compound_list
396 |                     | newline_list pattern RIGHT_PAREN newline_list
397 |                     | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list
398 |                     | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list'''
399 |     parts = []
400 |     if len(p) == 5:
401 |         parts.append(ast.node(kind='pattern', parts=p[2], pos=_partsspan(p[2])))
402 |         parts.append(ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)))
403 |         if isinstance(p[4], ast.node):
404 |             parts.append(p[4])
405 |     else:
406 |         parts.append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
407 |         parts.append(ast.node(kind='pattern', parts=p[3], pos=_partsspan(p[3])))
408 |         parts.append(ast.node(kind='reservedword', word=p[4], pos=p.lexspan(4)))
409 |         if isinstance(p[5], ast.node):
410 |             parts.append(p[5])
411 | 
412 |     p[0] = ast.node(kind='compound', list=parts, redirects=[], pos=_partsspan(parts))
413 | 
414 | def p_case_clause_sequence(p):
415 |     '''case_clause_sequence : pattern_list SEMI_SEMI
416 |                             | case_clause_sequence pattern_list SEMI_SEMI
417 |                             | pattern_list SEMI_AND
418 |                             | case_clause_sequence pattern_list SEMI_AND
419 |                             | pattern_list SEMI_SEMI_AND
420 |                             | case_clause_sequence pattern_list SEMI_SEMI_AND'''
421 |     if len(p) == 3:
422 |         p[0] = [p[1]]
423 |         p[0].append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
424 |     else:
425 |         p[0] = p[1]
426 |         p[0].append(p[2])
427 |         p[0].append(ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3)))
428 | 
429 | def p_pattern(p):
430 |     '''pattern : WORD
431 |                | pattern BAR WORD'''
432 | 
433 |     parserobj = p.context
434 |     if len(p) == 2:
435 |         p[0] = [_expandword(parserobj, p.slice[1])]
436 |     else:
437 |         p[0] = p[1]
438 |         p[0].append(ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)))
439 |         p[0].append(_expandword(parserobj, p.slice[3]))
440 | 
441 | def p_list(p):
442 |     '''list : newline_list list0'''
443 |     p[0] = p[2]
444 | 
445 | def p_compound_list(p):
446 |     '''compound_list : list
447 |                      | newline_list list1'''
448 |     if len(p) == 2:
449 |         p[0] = p[1]
450 |     else:
451 |         parts = p[2]
452 |         if len(parts) > 1:
453 |             p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
454 |         else:
455 |             p[0] = parts[0]
456 | 
457 | def p_list0(p):
458 |     '''list0 : list1 NEWLINE newline_list
459 |              | list1 AMPERSAND newline_list
460 |              | list1 SEMICOLON newline_list'''
461 |     parts = p[1]
462 |     if len(parts) > 1 or p.slice[2].ttype != tokenizer.tokentype.NEWLINE:
463 |         parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
464 |         p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
465 |     else:
466 |         p[0] = parts[0]
467 | 
468 | def p_list1(p):
469 |     '''list1 : list1 AND_AND newline_list list1
470 |              | list1 OR_OR newline_list list1
471 |              | list1 AMPERSAND newline_list list1
472 |              | list1 SEMICOLON newline_list list1
473 |              | list1 NEWLINE newline_list list1
474 |              | pipeline_command'''
475 |     if len(p) == 2:
476 |         p[0] = [p[1]]
477 |     else:
478 |         p[0] = p[1]
479 |         # XXX newline
480 |         p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
481 |         p[0].extend(p[len(p) - 1])
482 | 
483 | def p_simple_list_terminator(p):
484 |     '''simple_list_terminator : NEWLINE
485 |                               | EOF'''
486 |     pass
487 | 
488 | def p_list_terminator(p):
489 |     '''list_terminator : NEWLINE
490 |                        | SEMICOLON
491 |                        | EOF'''
492 |     if p[1] == ';':
493 |         p[0] = ast.node(kind='operator', op=';', pos=p.lexspan(1))
494 | 
495 | def p_newline_list(p):
496 |     '''newline_list : empty
497 |                     | newline_list NEWLINE'''
498 |     pass
499 | 
500 | def p_simple_list(p):
501 |     '''simple_list : simple_list1
502 |                    | simple_list1 AMPERSAND
503 |                    | simple_list1 SEMICOLON'''
504 |     tok = p.lexer
505 |     heredoc.gatherheredocuments(tok)
506 | 
507 |     if len(p) == 3 or len(p[1]) > 1:
508 |         parts = p[1]
509 |         if len(p) == 3:
510 |             parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
511 |         p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
512 |     else:
513 |         assert len(p[1]) == 1
514 |         p[0] = p[1][0]
515 | 
516 |     if (len(p) == 2 and p.lexer._parserstate & flags.parser.CMDSUBST and
517 |         p.lexer._current_token.nopos() == p.lexer._shell_eof_token):
518 |         # accept the input
519 |         p.accept()
520 | 
521 | def p_simple_list1(p):
522 |     '''simple_list1 : simple_list1 AND_AND newline_list simple_list1
523 |                     | simple_list1 OR_OR newline_list simple_list1
524 |                     | simple_list1 AMPERSAND simple_list1
525 |                     | simple_list1 SEMICOLON simple_list1
526 |                     | pipeline_command'''
527 |     if len(p) == 2:
528 |         p[0] = [p[1]]
529 |     else:
530 |         p[0] = p[1]
531 |         p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
532 |         p[0].extend(p[len(p) - 1])
533 | 
534 | def p_pipeline_command(p):
535 |     '''pipeline_command : pipeline
536 |                         | BANG pipeline_command
537 |                         | timespec pipeline_command
538 |                         | timespec list_terminator
539 |                         | BANG list_terminator'''
540 |     if len(p) == 2:
541 |         if len(p[1]) == 1:
542 |             p[0] = p[1][0]
543 |         else:
544 |             p[0] = ast.node(kind='pipeline', parts=p[1],
545 |                             pos=(p[1][0].pos[0], p[1][-1].pos[1]))
546 |     else:
547 |         # XXX timespec
548 |         node = ast.node(kind='reservedword', word='!', pos=p.lexspan(1))
549 |         if p[2].kind == 'pipeline':
550 |             p[0] = p[2]
551 |             p[0].parts.insert(0, node)
552 |             p[0].pos = (p[0].parts[0].pos[0], p[0].parts[-1].pos[1])
553 |         else:
554 |             p[0] = ast.node(kind='pipeline', parts=[node, p[2]],
555 |                             pos=(node.pos[0], p[2].pos[1]))
556 | 
557 | def p_pipeline(p):
558 |     '''pipeline : pipeline BAR newline_list pipeline
559 |                 | pipeline BAR_AND newline_list pipeline
560 |                 | command'''
561 |     if len(p) == 2:
562 |         p[0] = [p[1]]
563 |     else:
564 |         p[0] = p[1]
565 |         p[0].append(ast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
566 |         p[0].extend(p[len(p) - 1])
567 | 
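For orientation, a sketch of the shapes these list and pipeline productions yield (node kinds as described in the parse() docstring further down; the output is illustrative, not verbatim):

    import bashlex

    tree = bashlex.parsesingle('a | b && c')
    print(tree.kind)                      # 'list'
    print([n.kind for n in tree.parts])   # e.g. ['pipeline', 'operator', 'command']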
568 | def p_timespec(p):
569 |     '''timespec : TIME
570 |                 | TIME TIMEOPT
571 |                 | TIME TIMEOPT TIMEIGN'''
572 |     handleNotImplemented(p, 'time command')
573 | 
574 | def p_empty(p):
575 |     '''empty :'''
576 |     pass
577 | 
578 | def p_error(p):
579 |     assert isinstance(p, tokenizer.token)
580 | 
581 |     if p.ttype == tokenizer.tokentype.EOF:
582 |         raise errors.ParsingError('unexpected EOF',
583 |                                   p.lexer.source,
584 |                                   len(p.lexer.source))
585 |     else:
586 |         raise errors.ParsingError('unexpected token %r' % p.value,
587 |                                   p.lexer.source, p.lexpos)
588 | 
589 | yaccparser = yacc.yacc(outputdir=os.path.dirname(__file__),
590 |                        debug=False)
591 | 
592 | # a hack to fix yacc's reduction on command substitutions: the states to
593 | # fix are derived from the static transition tables, since state numbers
594 | # can change between python versions and architectures. the only state
595 | # that is considered stable is the initial state: 0
596 | def get_correction_states():
597 |     reduce = yaccparser.goto[0]['simple_list'] #~10
598 |     state2 = yaccparser.action[reduce]['NEWLINE'] #63
599 |     state1 = yaccparser.goto[reduce]['simple_list_terminator'] #~10
600 |     return state1, state2
601 | 
602 | def get_correction_rightparen_states():
603 |     state1 = yaccparser.goto[0]['pipeline_command']
604 |     state2 = yaccparser.goto[0]['simple_list1'] #11
605 |     state_temp = yaccparser.action[state2]['SEMICOLON'] #65
606 |     state3 = yaccparser.goto[state_temp]['simple_list1']
607 |     return state1, state2, state3
608 | 
609 | for tt in tokenizer.tokentype:
610 |     states = get_correction_states()
611 |     yaccparser.action[states[0]][tt.name] = -1
612 |     yaccparser.action[states[1]][tt.name] = -141
613 | 
614 | states = get_correction_rightparen_states()
615 | yaccparser.action[states[0]]['RIGHT_PAREN'] = -155
616 | yaccparser.action[states[1]]['RIGHT_PAREN'] = -148
617 | yaccparser.action[states[2]]['RIGHT_PAREN'] = -154
618 | 
619 | def parsesingle(s, strictmode=True, expansionlimit=None, convertpos=False, proceedonerror=False):
620 |     '''like parse, but only consumes a single top level node, e.g. parsing
621 |     'a\nb' will only return a node for 'a', leaving 'b' unparsed'''
622 |     p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit, proceedonerror=proceedonerror)
623 |     tree = p.parse()
624 |     if convertpos:
625 |         ast.posconverter(s).visit(tree)
626 |     return tree
627 | 
628 | def parse(s, strictmode=True, expansionlimit=None, convertpos=False, proceedonerror=False):
629 |     '''parse the input string, returning a list of nodes
630 | 
631 |     top level node kinds are:
632 | 
633 |     - command - a simple command
634 |     - pipeline - a series of simple commands connected by pipes
635 |     - list - a series of one or more pipelines
636 |     - compound - contains constructs for { list; }, (list), if, for..
637 | 
638 |     leaves are word nodes (which in turn can also contain any of the
639 |     aforementioned nodes due to command substitutions).
640 | 
641 |     when strictmode is set to False, we will:
642 |     - skip reading a heredoc if we're at the end of the input
643 | 
644 |     expansionlimit is used to limit the amount of recursive parsing done due to
645 |     command substitutions found during word expansion.
646 | 
647 |     when proceedonerror is set, the parser will return AST nodes for unimplemented features rather than raising NotImplementedError
648 |     '''
649 |     p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit, proceedonerror=proceedonerror)
650 |     parts = [p.parse()]
651 | 
652 |     # find the 'real' end in case we have a heredoc in there
653 |     ef = _endfinder()
654 |     ef.visit(parts[-1])
655 |     index = max(parts[-1].pos[1], ef.end) + 1
656 |     while index < len(s):
657 |         part = _parser(s[index:], strictmode=strictmode, proceedonerror=proceedonerror).parse()
658 | 
659 |         if not isinstance(part, ast.node):
660 |             break
661 | 
662 |         ast.posshifter(index).visit(part)
663 |         parts.append(part)
664 |         ef = _endfinder()
665 |         ef.visit(parts[-1])
666 |         index = max(parts[-1].pos[1], ef.end) + 1
667 | 
668 |     if convertpos:
669 |         for tree in parts:
670 |             ast.posconverter(s).visit(tree)
671 | 
672 |     return parts
673 | 
674 | def split(s):
675 |     '''a utility function that mimics shlex.split but handles more
676 |     complex shell constructs such as command substitutions inside words
677 | 
678 |     >>> list(split('a b"c"\\'d\\''))
679 |     ['a', 'bcd']
680 |     >>> list(split('a "b $(c)" $(d) \\'$(e)\\''))
681 |     ['a', 'b $(c)', '$(d)', '$(e)']
682 |     >>> list(split('a b\\n'))
683 |     ['a', 'b', '\\n']
684 |     '''
685 |     p = _parser(s)
686 |     for t in p.tok:
687 |         if t.ttype == tokenizer.tokentype.WORD:
688 |             quoted = bool(t.flags & flags.word.QUOTED)
689 |             doublequoted = quoted and t.value[0] == '"'
690 |             parts, expandedword = subst._expandwordinternal(p, t, 0,
691 |                                                             doublequoted, 0, 0)
692 |             yield expandedword
693 |         else:
694 |             yield s[t.lexpos:t.endlexpos]
695 | 
696 | class _parser(object):
697 |     '''
698 |     this class is mainly used to provide context to the productions
699 |     when we're in the middle of parsing. as a hack, we shove it into the
700 |     YaccProduction context attribute to make it accessible.
701 |     '''
702 |     def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None,
703 |                  proceedonerror=None):
704 |         assert expansionlimit is None or isinstance(expansionlimit, int)
705 | 
706 |         self.s = s
707 |         self._strictmode = strictmode
708 |         self._expansionlimit = expansionlimit
709 |         self._proceedonerror = proceedonerror
710 | 
711 |         if tokenizerargs is None:
712 |             tokenizerargs = {}
713 |         self.parserstate = tokenizerargs.pop('parserstate', state.parserstate())
714 | 
715 |         self.tok = tokenizer.tokenizer(s,
716 |                                        parserstate=self.parserstate,
717 |                                        strictmode=strictmode,
718 |                                        **tokenizerargs)
719 | 
720 |         self.redirstack = self.tok.redirstack
721 | 
722 |     def parse(self):
723 |         # yacc.yacc returns a parser object that is not reentrant since it
724 |         # has some mutable state. we make a shallow copy of it so no state
725 |         # spills over into the next call to parse
726 |         theparser = copy.copy(yaccparser)
727 |         tree = theparser.parse(lexer=self.tok, context=self)
728 | 
729 |         return tree
730 | 
731 | class _endfinder(ast.nodevisitor):
732 |     '''helper class to find the "real" end pos of a node that contains
733 |     a heredoc. this is a hack because heredocs aren't really part of any node
734 |     since they don't always follow the end of a node and might appear on
735 |     a different line'''
736 |     def __init__(self):
737 |         self.end = -1
738 |     def visitheredoc(self, node, value):
739 |         self.end = node.pos[1]
740 | 
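A short end-to-end sketch of the public API defined above, adapted from the project README and the doctests in split() (dump() output abbreviated):

    import bashlex

    # parse returns one tree per top level node; dump() pretty-prints a tree
    for tree in bashlex.parse('true && cat <<EOF\nhello\nEOF'):
        print(tree.dump())

    # split understands quoting and command substitutions inside words
    print(list(bashlex.split('a "b $(c)" $(d)')))   # ['a', 'b $(c)', '$(d)']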
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products.
If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. 
A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 
476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /bashlex/tokenizer.py: --------------------------------------------------------------------------------
1 | import re, collections, enum
2 | 
3 | from bashlex import flags, shutils, utils, errors, heredoc, state
4 | 
5 | sh_syntaxtab = collections.defaultdict(set)
6 | 
7 | def _addsyntax(chars, symbol):
8 |     for c in chars:
9 |         sh_syntaxtab[c].add(symbol)
10 | 
11 | _addsyntax('\\`$"\n', 'dquote')
12 | _addsyntax('()<>;&|', 'meta')
13 | _addsyntax('"`\'', 'quote')
14 | _addsyntax('$<>', 'exp')
15 | _addsyntax("()<>;&| \t\n", 'break')
16 | 
17 | def _shellblank(c):
18 |     return c in ' \t'
19 | 
20 | def _shellmeta(c):
21 |     return 'meta' in sh_syntaxtab[c]
22 | 
23 | def _shellquote(c):
24 |     return 'quote' in sh_syntaxtab[c]
25 | 
26 | def _shellexp(c):
27 |     return 'exp' in sh_syntaxtab[c]
28 | 
29 | def _shellbreak(c):
30 |     return 'break' in sh_syntaxtab[c]
31 | 
32 | class tokentype(enum.Enum):
33 |     IF = 1
34 |     THEN = 2
35 |     ELSE = 3
36 |     ELIF = 4
37 |     FI = 5
38 |     CASE = 6
39 |     ESAC = 7
40 |     FOR = 8
41 |     SELECT = 9
42 |     WHILE = 10
43 |     UNTIL = 11
44 |     DO = 12
45 |     DONE = 13
46 |     FUNCTION = 14
47 |     COPROC = 15
48 |     COND_START = 16
49 |     COND_END = 17
50 |     # https://github.com/idank/bashlex/issues/20
51 |     # COND_ERROR = 18
52 |     IN = 19
53 |     BANG = '!'
54 |     TIME = 21
55 |     TIMEOPT = 22
56 |     TIMEIGN = 23
57 |     WORD = 24
58 |     ASSIGNMENT_WORD = 25
59 |     REDIR_WORD = 26
60 |     NUMBER = 27
61 |     ARITH_CMD = 28
62 |     ARITH_FOR_EXPRS = 29
63 |     COND_CMD = 30
64 |     AND_AND = '&&'
65 |     OR_OR = '||'
66 |     GREATER_GREATER = '>>'
67 |     LESS_LESS = '<<'
68 |     LESS_AND = '<&'
69 |     LESS_LESS_LESS = '<<<'
70 |     GREATER_AND = '>&'
71 |     SEMI_SEMI = ';;'
72 |     SEMI_AND = ';&'
73 |     SEMI_SEMI_AND = ';;&'
74 |     LESS_LESS_MINUS = '<<-'
75 |     AND_GREATER = '&>'
76 |     AND_GREATER_GREATER = '&>>'
77 |     LESS_GREATER = '<>'
78 |     GREATER_BAR = '>|'
79 |     BAR_AND = '|&'
80 |     LEFT_CURLY = 47
81 |     RIGHT_CURLY = 48
82 |     EOF = '$end'
83 |     LEFT_PAREN = '('
84 |     RIGHT_PAREN = ')'
85 |     BAR = '|'
86 |     SEMICOLON = ';'
87 |     DASH = '-'
88 |     NEWLINE = '\n'
89 |     LESS = '<'
90 |     GREATER = '>'
91 |     AMPERSAND = '&'
92 | 
93 | _reserved = set([
94 |     tokentype.AND_AND, tokentype.BANG, tokentype.BAR_AND, tokentype.DO,
95 |     tokentype.DONE, tokentype.ELIF, tokentype.ELSE, tokentype.ESAC,
96 |     tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
97 |     tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
98 |     tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
99 |     tokentype.UNTIL, tokentype.WHILE])
100 | 
101 | for c in '\n;()|&{}':
102 |     _reserved.add(c)
103 | 
104 | # word_token_alist
105 | valid_reserved_first_command = {
106 |     "if" : tokentype.IF,
107 |     "then" : tokentype.THEN,
108 |     "else" : tokentype.ELSE,
109 |     "elif" : tokentype.ELIF,
110 |     "fi" : tokentype.FI,
111 |     "case" : tokentype.CASE,
112 |     "esac" : tokentype.ESAC,
113 |     "for" : tokentype.FOR,
114 |     "select" : tokentype.SELECT,
115 |     "while" : tokentype.WHILE,
116 |     "until" : tokentype.UNTIL,
117 |     "do" : tokentype.DO,
118 |     "done" : tokentype.DONE,
119 |     "in" : tokentype.IN,
120 |     "function" : tokentype.FUNCTION,
121 |     "time" : tokentype.TIME,
122 |     "{" : tokentype.LEFT_CURLY,
123 |     "}" : tokentype.RIGHT_CURLY,
124 |     "!" : tokentype.BANG,
125 |     "[[" : tokentype.COND_START,
126 |     "]]" : tokentype.COND_END,
127 |     "coproc" : tokentype.COPROC
128 | }
129 | 
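To see the reserved word table above in action, a small sketch (the printed token reprs are illustrative, not verbatim output):

    from bashlex import state, tokenizer

    tok = tokenizer.tokenizer('if true; then echo x; fi',
                              parserstate=state.parserstate())
    for t in tok:
        print(t)   # reserved words come out typed, e.g. <IF ...>, <THEN ...>, <FI ...>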
130 | class MatchedPairError(errors.ParsingError):
131 |     def __init__(self, startline, message, tokenizer):
132 |         # TODO use startline?
133 |         super(MatchedPairError, self).__init__(message,
134 |                                                tokenizer.source,
135 |                                                tokenizer._shell_input_line_index - 1)
136 | 
137 | wordflags = flags.word
138 | parserflags = flags.parser
139 | 
140 | class token(object):
141 |     def __init__(self, type_, value, pos=None, flags=None):
142 |         if type_ is not None:
143 |             assert isinstance(type_, tokentype)
144 | 
145 |         if flags is None:
146 |             flags = set()
147 | 
148 |         self.ttype = type_
149 | 
150 |         self.value = value
151 |         if pos is not None:
152 |             self.lexpos = pos[0]
153 |             self.endlexpos = pos[1]
154 |             assert self.lexpos < self.endlexpos, (self.lexpos, self.endlexpos)
155 |         else:
156 |             self.lexpos = self.endlexpos = None
157 | 
158 |         self.flags = flags
159 | 
160 |     @property
161 |     def type(self):
162 |         if self.ttype:
163 |             # make yacc see our EOF token as its own special one $end
164 |             if self.ttype == tokentype.EOF:
165 |                 return '$end'
166 |             else:
167 |                 return self.ttype.name
168 | 
169 |     def __nonzero__(self):
170 |         return not (self.ttype is None and self.value is None)
171 | 
172 |     __bool__ = __nonzero__
173 | 
174 |     def __eq__(self, other):
175 |         return isinstance(other, token) and (self.type == other.type and
176 |                                              self.value == other.value and
177 |                                              self.lexpos == other.lexpos and
178 |                                              self.endlexpos == other.endlexpos and
179 |                                              self.flags == other.flags)
180 | 
181 |     def __repr__(self):
182 |         s = ['<', self.type]
183 |         if self.lexpos is not None and self.endlexpos is not None:
184 |             s.append('@%d:%d' % (self.lexpos, self.endlexpos))
185 |         if self.value:
186 |             s.append(' ')
187 |             s.append(repr(self.value))
188 | 
189 |         if self.flags:
190 |             prettyflags = ' '.join([e.name for e in self.flags])
191 |             s.append(' (%s)' % prettyflags)
192 |         s.append('>')
193 |         return ''.join(s)
194 | 
195 |     def nopos(self):
196 |         return self.__class__(self.ttype, self.value, flags=self.flags)
197 | 
198 | eoftoken = token(tokentype.EOF, None)
199 | 
200 | class tokenizer(object):
201 |     def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
202 |                  lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
203 |         self._shell_eof_token = eoftoken
204 |         self._shell_input_line = s
205 |         self._added_newline = False
206 |         if self._shell_input_line and self._shell_input_line[-1] != '\n':
207 |             self._shell_input_line += '\n' # bash/parse.y L2431
208 |             self._added_newline = True
209 |         self._shell_input_line_index = 0
210 |         # self._shell_input_line_terminator = None
211 |         self._two_tokens_ago = twotokensago or token(None, None)
212 |         self._token_before_that = tokenbeforethat or token(None, None)
213 |         self._last_read_token = lastreadtoken or token(None, None)
214 |         self._current_token = token(None, None)
215 | 
216 |         # This implements one-character lookahead/lookbehind across physical
217 |         # input lines, to avoid something being lost because it's pushed back
218 |         # with shell_ungetc when we're at the start of a line.
219 |         self._eol_ungetc_lookahead = None
220 | 
221 |         # token waiting to be read
222 |         self._token_to_read = None
223 | 
224 |         self._parserstate = parserstate
225 |         self._line_number = 0
226 |         self._open_brace_count = 0
227 |         self._esacs_needed_count = 0
228 | 
229 |         self._dstack = []
230 | 
231 |         # a stack of positions to record the start and end of a token
232 |         self._positions = []
233 | 
234 |         self._strictmode = strictmode
235 | 
236 |         # hack: the tokenizer needs access to the stack of redirection
237 |         # nodes when it reads heredocs. this instance is shared between
this instance is shared between 238 | # the tokenizer and the parser, which also needs it 239 | self.redirstack = [] 240 | 241 | @property 242 | def source(self): 243 | if self._added_newline: 244 | return self._shell_input_line[:-1] 245 | return self._shell_input_line 246 | 247 | def __iter__(self): 248 | while True: 249 | t = self.token() 250 | # we're finished when we see the eoftoken OR when we added a newline 251 | # to the input and we're there now 252 | if t is eoftoken or (self._added_newline and 253 | t.lexpos + 1 == len(self._shell_input_line)): 254 | break 255 | yield t 256 | 257 | def _createtoken(self, type_, value, flags=None): 258 | '''create a token with position information''' 259 | pos = None 260 | assert len(self._positions) >= 2, (type_, value) 261 | p2 = self._positions.pop() 262 | p1 = self._positions.pop() 263 | pos = [p1, p2] 264 | return token(type_, value, pos, flags) 265 | 266 | def token(self): 267 | self._two_tokens_ago, self._token_before_that, self._last_read_token = \ 268 | self._token_before_that, self._last_read_token, self._current_token 269 | 270 | self._current_token = self._readtoken() 271 | if isinstance(self._current_token, tokentype): 272 | self._recordpos() 273 | self._current_token = self._createtoken(self._current_token, 274 | self._current_token.value) 275 | 276 | if (self._parserstate & parserflags.EOFTOKEN and 277 | self._current_token.ttype == self._shell_eof_token): 278 | self._current_token = eoftoken 279 | # bash/parse.y L2626 280 | self._parserstate.discard(parserflags.EOFTOKEN) 281 | 282 | return self._current_token 283 | 284 | def _readtoken(self): 285 | character = None 286 | peek_char = None 287 | 288 | if self._token_to_read is not None: 289 | t = self._token_to_read 290 | self._token_to_read = None 291 | return t 292 | 293 | # bashlex/parse.y L2989 COND_COMMAND 294 | character = self._getc(True) 295 | while character is not None and _shellblank(character): 296 | character = self._getc(True) 297 | 298 | if character is None: 299 | return eoftoken 300 | 301 | if character == '#': 302 | self._discard_until('\n') 303 | self._getc(False) 304 | character = '\n' 305 | 306 | self._recordpos(1) 307 | 308 | if character == '\n': 309 | # bashlex/parse.y L3034 ALIAS 310 | heredoc.gatherheredocuments(self) 311 | 312 | self._parserstate.discard(parserflags.ASSIGNOK) 313 | return tokentype(character) 314 | 315 | if self._parserstate & parserflags.REGEXP: 316 | return self._readtokenword(character) 317 | 318 | if _shellmeta(character) and not (self._parserstate & parserflags.DBLPAREN): 319 | self._parserstate.discard(parserflags.ASSIGNOK) 320 | peek_char = self._getc(True) 321 | 322 | both = character 323 | if peek_char: 324 | both += peek_char 325 | if character == peek_char: 326 | if character == '<': 327 | peek_char = self._getc() 328 | if peek_char == '-': 329 | return tokentype.LESS_LESS_MINUS 330 | elif peek_char == '<': 331 | return tokentype.LESS_LESS_LESS 332 | else: 333 | self._ungetc(peek_char) 334 | return tokentype.LESS_LESS 335 | elif character == '>': 336 | return tokentype.GREATER_GREATER 337 | elif character == ';': 338 | self._parserstate |= parserflags.CASEPAT 339 | # bashlex/parse.y L3085 ALIAS 340 | peek_char = self._getc() 341 | if peek_char == '&': 342 | return tokentype.SEMI_SEMI_AND 343 | else: 344 | self._ungetc(peek_char) 345 | return tokentype.SEMI_SEMI 346 | elif character == '&': 347 | return tokentype.AND_AND 348 | elif character == '|': 349 | return tokentype.OR_OR 350 | # bashlex/parse.y L3105 351 | elif both == 
'<&': 352 | return tokentype.LESS_AND 353 | elif both == '>&': 354 | return tokentype.GREATER_AND 355 | elif both == '<>': 356 | return tokentype.LESS_GREATER 357 | elif both == '>|': 358 | return tokentype.GREATER_BAR 359 | elif both == '&>': 360 | peek_char = self._getc() 361 | if peek_char == '>': 362 | return tokentype.AND_GREATER_GREATER 363 | else: 364 | self._ungetc(peek_char) 365 | return tokentype.AND_GREATER 366 | elif both == '|&': 367 | return tokentype.BAR_AND 368 | elif both == ';&': 369 | return tokentype.SEMI_AND 370 | 371 | self._ungetc(peek_char) 372 | if character == ')' and self._last_read_token.value == '(' and self._token_before_that.ttype == tokentype.WORD: 373 | self._parserstate.add(parserflags.ALLOWOPNBRC) 374 | # bashlex/parse.y L3155 375 | 376 | if character == '(' and not self._parserstate & parserflags.CASEPAT: 377 | self._parserstate.add(parserflags.SUBSHELL) 378 | elif self._parserstate & parserflags.CASEPAT and character == ')': 379 | self._parserstate.discard(parserflags.CASEPAT) 380 | elif self._parserstate & parserflags.SUBSHELL and character == ')': 381 | self._parserstate.discard(parserflags.SUBSHELL) 382 | 383 | if character not in '<>' or peek_char != '(': 384 | return tokentype(character) 385 | 386 | if character == '-' and (self._last_read_token.ttype == tokentype.LESS_AND or self._last_read_token.ttype == tokentype.GREATER_AND): 387 | return tokentype(character) 388 | 389 | return self._readtokenword(character) 390 | 391 | def _readtokenword(self, c): 392 | d = {} 393 | d['all_digit_token'] = c.isdigit() 394 | d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False 395 | 396 | tokenword = [] 397 | 398 | def handleshellquote(): 399 | self._push_delimiter(c) 400 | try: 401 | ttok = self._parse_matched_pair(c, c, c, parsingcommand=(c == '`')) 402 | finally: 403 | self._pop_delimiter() 404 | 405 | tokenword.append(c) 406 | tokenword.extend(ttok) 407 | d['all_digit_token'] = False 408 | d['quoted'] = True 409 | if not d['dollar_present']: 410 | d['dollar_present'] = c == '"' and '$' in ttok 411 | 412 | def handleshellexp(): 413 | peek_char = self._getc() 414 | if peek_char == '(' or (c == '$' and peek_char in '{['): 415 | # try: 416 | if peek_char == '{': 417 | ttok = self._parse_matched_pair(cd, '{', '}', firstclose=True, dolbrace=True) 418 | elif peek_char == '(': 419 | self._push_delimiter(peek_char) 420 | ttok = self._parse_comsub(cd, '(', ')', parsingcommand=True) 421 | self._pop_delimiter() 422 | else: 423 | ttok = self._parse_matched_pair(cd, '[', ']') 424 | # except MatchedPairError: 425 | # return -1 426 | 427 | tokenword.append(c) 428 | tokenword.append(peek_char) 429 | tokenword.extend(ttok) 430 | d['dollar_present'] = True 431 | d['all_digit_token'] = False 432 | 433 | # goto next_character 434 | elif c == '$' and peek_char in '\'"': 435 | self._push_delimiter(peek_char) 436 | try: 437 | ttok = self._parse_matched_pair(peek_char, peek_char, peek_char, 438 | allowesc=(peek_char == "'")) 439 | # except MatchedPairError: 440 | # return -1 441 | finally: 442 | self._pop_delimiter() 443 | 444 | #if peek_char == "'": 445 | # # XXX ansiexpand 446 | # ttok = shutils.single_quote(ttok) 447 | #else: 448 | # ttok = shutils.double_quote(ttok) 449 | 450 | tokenword.append(c) 451 | tokenword.append(peek_char) 452 | tokenword.extend(ttok) 453 | d['quoted'] = True 454 | d['all_digit_token'] = False 455 | 456 | # goto next_character 457 | elif c == '$' and peek_char == '$': 458 | tokenword.append('$') 459 | 
tokenword.append('$') 460 | d['dollar_present'] = True 461 | d['all_digit_token'] = False 462 | 463 | # goto next_character 464 | else: 465 | self._ungetc(peek_char) 466 | return True 467 | 468 | # bashlex/parse.y L4699 ARRAY_VARS 469 | 470 | def handleescapedchar(): 471 | tokenword.append(c) 472 | d['all_digit_token'] &= c.isdigit() 473 | if not d['dollar_present']: 474 | d['dollar_present'] = c == '$' 475 | 476 | while True: 477 | if c is None: 478 | break 479 | 480 | if d['pass_next_character']: 481 | d['pass_next_character'] = False 482 | handleescapedchar() 483 | # goto escaped_character 484 | else: 485 | cd = self._current_delimiter() 486 | gotonext = False 487 | if c == '\\': 488 | peek_char = self._getc(False) 489 | 490 | if peek_char == '\n': 491 | c = '\n' 492 | gotonext = True 493 | # goto next_character 494 | else: 495 | self._ungetc(peek_char) 496 | 497 | if (cd is None or cd == '`' or 498 | (cd == '"' and peek_char is not None and 499 | 'dquote' in sh_syntaxtab[peek_char])): 500 | d['pass_next_character'] = True 501 | d['quoted'] = True 502 | 503 | handleescapedchar() 504 | gotonext = True 505 | # goto got_character 506 | elif _shellquote(c): 507 | handleshellquote() 508 | gotonext = True 509 | # goto next_character 510 | # bashlex/parse.y L4542 511 | # bashlex/parse.y L4567 512 | elif _shellexp(c): 513 | gotonext = not handleshellexp() 514 | # bashlex/parse.y L4699 515 | if not gotonext: 516 | if _shellbreak(c): 517 | self._ungetc(c) 518 | break 519 | else: 520 | handleescapedchar() 521 | 522 | # got_character 523 | # got_escaped_character 524 | 525 | # tokenword.append(c) 526 | # all_digit_token &= c.isdigit() 527 | # if not dollar_present: 528 | # dollar_present = c == '$' 529 | 530 | # next_character 531 | cd = self._current_delimiter() 532 | c = self._getc(cd != "'" and not d['pass_next_character']) 533 | 534 | # got_token 535 | self._recordpos() 536 | 537 | tokenword = ''.join(tokenword) 538 | 539 | if d['all_digit_token'] and (c in '<>' or self._last_read_token.ttype in (tokentype.LESS_AND, tokentype.GREATER_AND)) and shutils.legal_number(tokenword): 540 | return self._createtoken(tokentype.NUMBER, int(tokenword)) 541 | 542 | # bashlex/parse.y L4811 543 | specialtokentype = self._specialcasetokens(tokenword) 544 | if specialtokentype: 545 | return self._createtoken(specialtokentype, tokenword) 546 | 547 | if not d['dollar_present'] and not d['quoted'] and self._reserved_word_acceptable(self._last_read_token): 548 | if tokenword in valid_reserved_first_command: 549 | ttype = valid_reserved_first_command[tokenword] 550 | ps = self._parserstate 551 | if ps & parserflags.CASEPAT and ttype != tokentype.ESAC: 552 | pass 553 | elif ttype == tokentype.TIME and not self._time_command_acceptable(): 554 | pass 555 | elif ttype == tokentype.ESAC: 556 | ps.discard(parserflags.CASEPAT) 557 | ps.discard(parserflags.CASESTMT) 558 | elif ttype == tokentype.CASE: 559 | ps.add(parserflags.CASESTMT) 560 | elif ttype == tokentype.COND_END: 561 | ps.discard(parserflags.CONDCMD) 562 | ps.discard(parserflags.CONDEXPR) 563 | elif ttype == tokentype.COND_START: 564 | ps.add(parserflags.CONDCMD) 565 | elif ttype == tokentype.LEFT_CURLY: 566 | self._open_brace_count += 1 567 | elif ttype == tokentype.RIGHT_CURLY and self._open_brace_count: 568 | self._open_brace_count -= 1 569 | return self._createtoken(ttype, tokenword) 570 | 571 | tokenword = self._createtoken(tokentype.WORD, tokenword, utils.typedset(wordflags)) 572 | if d['dollar_present']: 573 | tokenword.flags.add(wordflags.HASDOLLAR) 574 | 
if d['quoted']: 575 | tokenword.flags.add(wordflags.QUOTED) 576 | if d['compound_assignment'] and tokenword.value[-1] == ')': 577 | tokenword.flags.add(wordflags.COMPASSIGN) 578 | if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)): 579 | tokenword.flags.add(wordflags.ASSIGNMENT) 580 | if self._assignment_acceptable(self._last_read_token): 581 | tokenword.flags.add(wordflags.NOSPLIT) 582 | if self._parserstate & parserflags.COMPASSIGN: 583 | tokenword.flags.add(wordflags.NOGLOB) 584 | 585 | # bashlex/parse.y L4865 586 | if self._command_token_position(self._last_read_token): 587 | pass 588 | 589 | if tokenword.value[0] == '{' and tokenword.value[-1] == '}' and c in '<>': 590 | if shutils.legal_identifier(tokenword.value[1:]): 591 | # XXX is this needed? 592 | tokenword.value = tokenword.value[1:] 593 | tokenword.ttype = tokentype.REDIR_WORD 594 | 595 | return tokenword 596 | 597 | if len(tokenword.flags & set([wordflags.ASSIGNMENT, wordflags.NOSPLIT])) == 2: 598 | tokenword.ttype = tokentype.ASSIGNMENT_WORD 599 | 600 | if self._last_read_token.ttype == tokentype.FUNCTION: 601 | self._parserstate.add(parserflags.ALLOWOPNBRC) 602 | self._function_dstart = self._line_number 603 | elif self._last_read_token.ttype in (tokentype.CASE, tokentype.SELECT, tokentype.FOR): 604 | pass # bashlex/parse.y L4907 605 | 606 | return tokenword 607 | 608 | def _parse_comsub(self, doublequotes, open, close, parsingcommand=False, 609 | dquote=False, firstclose=False): 610 | peekc = self._getc(False) 611 | self._ungetc(peekc) 612 | 613 | if peekc == '(': 614 | return self._parse_matched_pair(doublequotes, open, close) 615 | 616 | count = 1 617 | dollarok = True 618 | 619 | checkcase = bool(parsingcommand and (doublequotes is None or doublequotes not in "'\"") and not dquote) 620 | checkcomment = checkcase 621 | 622 | startlineno = self._line_number 623 | heredelim = '' 624 | stripdoc = insideheredoc = insidecomment = insideword = insidecase = False 625 | readingheredocdelim = False 626 | wasdollar = passnextchar = False 627 | reservedwordok = True 628 | lexfirstind = -1 629 | lexrwlen = 0 630 | 631 | ret = '' 632 | 633 | while count: 634 | c = self._getc(doublequotes != "'" and not insidecomment and not passnextchar) 635 | 636 | if c is None: 637 | raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) 638 | 639 | # bashlex/parse.y L3571 640 | if c == '\n': 641 | if readingheredocdelim and heredelim: 642 | readingheredocdelim = False 643 | insideheredoc = True 644 | lexfirstind = len(ret) + 1 645 | elif insideheredoc: 646 | tind = lexfirstind 647 | while stripdoc and ret[tind] == '\t': 648 | tind += 1 649 | if ret[tind:] == heredelim: 650 | stripdoc = insideheredoc = False 651 | heredelim = '' 652 | lexfirstind = -1 653 | else: 654 | lexfirstind = len(ret) + 1 655 | # bashlex/parse.y L3599 656 | if insideheredoc and c == close and count == 1: 657 | tind = lexfirstind 658 | while stripdoc and ret[tind] == '\t': 659 | tind += 1 660 | if ret[tind:] == heredelim: 661 | stripdoc = insideheredoc = False 662 | heredelim = '' 663 | lexfirstind = -1 664 | 665 | if insidecomment or insideheredoc: 666 | ret += c 667 | 668 | if insidecomment and c == '\n': 669 | insidecomment = False 670 | 671 | continue 672 | 673 | if passnextchar: 674 | passnextchar = False 675 | # XXX is this needed?
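| # the disabled block below mirrors bash, which swallows a | # backslash-newline pair at this point; the live code keeps both | # characters, presumably so token positions keep lining up with the | # original input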
676 | # if doublequotes != "'" and c == '\n': 677 | # if ret: 678 | # ret = ret[:-1] 679 | # else: 680 | # ret += c 681 | ret += c 682 | continue 683 | 684 | if _shellbreak(c): 685 | insideword = False 686 | else: 687 | if insideword: 688 | lexwlen += 1 689 | else: 690 | insideword = True 691 | lexwlen = 0 692 | 693 | if _shellblank(c) and not readingheredocdelim and not lexrwlen: 694 | ret += c 695 | continue 696 | 697 | # bashlex/parse.y L3686 698 | if readingheredocdelim: 699 | if lexfirstind == -1 and not _shellbreak(c): 700 | lexfirstind = len(ret) 701 | elif lexfirstind >= 0 and not passnextchar and _shellbreak(c): 702 | if not heredelim: 703 | nestret = ret[lexfirstind:] 704 | heredelim = shutils.removequotes(nestret) 705 | if c == '\n': 706 | insideheredoc = True 707 | readingheredocdelim = False 708 | lexfirstind = len(ret) + 1 709 | else: 710 | lexfirstind = -1 711 | 712 | if not reservedwordok and checkcase and not insidecomment and (_shellmeta(c) or c == '\n'): 713 | ret += c 714 | peekc = self._getc(True) 715 | if c == peekc and c in '&|;': 716 | ret += peekc 717 | reservedwordok = True 718 | lexrwlen = 0 719 | continue 720 | elif c == '\n' or c in '&|;': 721 | self._ungetc(peekc) 722 | reservedwordok = True 723 | lexrwlen = 0 724 | continue 725 | elif c is None: 726 | raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) # pragma: no coverage 727 | else: 728 | ret = ret[:-1] 729 | self._ungetc(peekc) 730 | 731 | # bashlex/parse.y L3761 732 | if reservedwordok: 733 | if c.islower(): 734 | ret += c 735 | lexrwlen += 1 736 | continue 737 | elif lexrwlen == 4 and _shellbreak(c): 738 | if ret[-4:] == 'case': 739 | insidecase = True 740 | elif ret[-4:] == 'esac': 741 | insidecase = False 742 | reservedwordok = False 743 | elif (checkcomment and c == '#' and (lexrwlen == 0 or 744 | (insideword and lexwlen == 0))): 745 | pass 746 | elif (not insidecase and (_shellblank(c) or c == '\n') and 747 | lexrwlen == 2 and ret[-2:] == 'do'): 748 | lexrwlen = 0 749 | elif insidecase and c != '\n': 750 | reservedwordok = False 751 | elif not _shellbreak(c): 752 | reservedwordok = False 753 | 754 | if not insidecomment and checkcase and c == '<': 755 | ret += c 756 | peekc = self._getc(True) 757 | if peekc is None: 758 | raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) 759 | if peekc == c: 760 | ret += peekc 761 | peekc = self._getc(True) 762 | if peekc is None: 763 | raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) 764 | elif peekc == '-': 765 | ret += peekc 766 | stripdoc = True 767 | else: 768 | self._ungetc(peekc) 769 | 770 | if peekc != '<': 771 | readingheredocdelim = True 772 | lexfirstind = -1 773 | 774 | continue 775 | else: 776 | c = peekc 777 | elif checkcomment and not insidecomment and c == '#' and ((reservedwordok 778 | and lexrwlen == 0) or (insideword and lexwlen == 0)): 779 | insidecomment = True 780 | 781 | if c == close and not insidecase: 782 | count -= 1 783 | elif not firstclose and not insidecase and c == open: 784 | count += 1 785 | 786 | ret += c 787 | 788 | if count == 0: 789 | break 790 | 791 | if c == '\\': 792 | passnextchar = True 793 | 794 | # bashlex/parse.y L3897 795 | if _shellquote(c): 796 | self._push_delimiter(c) 797 | try: 798 | if wasdollar and c == "'": 799 | nestret = self._parse_matched_pair(c, c, c, 800 | allowesc=True, 801 | dquote=True) 802 | else: 803 | nestret = self._parse_matched_pair(c, c, c, 804 | dquote=True)
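| # both branches recurse with the quote character as its own delimiter; | # allowesc is only set for $'...' strings, where backslash escapes | # are honored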
805 | finally: 806 | self._pop_delimiter() 807 | 808 | # XXX is this necessary? 809 | # if wasdollar and c == "'" and not rdquote: 810 | # if not rdquote: 811 | # nestret = shutils.single_quote(nestret) 812 | # ret = ret[:-2] 813 | # elif wasdollar and c == '"' and not rdquote: 814 | # nestret = shutils.double_quote(nestret) 815 | # ret = ret[:-2] 816 | 817 | ret += nestret 818 | # check for $(), $[], or ${} inside command substitution 819 | elif wasdollar and c in '({[': 820 | if not insidecase and open == c: 821 | count -= 1 822 | if c == '(': 823 | nestret = self._parse_comsub(None, '(', ')', 824 | parsingcommand=True, 825 | dquote=False) 826 | elif c == '{': 827 | nestret = self._parse_matched_pair(None, '{', '}', 828 | firstclose=True, 829 | dolbrace=True, 830 | dquote=True) 831 | elif c == '[': 832 | nestret = self._parse_matched_pair(None, '[', ']', 833 | dquote=True) 834 | 835 | ret += nestret 836 | 837 | wasdollar = c == '$' 838 | 839 | return ret 840 | 841 | def _parse_matched_pair(self, doublequotes, open, close, parsingcommand=False, allowesc=False, dquote=False, firstclose=False, dolbrace=False, arraysub=False): 842 | count = 1 843 | dolbracestate = '' 844 | if dolbrace: 845 | dolbracestate = 'param' 846 | 847 | insidecomment = False 848 | lookforcomments = False 849 | sawdollar = False 850 | 851 | if parsingcommand and (doublequotes is None or doublequotes not in "`'\"") and not dquote: 852 | lookforcomments = True 853 | 854 | rdquote = True if doublequotes == '"' else dquote 855 | passnextchar = False 856 | startlineno = self._line_number 857 | 858 | ret = '' 859 | 860 | def handledollarword(): 861 | # callers undo the earlier 'count' increment before invoking us; 862 | # assigning to count here would not rebind the enclosing variable 863 | # (python 2 has no nonlocal) 864 | # bashlex/parse.y L3486 865 | if c == '(': 866 | return self._parse_comsub(None, '(', ')', 867 | parsingcommand=True, 868 | dquote=False) 869 | elif c == '{': 870 | return self._parse_matched_pair(None, '{', '}', 871 | firstclose=True, 872 | dquote=rdquote, 873 | dolbrace=True) 874 | elif c == '[': 875 | return self._parse_matched_pair(None, '[', ']', dquote=rdquote) 876 | else: 877 | assert False # pragma: no cover 878 | 879 | while count: 880 | c = self._getc(doublequotes != "'" and not passnextchar) 881 | if c is None: 882 | raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) 883 | 884 | # bashlex/parse.y L3285 885 | # if c == '\n': 886 | # continue 887 | 888 | if insidecomment: 889 | ret += c 890 | if c == '\n': 891 | insidecomment = False 892 | continue 893 | elif lookforcomments and not insidecomment and c == '#' and (not ret 894 | or ret[-1] == '\n' or _shellblank(ret[-1])): 895 | insidecomment = True 896 | 897 | # last char was backslash 898 | if passnextchar: 899 | passnextchar = False 900 | #if doublequotes != "'" and c == '\n': 901 | # if ret: 902 | # ret = ret[:-1] 903 | # continue 904 | ret += c 905 | continue 906 | elif c == close: 907 | count -= 1 908 | elif open != close and sawdollar and open == '{' and c == open: 909 | count += 1 910 | elif not firstclose and c == open: 911 | count += 1 912 | 913 | ret += c 914 | if count == 0: 915 | break 916 | 917 | if open == "'": 918 | if allowesc and c == "\\": 919 | passnextchar = True 920 | continue 921 | if c == "\\": 922 | passnextchar = True 923 | if dolbrace: 924 | if dolbracestate == 'param': 925 | if len(ret) > 1: 926 | dd = {'%' : 'quote', '#' : 'quote', '/' : 'quote2', '^' : 'quote', 927 | ',' : 'quote'} 928 | if c in dd: 929 | dolbracestate = dd[c] 930 | elif c in '#%^,~:-=?+/': 931 | dolbracestate = 'op' 932 | if dolbracestate == 'op' and c not in 
'#%^,~:-=?+/': 933 | dolbracestate = 'word' 934 | 935 | if dolbracestate not in ('quote', 'quote2') and dquote and dolbrace and c == "'": 936 | continue 937 | 938 | if open != close: 939 | if _shellquote(c): 940 | self._push_delimiter(c) 941 | try: 942 | if sawdollar and c == "'": 943 | nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=True, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace) 944 | else: 945 | nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace) 946 | finally: 947 | self._pop_delimiter() 948 | 949 | # bashlex/parse.y L3419 950 | if sawdollar and c == "'": 951 | pass 952 | elif sawdollar and c == '"': 953 | ret = ret[:-2] # back up before the $" 954 | 955 | ret += nestret 956 | elif arraysub and sawdollar and c in '({[': 957 | if open == c: count -= 1 # goto parse_dollar_word; undo previous increment 958 | ret += handledollarword() 959 | elif open == '"' and c == '`': 960 | ret += self._parse_matched_pair(None, '`', '`', parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace) 961 | elif open != '`' and sawdollar and c in '({[': 962 | if open == c: count -= 1 # undo previous increment 963 | ret += handledollarword() 964 | sawdollar = c == '$' 965 | 966 | return ret 967 | 968 | 969 | def _is_assignment(self, value, iscompassign): 970 | c = value[0] 971 | 972 | def legalvariablechar(x): 973 | return x.isalnum() or x == '_' 974 | 975 | if not c.isalpha() and c != '_': 976 | return 977 | 978 | for i, c in enumerate(value): 979 | if c == '=': 980 | return i 981 | 982 | # bash/general.c L289 983 | if c == '+' and i + 1 < len(value) and value[i+1] == '=': 984 | return i+1 985 | 986 | if not legalvariablechar(c): 987 | return False 988 | 989 | def _command_token_position(self, token): 990 | return (token.ttype == tokentype.ASSIGNMENT_WORD or 991 | self._parserstate & parserflags.REDIRLIST or 992 | (token.ttype not in (tokentype.SEMI_SEMI, tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND) and self._reserved_word_acceptable(token))) 993 | 994 | def _assignment_acceptable(self, token): 995 | return self._command_token_position(token) and not self._parserstate & parserflags.CASEPAT 996 | 997 | def _time_command_acceptable(self): 998 | pass 999 | 1000 | def _reserved_word_acceptable(self, tok): 1001 | if not tok or (tok.ttype in _reserved or tok.value in _reserved): 1002 | return True 1003 | # bash/parse.y L4955 COPROCESS_SUPPORT 1004 | 1005 | if (self._last_read_token.ttype == tokentype.WORD and 1006 | self._token_before_that.ttype == tokentype.FUNCTION): 1007 | return True 1008 | 1009 | return False 1010 | 1011 | def _pop_delimiter(self): 1012 | self._dstack.pop() 1013 | 1014 | def _push_delimiter(self, c): 1015 | self._dstack.append(c) 1016 | 1017 | def _current_delimiter(self): 1018 | if self._dstack: 1019 | return self._dstack[-1] 1020 | 1021 | def _ungetc(self, c): 1022 | if (self._shell_input_line and self._shell_input_line_index 1023 | and self._shell_input_line_index <= len(self._shell_input_line)): 1024 | self._shell_input_line_index -= 1 1025 | else: 1026 | self._eol_ungetc_lookahead = c 1027 | 1028 | def _getc(self, remove_quoted_newline=True): 1029 | if self._eol_ungetc_lookahead is not None: 1030 | c = self._eol_ungetc_lookahead 1031 | self._eol_ungetc_lookahead = None 1032 | return c 1033 | 1034 | # bash/parse.y L2220 1035 | 1036 | while True: 1037 | if self._shell_input_line_index < len(self._shell_input_line): 1038 | c = self._shell_input_line[self._shell_input_line_index] 1039 | 
self._shell_input_line_index += 1 1040 | else: 1041 | c = None 1042 | 1043 | if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n': 1044 | self._line_number += 1 1045 | # skip past the newline 1046 | self._shell_input_line_index += 1 1047 | continue 1048 | else: 1049 | return c 1050 | 1051 | #if c is None and self._shell_input_line_terminator is None: 1052 | # if self._shell_input_line_index != 0: 1053 | # return '\n' 1054 | # else: 1055 | # return None 1056 | 1057 | #return c 1058 | 1059 | def _discard_until(self, character): 1060 | c = self._getc(False) 1061 | while c is not None and c != character: 1062 | c = self._getc(False) 1063 | if c is not None: 1064 | self._ungetc(c) 1065 | 1066 | def _recordpos(self, relativeoffset=0): 1067 | '''record the current index of the tokenizer into the positions stack, 1068 | after subtracting relativeoffset from it''' 1069 | self._positions.append(self._shell_input_line_index - relativeoffset) 1070 | 1071 | def readline(self, removequotenewline): 1072 | linebuffer = [] 1073 | passnext = indx = 0 1074 | while True: 1075 | c = self._getc() 1076 | if c is None: 1077 | if indx == 0: 1078 | return None 1079 | c = '\n' 1080 | 1081 | if passnext: 1082 | linebuffer.append(c) 1083 | indx += 1 1084 | passnext = False 1085 | elif c == '\\' and removequotenewline: 1086 | peekc = self._getc() 1087 | if peekc == '\n': 1088 | self._line_number += 1 1089 | continue 1090 | else: 1091 | self._ungetc(peekc) 1092 | passnext = True 1093 | linebuffer.append(c) 1094 | indx += 1 1095 | else: 1096 | linebuffer.append(c) 1097 | indx += 1 1098 | 1099 | if c == '\n': 1100 | return ''.join(linebuffer) 1101 | 1102 | def _peekc(self, *args): 1103 | peek_char = self._getc(*args) 1104 | # only unget if we actually read something 1105 | if peek_char is not None: 1106 | self._ungetc(peek_char) 1107 | return peek_char 1108 | 1109 | def _specialcasetokens(self, tokstr): 1110 | if (self._last_read_token.ttype == tokentype.WORD and 1111 | self._token_before_that.ttype in (tokentype.FOR, 1112 | tokentype.CASE, 1113 | tokentype.SELECT) and 1114 | tokstr == 'in'): 1115 | if self._token_before_that.ttype == tokentype.CASE: 1116 | self._parserstate.add(parserflags.CASEPAT) 1117 | self._esacs_needed_count += 1 1118 | return tokentype.IN 1119 | 1120 | if (self._last_read_token.ttype == tokentype.WORD and 1121 | self._token_before_that.ttype in (tokentype.FOR, tokentype.SELECT) and 1122 | tokstr == 'do'): 1123 | return tokentype.DO 1124 | 1125 | if self._esacs_needed_count: 1126 | self._esacs_needed_count -= 1 1127 | if tokstr == 'esac': 1128 | self._parserstate.discard(parserflags.CASEPAT) 1129 | return tokentype.ESAC 1130 | 1131 | if self._parserstate & parserflags.ALLOWOPNBRC: 1132 | self._parserstate.discard(parserflags.ALLOWOPNBRC) 1133 | if tokstr == '{': 1134 | self._open_brace_count += 1 1135 | # bash/parse.y L2887 1136 | return tokentype.LEFT_CURLY 1137 | 1138 | if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and 1139 | tokstr == 'do'): 1140 | return tokentype.DO 1141 | 1142 | if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and 1143 | tokstr == '{'): 1144 | self._open_brace_count += 1 1145 | return tokentype.LEFT_CURLY 1146 | 1147 | if (self._open_brace_count and 1148 | self._reserved_word_acceptable(self._last_read_token) and 1149 | tokstr == '}'): 1150 | self._open_brace_count -= 1 1151 | return tokentype.RIGHT_CURLY 1152 | 1153 | if self._last_read_token.ttype == tokentype.TIME and tokstr == '-p': 1154 | return 
tokentype.TIMEOPT 1155 | 1156 | if self._last_read_token.ttype == tokentype.TIMEOPT and tokstr == '--': 1157 | return tokentype.TIMEIGN 1158 | 1159 | if self._parserstate & parserflags.CONDEXPR and tokstr == ']]': 1160 | return tokentype.COND_END 1161 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | import unittest, functools 2 | 3 | from bashlex import parser, state, flags, ast, errors 4 | 5 | parse = functools.partial(parser.parse, convertpos=True) 6 | 7 | def reservedwordnode(word, s): 8 | return ast.node(kind='reservedword', word=word, s=s) 9 | 10 | def commandnode(s, *parts): 11 | return ast.node(kind='command', s=s, parts=list(parts)) 12 | 13 | def wordnode(word, s=None, parts=None): 14 | if s is None: 15 | s = word 16 | if parts is None: 17 | parts = [] 18 | return ast.node(kind='word', word=word, s=s, parts=list(parts)) 19 | 20 | def assignmentnode(word, s=None, parts=None): 21 | node = wordnode(word, s, parts) 22 | node.kind = 'assignment' 23 | return node 24 | 25 | def parameternode(value, s): 26 | return ast.node(kind='parameter', value=value, s=s) 27 | 28 | def heredocnode(value, s=None): 29 | if s is None: 30 | s = value 31 | return ast.node(kind='heredoc', value=value, s=s) 32 | 33 | def tildenode(value, s): 34 | return ast.node(kind='tilde', value=value, s=s) 35 | 36 | def redirectnode(s, input, type, output, heredoc=None): 37 | return ast.node(kind='redirect', input=input, type=type, output=output, 38 | heredoc=heredoc, s=s) 39 | 40 | def pipenode(pipe, s): 41 | return ast.node(kind='pipe', pipe=pipe, s=s) 42 | 43 | def pipelinenode(s, *parts): 44 | oldparts = parts 45 | if parts[0].kind == 'reservedword' and parts[0].word == '!': 46 | parts = parts[1:] 47 | for i in range(len(parts)): 48 | if i % 2 == 0: 49 | assert parts[i].kind in ('command', 'compound'), parts[i].kind 50 | else: 51 | assert parts[i].kind == 'pipe', parts[i].kind 52 | return ast.node(kind='pipeline', s=s, parts=list(oldparts)) 53 | 54 | def operatornode(op, s): 55 | return ast.node(kind='operator', op=op, s=s) 56 | 57 | def listnode(s, *parts): 58 | for i in range(len(parts)): 59 | if i % 2 == 0: 60 | assert parts[i].kind in ('command', 'pipeline', 'compound'), parts[i].kind 61 | else: 62 | assert parts[i].kind == 'operator', parts[i].kind 63 | return ast.node(kind='list', parts=list(parts), s=s) 64 | 65 | def compoundnode(s, *parts, **kwargs): 66 | redirects = kwargs.pop('redirects', []) 67 | assert not kwargs 68 | return ast.node(kind='compound', s=s, list=list(parts), redirects=redirects) 69 | 70 | def procsubnode(s, command): 71 | return ast.node(kind='processsubstitution', s=s, command=command) 72 | 73 | def comsubnode(s, command): 74 | return ast.node(kind='commandsubstitution', s=s, command=command) 75 | 76 | def ifnode(s, *parts): 77 | return ast.node(kind='if', parts=list(parts), s=s) 78 | 79 | def fornode(s, *parts): 80 | return ast.node(kind='for', parts=list(parts), s=s) 81 | 82 | def whilenode(s, *parts): 83 | return ast.node(kind='while', parts=list(parts), s=s) 84 | 85 | def casenode(s, *parts): 86 | return ast.node(kind='case', parts=list(parts), s=s) 87 | 88 | def patternnode(s, *parts): 89 | return ast.node(kind='pattern', parts=list(parts), s=s) 90 | 91 | def functionnode(s, name, body, *parts): 92 | return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s) 93 | 94 | def unimplementednode(s, *parts): 95 | return 
ast.node(kind='unimplemented', parts=list(parts), s=s) 96 | 97 | class test_parser(unittest.TestCase): 98 | 99 | def setUp(self): 100 | if not hasattr(self, 'assertRaisesRegex'): 101 | self.assertRaisesRegex = self.assertRaisesRegexp 102 | 103 | def assertASTEquals(self, s, expected, **parserargs): 104 | results = parse(s, **parserargs) 105 | self.assertTrue(len(results) == 1, 'expected one ast from parse(), ' 106 | 'got %d' % len(results)) 107 | result = results[0] 108 | 109 | # make sure our words are not empty 110 | class nullopvisitor(ast.nodevisitor): 111 | def visitword(_, node, word): 112 | self.assertTrue(word, 'node %r has no word' % node) 113 | 114 | nullopvisitor().visit(result) 115 | 116 | msg = 'ASTs not equal for %r\n\nresult:\n\n%s\n\n!=\n\nexpected:\n\n%s' % (s, result.dump(), expected.dump()) 117 | self.assertEqual(result, expected, msg) 118 | 119 | def assertASTsEquals(self, s, expectedlist, **parserargs): 120 | results = parse(s, **parserargs) 121 | self.assertEqual(len(results), len(expectedlist), 122 | 'mismatch on ASTs length') 123 | 124 | for result, expected in zip(results, expectedlist): 125 | msg = 'ASTs not equal for %r\n\nresult:\n\n%s\n\n!=\n\nexpected:\n\n%s' % (s, result.dump(), expected.dump()) 126 | self.assertEqual(result, expected, msg) 127 | 128 | def test_command(self): 129 | s = 'a b c' 130 | self.assertASTEquals(s, 131 | commandnode(s, 132 | wordnode('a'), 133 | wordnode('b'), 134 | wordnode('c'))) 135 | 136 | s = 'a b "c"' 137 | self.assertASTEquals(s, 138 | commandnode(s, 139 | wordnode('a'), 140 | wordnode('b'), 141 | wordnode('c', '"c"'))) 142 | 143 | s = '2>/dev/null a b "c"' 144 | self.assertASTEquals(s, 145 | commandnode(s, 146 | redirectnode('2>/dev/null', 2, '>', wordnode('/dev/null')), 147 | wordnode('a'), 148 | wordnode('b'), 149 | wordnode('c', '"c"'))) 150 | 151 | s = 'a b>&1 2>&1' 152 | self.assertASTEquals(s, 153 | commandnode(s, 154 | wordnode('a'), 155 | wordnode('b'), 156 | redirectnode('>&1', None, '>&', 1), 157 | redirectnode('2>&1', 2, '>&', 1))) 158 | 159 | def test_multiline(self): 160 | s = 'a\nb' 161 | self.assertASTsEquals(s, [ 162 | commandnode('a', 163 | wordnode('a')), 164 | commandnode('b', 165 | wordnode('b')) 166 | ]) 167 | 168 | def test_pipeline(self): 169 | s = 'a | b' 170 | self.assertASTEquals(s, 171 | pipelinenode(s, 172 | commandnode('a', wordnode('a')), 173 | pipenode('|', '|'), 174 | commandnode('b', wordnode('b')))) 175 | 176 | s = '! 
a | b' 177 | self.assertASTEquals(s, 178 | pipelinenode(s, 179 | reservedwordnode('!', '!'), 180 | commandnode('a', wordnode('a')), 181 | pipenode('|', '|'), 182 | commandnode('b', wordnode('b')) 183 | )) 184 | 185 | def test_list(self): 186 | s = 'a;' 187 | self.assertASTEquals(s, 188 | listnode(s, 189 | commandnode('a', wordnode('a')), 190 | operatornode(';', ';'), 191 | )) 192 | 193 | s = 'a && b' 194 | self.assertASTEquals(s, 195 | listnode(s, 196 | commandnode('a', wordnode('a')), 197 | operatornode('&&', '&&'), 198 | commandnode('b', wordnode('b')) 199 | )) 200 | 201 | s = 'a; b; c& d' 202 | self.assertASTEquals(s, 203 | listnode(s, 204 | commandnode('a', wordnode('a')), 205 | operatornode(';', ';'), 206 | commandnode('b', wordnode('b')), 207 | operatornode(';', ';'), 208 | commandnode('c', wordnode('c')), 209 | operatornode('&', '&'), 210 | commandnode('d', wordnode('d')) 211 | )) 212 | 213 | s = 'a | b && c' 214 | self.assertASTEquals(s, 215 | listnode(s, 216 | pipelinenode('a | b', 217 | commandnode('a', wordnode('a')), 218 | pipenode('|', '|'), 219 | commandnode('b', wordnode('b'))), 220 | operatornode('&&', '&&'), 221 | commandnode('c', wordnode('c')) 222 | )) 223 | 224 | def test_nestedsubs(self): 225 | s = '$($<$(a) b)' 226 | self.assertASTEquals(s, 227 | commandnode(s, 228 | wordnode(s, s, [ 229 | comsubnode(s, 230 | commandnode('$<$(a) b', 231 | wordnode('$'), 232 | redirectnode('<$(a)', None, '<', 233 | wordnode('$(a)', '$(a)', [ 234 | comsubnode('$(a)', 235 | commandnode('a', 236 | wordnode('a')) 237 | ) 238 | ]) 239 | ), 240 | wordnode('b'), 241 | ) 242 | ) 243 | ]) 244 | ) 245 | ) 246 | 247 | def test_paramexpand(self): 248 | s = 'a $1 $foo_bar "$@ $#" ~foo " ~bar" ${a} "${}"' 249 | self.assertASTEquals(s, 250 | commandnode(s, 251 | wordnode('a'), 252 | wordnode('$1', '$1', [ 253 | parameternode('1', '$1'), 254 | ]), 255 | wordnode('$foo_bar', '$foo_bar', [ 256 | parameternode('foo_bar', '$foo_bar'), 257 | ]), 258 | wordnode('$@ $#', '"$@ $#"', [ 259 | parameternode('@', '$@'), 260 | parameternode('#', '$#') 261 | ]), 262 | wordnode('~foo', '~foo', [ 263 | tildenode('~foo', '~foo'), 264 | ]), 265 | wordnode(' ~bar', '" ~bar"'), 266 | wordnode('${a}', '${a}', [ 267 | parameternode('a', '${a}'), 268 | ]), 269 | wordnode('${}', '"${}"', [ 270 | parameternode('', '${}'), 271 | ]), 272 | ) 273 | ) 274 | 275 | def test_processsub(self): 276 | s = 'a <(b $(c))' 277 | self.assertASTEquals(s, 278 | commandnode(s, 279 | wordnode('a'), 280 | wordnode('<(b $(c))', '<(b $(c))', [ 281 | procsubnode('<(b $(c))', 282 | commandnode('b $(c)', 283 | wordnode('b'), 284 | wordnode('$(c)', '$(c)', [ 285 | comsubnode('$(c)', 286 | commandnode('c', 287 | wordnode('c')) 288 | )] 289 | ) 290 | ) 291 | ) 292 | ]) 293 | ) 294 | ) 295 | 296 | s = 'a `b` "`c`" \'`c`\'' 297 | self.assertASTEquals(s, 298 | commandnode(s, 299 | wordnode('a'), 300 | wordnode('`b`', '`b`', [ 301 | comsubnode('`b`', 302 | commandnode('b', 303 | wordnode('b')) 304 | ), 305 | ]), 306 | wordnode('`c`', '"`c`"', [ 307 | comsubnode('`c`', 308 | commandnode('c', 309 | wordnode('c')) 310 | ), 311 | ]), 312 | wordnode('`c`', "'`c`'") 313 | ) 314 | ) 315 | 316 | def test_error(self): 317 | self.assertRaises(errors.ParsingError, parse, 'a))') 318 | 319 | def test_redirection_input(self): 320 | s = 'a /dev/null', 403 | reservedwordnode('(', '('), 404 | commandnode('b', 405 | wordnode('b')), 406 | reservedwordnode(')', ')'), 407 | redirects=[ 408 | redirectnode('> /dev/null', None, '>', 409 | wordnode('/dev/null'))]), 410 | )) 411 | 
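| # nested compounds: the inner subshell should appear as its own | # compound node inside the outer one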
412 | s = '(a && (b; c&)) || d' 413 | self.assertASTEquals(s, 414 | listnode(s, 415 | compoundnode('(a && (b; c&))', 416 | reservedwordnode('(', '('), 417 | listnode('a && (b; c&)', 418 | commandnode('a', 419 | wordnode('a')), 420 | operatornode('&&', '&&'), 421 | compoundnode('(b; c&)', 422 | reservedwordnode('(', '('), 423 | listnode('b; c&', 424 | commandnode('b', 425 | wordnode('b')), 426 | operatornode(';', ';'), 427 | commandnode('c', 428 | wordnode('c')), 429 | operatornode('&', '&') 430 | ), 431 | reservedwordnode(')', ')'), 432 | ), 433 | ), 434 | reservedwordnode(')', ')'), 435 | ), 436 | operatornode('||', '||'), 437 | commandnode('d', 438 | wordnode('d')), 439 | )) 440 | 441 | def test_compound_redirection(self): 442 | s = '(a) > /dev/null' 443 | self.assertASTEquals(s, 444 | compoundnode(s, 445 | reservedwordnode('(', '('), 446 | commandnode('a', 447 | wordnode('a')), 448 | reservedwordnode(')', ')'), 449 | redirects=[redirectnode('> /dev/null', None, '>', wordnode('/dev/null'))] 450 | )) 451 | 452 | def test_compound_pipe(self): 453 | s = '(a) | b' 454 | self.assertASTEquals(s, 455 | pipelinenode(s, 456 | compoundnode('(a)', 457 | reservedwordnode('(', '('), 458 | commandnode('a', 459 | wordnode('a')), 460 | reservedwordnode(')', ')'), 461 | ), 462 | pipenode('|', '|'), 463 | commandnode('b', 464 | wordnode('b')) 465 | )) 466 | 467 | def test_group(self): 468 | # reserved words are recognized only at the start of a simple command 469 | s = 'echo {}' 470 | self.assertASTEquals(s, 471 | commandnode(s, 472 | wordnode('echo'), wordnode('{}')) 473 | ) 474 | 475 | # reserved word at beginning isn't reserved if quoted 476 | s = "'{' foo" 477 | self.assertASTEquals(s, 478 | commandnode(s, 479 | wordnode('{', "'{'"), wordnode('foo')) 480 | ) 481 | 482 | s = '{ a; }' 483 | self.assertASTEquals(s, 484 | compoundnode(s, 485 | reservedwordnode('{', '{'), 486 | listnode('a;', 487 | commandnode('a', wordnode('a')), 488 | operatornode(';', ';'), 489 | ), 490 | reservedwordnode('}', '}'), 491 | )) 492 | 493 | s = '{ a; b; }' 494 | self.assertASTEquals(s, 495 | compoundnode(s, 496 | reservedwordnode('{', '{'), 497 | listnode('a; b;', 498 | commandnode('a', wordnode('a')), 499 | operatornode(';', ';'), 500 | commandnode('b', wordnode('b')), 501 | operatornode(';', ';') 502 | ), 503 | reservedwordnode('}', '}'), 504 | )) 505 | 506 | s = '(a) && { b; }' 507 | self.assertASTEquals(s, 508 | listnode('(a) && { b; }', 509 | compoundnode('(a)', 510 | reservedwordnode('(', '('), 511 | commandnode('a', 512 | wordnode('a')), 513 | reservedwordnode(')', ')')), 514 | operatornode('&&', '&&'), 515 | compoundnode('{ b; }', 516 | reservedwordnode('{', '{'), 517 | listnode('b;', 518 | commandnode('b', 519 | wordnode('b')), 520 | operatornode(';', ';')), 521 | reservedwordnode('}', '}'), 522 | ) 523 | )) 524 | 525 | s = 'a; ! { b; }' 526 | self.assertASTEquals(s, 527 | listnode(s, 528 | commandnode('a', wordnode('a')), 529 | operatornode(';', ';'), 530 | pipelinenode('! 
{ b; }', 531 | reservedwordnode('!', '!'), 532 | compoundnode('{ b; }', 533 | reservedwordnode('{', '{'), 534 | listnode('b;', 535 | commandnode('b', wordnode('b')), 536 | operatornode(';', ';'), 537 | ), 538 | reservedwordnode('}', '}'), 539 | ) 540 | ) 541 | )) 542 | 543 | def test_invalid_control(self): 544 | s = 'a &| b' 545 | self.assertRaisesRegex(errors.ParsingError, "unexpected token '|'.*position 3", parse, s) 546 | 547 | def test_invalid_redirect(self): 548 | s = 'a 2>' 549 | self.assertRaisesRegex(errors.ParsingError, r"unexpected token '\\n'.*position 4", parse, s) 550 | 551 | s = 'ssh -p 2222 @' 552 | self.assertRaisesRegex(errors.ParsingError, r"unexpected token '\\n'.*position %d" % len(s), parse, s) 553 | 554 | def test_if_redirection(self): 555 | s = 'if foo; then bar; fi >/dev/null' 556 | self.assertASTEquals(s, 557 | compoundnode(s, 558 | ifnode('if foo; then bar; fi', 559 | reservedwordnode('if', 'if'), 560 | listnode('foo;', 561 | commandnode('foo', wordnode('foo')), 562 | operatornode(';', ';')), 563 | reservedwordnode('then', 'then'), 564 | listnode('bar;', 565 | commandnode('bar', wordnode('bar')), 566 | operatornode(';', ';')), 567 | reservedwordnode('fi', 'fi'), 568 | ), 569 | redirects=[ 570 | redirectnode('>/dev/null', None, '>', 571 | wordnode('/dev/null')) 572 | ]) 573 | ) 574 | 575 | def test_if(self): 576 | s = 'if foo; then bar; fi' 577 | self.assertASTEquals(s, 578 | compoundnode(s, 579 | ifnode(s, 580 | reservedwordnode('if', 'if'), 581 | listnode('foo;', 582 | commandnode('foo', wordnode('foo')), 583 | operatornode(';', ';')), 584 | reservedwordnode('then', 'then'), 585 | listnode('bar;', 586 | commandnode('bar', wordnode('bar')), 587 | operatornode(';', ';')), 588 | reservedwordnode('fi', 'fi'), 589 | )) 590 | ) 591 | 592 | s = 'if foo; bar; then baz; fi' 593 | self.assertASTEquals(s, 594 | compoundnode(s, 595 | ifnode(s, 596 | reservedwordnode('if', 'if'), 597 | listnode('foo; bar;', 598 | commandnode('foo', wordnode('foo')), 599 | operatornode(';', ';'), 600 | commandnode('bar', wordnode('bar')), 601 | operatornode(';', ';')), 602 | reservedwordnode('then', 'then'), 603 | listnode('baz;', 604 | commandnode('baz', wordnode('baz')), 605 | operatornode(';', ';')), 606 | reservedwordnode('fi', 'fi'), 607 | )) 608 | ) 609 | 610 | s = 'if foo; then bar; else baz; fi' 611 | self.assertASTEquals(s, 612 | compoundnode(s, 613 | ifnode(s, 614 | reservedwordnode('if', 'if'), 615 | listnode('foo;', 616 | commandnode('foo', wordnode('foo')), 617 | operatornode(';', ';')), 618 | reservedwordnode('then', 'then'), 619 | listnode('bar;', 620 | commandnode('bar', wordnode('bar')), 621 | operatornode(';', ';')), 622 | reservedwordnode('else', 'else'), 623 | listnode('baz;', 624 | commandnode('baz', wordnode('baz')), 625 | operatornode(';', ';')), 626 | reservedwordnode('fi', 'fi'), 627 | )) 628 | ) 629 | 630 | s = 'if foo; then bar; elif baz; then barbaz; fi' 631 | self.assertASTEquals(s, 632 | compoundnode(s, 633 | ifnode(s, 634 | reservedwordnode('if', 'if'), 635 | listnode('foo;', 636 | commandnode('foo', wordnode('foo')), 637 | operatornode(';', ';')), 638 | reservedwordnode('then', 'then'), 639 | listnode('bar;', 640 | commandnode('bar', wordnode('bar')), 641 | operatornode(';', ';')), 642 | reservedwordnode('elif', 'elif'), 643 | listnode('baz;', 644 | commandnode('baz', wordnode('baz')), 645 | operatornode(';', ';')), 646 | reservedwordnode('then', 'then'), 647 | listnode('barbaz;', 648 | commandnode('barbaz', wordnode('barbaz')), 649 | operatornode(';', ';')), 
650 | reservedwordnode('fi', 'fi'), 651 | )) 652 | ) 653 | 654 | s = 'if foo; then bar; elif baz; then barbaz; else foobar; fi' 655 | self.assertASTEquals(s, 656 | compoundnode(s, 657 | ifnode(s, 658 | reservedwordnode('if', 'if'), 659 | listnode('foo;', 660 | commandnode('foo', wordnode('foo')), 661 | operatornode(';', ';')), 662 | reservedwordnode('then', 'then'), 663 | listnode('bar;', 664 | commandnode('bar', wordnode('bar')), 665 | operatornode(';', ';')), 666 | reservedwordnode('elif', 'elif'), 667 | listnode('baz;', 668 | commandnode('baz', wordnode('baz')), 669 | operatornode(';', ';')), 670 | reservedwordnode('then', 'then'), 671 | listnode('barbaz;', 672 | commandnode('barbaz', wordnode('barbaz')), 673 | operatornode(';', ';')), 674 | reservedwordnode('else', 'else'), 675 | listnode('foobar;', 676 | commandnode('foobar', wordnode('foobar')), 677 | operatornode(';', ';')), 678 | reservedwordnode('fi', 'fi'), 679 | )) 680 | ) 681 | 682 | def test_malformed_if(self): 683 | s = 'if foo; bar; fi' 684 | self.assertRaisesRegex(errors.ParsingError, "unexpected token 'fi'.*position 13", parse, s) 685 | 686 | s = 'if foo; then bar;' 687 | self.assertRaisesRegex(errors.ParsingError, "unexpected EOF.*position 17", parse, s) 688 | 689 | s = 'if foo; then bar; elif baz; fi' 690 | self.assertRaisesRegex(errors.ParsingError, "unexpected token 'fi'.*position 28", parse, s) 691 | 692 | def test_word_expansion(self): 693 | s = "'a' ' b' \"'c'\"" 694 | self.assertASTEquals(s, 695 | commandnode(s, 696 | wordnode('a', "'a'"), 697 | wordnode(' b', "' b'"), 698 | wordnode("'c'", "\"'c'\""))) 699 | 700 | s = '"a\'b"' 701 | self.assertASTEquals(s, 702 | commandnode(s, 703 | wordnode("a'b", s))) 704 | 705 | s = 'a"b"\'c\'d' 706 | self.assertASTEquals(s, 707 | commandnode(s, 708 | wordnode("abcd", s))) 709 | 710 | s = "'$(a)' \"$(b)\"" 711 | self.assertASTEquals(s, 712 | commandnode(s, 713 | wordnode("$(a)", "'$(a)'"), 714 | wordnode("$(b)", '"$(b)"', [ 715 | comsubnode("$(b)", 716 | commandnode("b", wordnode("b")) 717 | ) 718 | ]))) 719 | 720 | s = "\"$(a \"b\" 'c')\" '$(a \"b\" 'c')'" 721 | self.assertASTEquals(s, 722 | commandnode(s, 723 | wordnode("$(a \"b\" 'c')", "\"$(a \"b\" 'c')\"", [ 724 | comsubnode("$(a \"b\" 'c')", 725 | commandnode("a \"b\" 'c'", 726 | wordnode('a'), 727 | wordnode('b', '"b"'), 728 | wordnode('c', "'c'") 729 | ) 730 | ) 731 | ]), 732 | wordnode("$(a \"b\" 'c')", "'$(a \"b\" 'c')'") 733 | )) 734 | 735 | def test_escape_not_part_of_word(self): 736 | s = "a \\;" 737 | self.assertASTEquals(s, 738 | commandnode(s, 739 | wordnode('a'), 740 | wordnode(';', '\\;'))) 741 | 742 | def test_heredoc_spec(self): 743 | for redirect_kind in ('<<', '<<<'): 744 | s = 'a %sEOF | b' % redirect_kind 745 | self.assertASTEquals(s, 746 | pipelinenode(s, 747 | commandnode('a %sEOF' % redirect_kind, 748 | wordnode('a', 'a'), 749 | redirectnode('%sEOF' % redirect_kind, None, 750 | redirect_kind, wordnode('EOF'))), 751 | pipenode('|', '|'), 752 | commandnode('b', wordnode('b', 'b'))), 753 | strictmode=False) 754 | 755 | s = 'a <<-b' 756 | self.assertASTEquals(s, 757 | commandnode(s, 758 | wordnode('a', 'a'), 759 | redirectnode('<<-b', None, '<<-', wordnode('b'))), 760 | strictmode=False) 761 | 762 | s = 'a <<<