├── .gitignore ├── .luacheckrc ├── .deploykey.enc ├── sh-parser ├── init.lua ├── parser.lua ├── fun_ext.lua ├── ast │ ├── name_captures.lua │ └── simple.lua ├── location_resolver.lua ├── utils.lua ├── lpeg_sugar.lua └── grammar.lua ├── script ├── git-ssh-wrapper ├── test ├── travis-decrypt ├── utils.sh ├── deploy-rocks ├── bootstrap └── deploy-docs ├── .envrc ├── config.ld ├── Rocksfile ├── .editorconfig ├── .deploykey.pub ├── README.adoc ├── sh-parser-cli-dev-0.rockspec ├── LICENSE ├── .travis.yml ├── sh-parser-dev-0.rockspec ├── bin └── sh-parser └── AST.adoc /.gitignore: -------------------------------------------------------------------------------- 1 | /.tmp/ 2 | /.venv/ 3 | /doc/ 4 | *.rock 5 | .deploykey 6 | -------------------------------------------------------------------------------- /.luacheckrc: -------------------------------------------------------------------------------- 1 | -- vim: set ft=lua: 2 | 3 | std = 'min' 4 | codes = true 5 | -------------------------------------------------------------------------------- /.deploykey.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jirutka/sh-parser/HEAD/.deploykey.enc -------------------------------------------------------------------------------- /sh-parser/init.lua: -------------------------------------------------------------------------------- 1 | local M = require 'sh-parser.parser' 2 | 3 | M._VERSION = '0.0.0' 4 | 5 | return M 6 | -------------------------------------------------------------------------------- /script/git-ssh-wrapper: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ -n "$SSH_DEPLOY_KEY" ]; then 4 | ssh -i "$SSH_DEPLOY_KEY" "$@" 5 | else 6 | ssh "$@" 7 | fi 8 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # TIP: 
Use direnv or similar tool to automatically source this file 3 | # when you "cd" to the project's directory. 4 | 5 | PATH="$(pwd)/.venv/bin:$PATH" 6 | export PATH 7 | hash -r >/dev/null 2>&1 8 | -------------------------------------------------------------------------------- /config.ld: -------------------------------------------------------------------------------- 1 | -- vim: set ft=lua: 2 | 3 | project = 'sh-parser' 4 | title = 'sh-parser docs' 5 | file = 'sh-parser' 6 | dir = 'doc' 7 | format = 'markdown' 8 | all = true -- include even local funcs etc. 9 | -------------------------------------------------------------------------------- /Rocksfile: -------------------------------------------------------------------------------- 1 | # sh-parser 2 | fun 3 | lpeg 4 | PegDebug 5 | 6 | # sh-parser-cli 7 | rapidjson 8 | optarg 9 | 10 | # dev and testing 11 | lua-cjson # needed for deploying to LuaRocks 12 | inspect 13 | jirutka/ldoc # https://github.com/stevedonovan/LDoc/pull/263 14 | luacheck 15 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | indent_size = 2 8 | indent_style = space 9 | insert_final_newline = true 10 | trim_trailing_whitespace = true 11 | 12 | [script/*] 13 | indent_size = 4 14 | indent_style = tab 15 | -------------------------------------------------------------------------------- /script/test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # vim: set ts=4: 3 | # 4 | # Runs tests and linters. 5 | # 6 | set -eu 7 | 8 | cd "$(dirname "$0")/.." 9 | . 
script/bootstrap 10 | 11 | 12 | einfo 'Running linters' 13 | 14 | luacheck $PKG_NAME/* 15 | 16 | for rockspec in *.rockspec; do 17 | luarocks lint "$rockspec" || exit 1 18 | done 19 | -------------------------------------------------------------------------------- /.deploykey.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2uqyoaqmlVB628PzMfoy7L+Wrib+L5pQTf2Y/kASty5cApjhMldmbED8u2iTXbYqDkZKuxoXKRz2jOH1UFMGcrxEJsbp3DWS91jdnH46/iSQwVYhE4s5SSYhwn96lVbnx/bzXj7itpPaKHB7/W2TkY30zv53zyuDAyoRJYL4I54eVflGkbyjfgVjj92PUxPN5C7IBiTFEVc83V07Ygs2oBJeJ95LwjPvmhi8N2jgmtMO9HYXYkglK1iAEkCaQPd2KmOWb9zDbsMaM/9pWAgtrYDF8u6ENh3SGzg21LuVN3zGH2hj2ncn/UtsU7j58Ijt/CySIhWwf9ZteQLnatqCX 2 | -------------------------------------------------------------------------------- /script/travis-decrypt: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # vim: set ts=4: 3 | set -eu 4 | 5 | . "$(dirname "$0")"/utils.sh 6 | 7 | if [ -z "${encrypted_be665121ac17_key:-}" ]; then 8 | ewarn 'Decryption key is not available' 9 | exit 0 10 | fi 11 | 12 | einfo 'Decrypting .deploykey' 13 | openssl aes-256-cbc \ 14 | -K "$encrypted_be665121ac17_key" \ 15 | -iv "$encrypted_be665121ac17_iv" \ 16 | -in .deploykey.enc -out .deploykey -d 17 | chmod 600 .deploykey 18 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = Parser of POSIX Shell Command Language 2 | :gh-name: jirutka/sh-parser 3 | :gh-branch: master 4 | 5 | ifdef::env-github[] 6 | image:https://travis-ci.org/{gh-name}.svg?branch={gh-branch}["Build Status", link="https://travis-ci.org/{gh-name}"] 7 | endif::env-github[] 8 | 9 | Status: in early phase of development 10 | 11 | 12 | == License 13 | 14 | This project is licensed under http://opensource.org/licenses/MIT/[MIT License]. 
# vim: set ts=4:
#
# Constants and helper functions shared by the scripts in script/.
# Other scripts load this file with `. script/utils.sh`.

readonly PKG_NAME='sh-parser'
readonly TEMP_DIR="$(pwd)/.tmp"
readonly VENV_DIR="$(pwd)/.venv"

# Prints every argument as its own informational line to stderr.
einfo() {
	# bold cyan
	printf '\033[1;36m> %s\033[0m\n' "$@" >&2
}

# Prints every argument as its own warning line to stderr.
ewarn() {
	# bold yellow
	printf '\033[1;33m> %s\033[0m\n' "$@" >&2
}

# Prints an error to stderr and exits with status 2.
#
# $1: The main error message.
# $@: Optional detail lines printed below the message.
die() {
	# bold red
	printf '\033[1;31mERROR:\033[0m %s\n' "$1" >&2
	shift
	# printf runs its format once even without arguments, so guard against
	# printing an empty detail line. Details belong to the error, so they go
	# to stderr as well.
	if [ $# -gt 0 ]; then
		printf ' %s\n' "$@" >&2
	fi
	exit 2
}

# Fetches the given URL and verifies SHA256 checksum.
#
# $1: URL of the file to download.
# $2: Expected SHA256 checksum of the file (hex).
# $3: Destination directory (default is the current directory).
#
# The body runs in a subshell, so the `cd` does not leak to the caller.
wgets() (
	local url="$1"
	local sha256="$2"
	local dest="${3:-.}"

	# The checksum line uses "<hash>  <file>" (two separator characters) as
	# expected by `sha256sum -c`.
	mkdir -p "$dest" \
		&& cd "$dest" \
		&& rm -f "${url##*/}" \
		&& wget -T 10 "$url" \
		&& echo "$sha256  ${url##*/}" | sha256sum -c
)
16 | It parsers the given shell script and generates its AST in JSON.]], 17 | homepage = 'https://github.com/jirutka/sh-parser', 18 | maintainer = 'Jakub Jirutka ', 19 | license = 'MIT', 20 | } 21 | 22 | dependencies = { 23 | 'lua >= 5.1', 24 | 'optarg ~> 0.2', 25 | 'rapidjson ~> 0.5', 26 | 'sh-parser', 27 | } 28 | 29 | build = { 30 | type = 'builtin', 31 | modules = {}, 32 | install = { 33 | bin = { 34 | ['sh-parser'] = 'bin/sh-parser' 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /sh-parser/parser.lua: -------------------------------------------------------------------------------- 1 | --------- 2 | -- Parser of POSIX shell 3 | 4 | local lpeg = require 'lpeg' 5 | local simple_ast = require 'sh-parser.ast.simple' 6 | local grammar = require 'sh-parser.grammar' 7 | 8 | 9 | local function inject_tracing (grammar) --luacheck: ignore 431 10 | local ok, pegdebug = pcall(require, 'pegdebug') 11 | if not ok then 12 | error('You must install Lua module pegdebug to use trace mode!') 13 | end 14 | 15 | return pegdebug.trace(grammar) 16 | end 17 | 18 | 19 | local M = {} 20 | 21 | --- Parses the given shell script into AST. 22 | -- 23 | -- @tparam string input The script to parse. 24 | -- @tparam table|nil opts A map of options. 25 | -- @treturn A root node. 26 | function M.parse (input, opts) 27 | opts = opts or {} 28 | 29 | local gr = grammar.build() 30 | 31 | -- TODO: cache initialized parser 32 | local parser = opts.trace 33 | and lpeg.P(inject_tracing(gr)) 34 | or lpeg.P(gr) 35 | 36 | local create_node = simple_ast(input, opts) 37 | 38 | return parser:match(input, 1, create_node, input, {}) 39 | end 40 | 41 | return M 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright 2017 Jakub Jirutka . 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | dist: trusty 3 | env: 4 | global: 5 | - secure: Ai7wIggL88jnPTE89gviZWAgOa7Rvp5JkCPvr+BjAeC1rei5Awcf/9y6EvefG8VhxLDgAa/xwVBc7xrxCoqSjMRf50LKJDdhRRoT9evv+2mZTL9SQXbS0kYtqmWDNymbciwFhKZ/Qn0xuZhOsgsee0Erh8goVoizuB8MWzUw588kjb9PgeTzw5zkkU7xyqp9VjUG4JY/LgY59onoBRR2bPyOfXFF4oWuih89n4zaliLz+KO9/Ziyi/GeilQuW0AeYlGB+BN1US9jW+ooyF19j1oUurUCUDKfU0zZuDJsO3e2SyDcDN3fcnCI5S9qA+jrnK3RZmEveA6+9v4dSDjwjjFkRz/wjy6SdCP94gOfmffvcHvtwe7clnMl6ASVDhuWeRA+4AzcuXHYDcqXV4cmj7gwo4B4wtiO4N7ksxsg+wEsnDF/uN1DUFBjO5yAxZyhwFsPBDp4gu/ajbPRUnn5sV1r3nylNjZIGy4sZ23xArm9NJbyR5RXCxGOMU+MgtucnVazzeLoTRSZD9ANoIHk9ciBxtCs72FU61/S7iApwYOxWEhKCCtyZKFcRgQVmMKeyHQT8qRI87g0/EY6s+SeoC53XNBCc611mmNZqO5geUWTfQfOtjtZ2Jd+xbkw06JMswsfLc8OJ4i5C5H/lYV+NvdlCXBsfYGc2PnDWoMSJ6s= # LUAROCKS_KEY 6 | matrix: 7 | - LUA_VERSION=luajit-2.0 8 | - LUA_VERSION=luajit-2.1 9 | - LUA_VERSION=lua-5.1 10 | - LUA_VERSION=lua-5.2 11 | - LUA_VERSION=lua-5.3 12 | cache: 13 | directories: 14 | - .venv 15 | install: 16 | - source .envrc 17 | - script/bootstrap 18 | script: 19 | - script/test 20 | after_success: 21 | - script/travis-decrypt 22 | - script/deploy-docs 23 | deploy: 24 | provider: script 25 | script: script/deploy-rocks 26 | skip_cleanup: true 27 | all_branches: true 28 | on: 29 | condition: | 30 | "$TRAVIS_BRANCH" = master || "$TRAVIS_TAG" = v* && "$LUA_VERSION" = lua-5.3 31 | -------------------------------------------------------------------------------- /script/deploy-rocks: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # vim: set ts=4: 3 | # 4 | # If git HEAD is tagged as v*, then it generates the release rockspec(s) 5 | # from the dev-0 rockspec(s) and uploads them to LuaRocks. Otherwise it uploads 6 | # dev-0 rockspec(s) to LuaRocks. 
7 | # 8 | set -eu 9 | 10 | create_release_spec() { 11 | local dev_spec="$1" 12 | local git_tag="$2" 13 | 14 | local spec_ver="${git_tag#v*}-1" 15 | local rel_spec="$(echo "$dev_spec" | sed "s/-dev-0.rockspec/-$spec_ver.rockspec/")" 16 | 17 | sed -E \ 18 | -e "s/^version *= *['\"].*['\"]/version = '$spec_ver'/" \ 19 | -e "/^source *= *{/,/^}/ s/(branch|tag) *= *['\"].*['\"]/tag = '$git_tag'/" \ 20 | "$dev_spec" > "$rel_spec" 21 | 22 | echo "$rel_spec" 23 | } 24 | 25 | git_release_tag() { 26 | git describe --tags --exact-match --match 'v*' 2>/dev/null 27 | } 28 | 29 | 30 | . "$(dirname "$0")/bootstrap" 31 | 32 | [ -n "${LUAROCKS_KEY:-}" ] || die 'LUAROCKS_KEY is not set!' 33 | 34 | for dev_spec in *-dev-0.rockspec; do 35 | if rel_tag=$(git_release_tag); then 36 | rel_spec=$(create_release_spec "$dev_spec" "$rel_tag") 37 | 38 | einfo "Uploading $(basename "$rel_spec") to LuaRocks" 39 | luarocks upload --api-key="$LUAROCKS_KEY" "$rel_spec" 40 | 41 | rm -- "$rel_spec" 42 | else 43 | einfo "Uploading $(basename "$dev_spec") to LuaRocks" 44 | luarocks upload --force --api-key="$LUAROCKS_KEY" "$dev_spec" 45 | fi 46 | done 47 | -------------------------------------------------------------------------------- /sh-parser-dev-0.rockspec: -------------------------------------------------------------------------------- 1 | -- vim: set ft=lua: 2 | 3 | package = 'sh-parser' 4 | version = 'dev-0' 5 | 6 | source = { 7 | url = 'git://github.com/jirutka/sh-parser.git', 8 | branch = 'master', 9 | } 10 | 11 | description = { 12 | summary = 'Parser of POSIX Shell Command Language', 13 | detailed = [[ 14 | This is a POSIX shell parser written in LPeg that generates AST of the parsed 15 | shell script without evaluating. It supports complete syntax of POSIX Shell 16 | Command Language plus few common extensions implemented e.g. by Busybox ash 17 | and dash. 
It does not support Bash/ZSH-specific features.]], 18 | homepage = 'https://github.com/jirutka/sh-parser', 19 | maintainer = 'Jakub Jirutka ', 20 | license = 'MIT', 21 | } 22 | 23 | dependencies = { 24 | 'lua >= 5.1', 25 | 'lpeg ~> 1.0', 26 | 'fun ~> 0.1.3', 27 | --'PegDebug ~> 0.40' optional 28 | } 29 | 30 | build = { 31 | type = 'builtin', 32 | modules = { 33 | ['sh-parser'] = 'sh-parser/init.lua', 34 | ['sh-parser.ast.name_captures'] = 'sh-parser/ast/name_captures.lua', 35 | ['sh-parser.ast.simple'] = 'sh-parser/ast/simple.lua', 36 | ['sh-parser.fun_ext'] = 'sh-parser/fun_ext.lua', 37 | ['sh-parser.grammar'] = 'sh-parser/grammar.lua', 38 | ['sh-parser.location_resolver'] = 'sh-parser/location_resolver.lua', 39 | ['sh-parser.lpeg_sugar'] = 'sh-parser/lpeg_sugar.lua', 40 | ['sh-parser.parser'] = 'sh-parser/parser.lua', 41 | ['sh-parser.utils'] = 'sh-parser/utils.lua', 42 | }, 43 | } 44 | -------------------------------------------------------------------------------- /script/bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # vim: set ts=4: 3 | # 4 | # This script ensures that lua interpreter and lua modules specified 5 | # in Rocksfile are installed. 6 | # 7 | # Environment variables: 8 | # LUA_VERSION : Version of the Lua interpreter (e.g. lua-5.3, luajit-2.0) to 9 | # be installed. Default is lua-5.3. 10 | # 11 | set -eu 12 | 13 | DEFAULT_LUA_VERSION='lua-5.3' 14 | HEREROCKS_URI='https://raw.githubusercontent.com/mpeterv/hererocks/0.14.0/hererocks.py' 15 | HEREROCKS_SHA256='4d667c12411d405eb3a64392c7fe89db0a557aca60e8b9e96d11030ecf3ce10f' 16 | 17 | install_rock() { 18 | local name="$1" 19 | local args="$name" 20 | 21 | case "$name" in 22 | */*) args="--server=https://luarocks.org/manifests/${name%/*} ${name#*/}";; 23 | esac 24 | 25 | luarocks install $args 26 | } 27 | 28 | 29 | cd "$(dirname "$0")/.." 30 | . script/utils.sh 31 | 32 | mkdir -p "$VENV_DIR" "$TEMP_DIR" 33 | 34 | if [ ! 
-- This module provides luafun with some bug fixes
--
-- XXX: Remove after https://github.com/rtsisyk/luafun/pull/34 is merged
-- and released.
-- luacheck: ignore

local fun = require 'fun'

local iterator_mt = getmetatable(fun.wrap())
local exports = fun
local methods = iterator_mt.__index


-- Generator that never yields anything. It backs the empty iterator returned
-- by `chain()` when no iterators are given. This name was previously
-- undefined in this module, so the empty chain wrapped a nil generator.
local nil_gen = function(_param, _state)
  return nil
end

-- Returns the number of the given arguments. When the third argument from the
-- end is an iterator object and the last two arguments are equal to its
-- `param` and `state` fields, those two trailing arguments are not counted.
local numargs = function(...)
  local n = select('#', ...)
  if n >= 3 then
    -- Fix last argument
    local it = select(n - 2, ...)
    if type(it) == 'table' and getmetatable(it) == iterator_mt and
        it.param == select(n - 1, ...) and it.state == select(n, ...) then
      return n - 2
    end
  end
  return n
end

-- Layout of the chain's `param` table, for the i-th chained iterator:
--   param[3 * i - 2] = generator function
--   param[3 * i - 1] = generator's param
--   param[3 * i]     = generator's initial state
-- The chain's state is a pair `{i, state_x}`: index of the current iterator
-- and its current state.

local chain_gen_r1

-- Handles the values returned by the current iterator's generator.
-- If the generator is exhausted (`state_x == nil`), moves on to the next
-- iterator, or returns nil when there is none. Otherwise returns the new
-- chain state followed by the produced values.
local chain_gen_r2 = function(param, state, state_x, ...)
  if state_x == nil then
    local i = state[1]
    i = i + 1
    if param[3 * i - 1] == nil then  -- fixed
      return nil
    end
    local state_x = param[3 * i]
    return chain_gen_r1(param, {i, state_x})
  end
  return {state[1], state_x}, ...
end

-- Generator of the chained iterator: advances the current iterator and passes
-- the result to `chain_gen_r2`.
chain_gen_r1 = function(param, state)
  local i, state_x = state[1], state[2]
  local gen_x, param_x = param[3 * i - 2], param[3 * i - 1]
  return chain_gen_r2(param, state, gen_x(param_x, state[2]))
end

--- Returns an iterator that yields items of the given iterators one after
-- another.
--
-- @param ... Iterators (anything accepted by `fun.iter`) to concatenate.
-- @return A luafun iterator; an empty one when called without arguments.
local chain = function(...)
  local n = numargs(...)
  if n == 0 then
    return fun.wrap(nil_gen, nil, nil)
  end

  -- NOTE(review): assigning the last slot first presumably pre-sizes the
  -- table; the slot is overwritten in the loop below.
  local param = { [3 * n] = 0 }
  local gen_x, param_x, state_x
  for i=1,n,1 do
    local elem = select(i, ...)
    gen_x, param_x, state_x = fun.iter(elem)
    param[3 * i - 2] = gen_x
    param[3 * i - 1] = param_x
    param[3 * i] = state_x
  end

  return fun.wrap(chain_gen_r1, param, {1, param[3]})
end

methods.chain = chain
exports.chain = chain

return exports
'content' }, 32 | Assignments = { 'modifier', 'assignments' }, 33 | Assignment = { 'name', 'value' }, 34 | Name = { 'text' }, 35 | Word = 'content', 36 | ArithmeticExpansion = { 'text' }, 37 | ParameterExpansion = { 'op_pre', 'param', 'op_in', 'word' }, 38 | CommandSubstitution = 'cmds', 39 | Comment = { 'text' }, 40 | } 41 | 42 | 43 | --- Converts the given list of positional captures from the specified grammar 44 | -- rule into a map of named captures. 45 | -- 46 | -- @function __call 47 | -- @tparam string ttype Type of the grammar rule / AST node. 48 | -- @tparam table captures A list of the captures. 49 | -- @treturn table A new map with the captures under keys. 50 | return function (ttype, captures) 51 | local keys = mapping[ttype] 52 | local t = {} 53 | 54 | if keys then 55 | if is_string(keys) then 56 | t[keys] = captures 57 | else 58 | for i, k in ipairs(keys) do 59 | t[k] = captures[i] 60 | end 61 | end 62 | else 63 | t.children = captures 64 | end 65 | 66 | return t 67 | end 68 | -------------------------------------------------------------------------------- /sh-parser/ast/simple.lua: -------------------------------------------------------------------------------- 1 | --------- 2 | -- Simple AST builder 3 | -- 4 | -- Builds simple AST suitable for encoding into JSON. 5 | ---- 6 | local loc_resolver = require 'sh-parser.location_resolver' 7 | local name_captures = require 'sh-parser.ast.name_captures' 8 | 9 | local push = table.insert 10 | 11 | 12 | local function named_loc (row, col, pos) 13 | return { row = row, col = col, pos = pos } 14 | end 15 | 16 | 17 | --- Creates a function that transforms positional captures produced by the 18 | -- shell parser into AST nodes. 19 | -- 20 | -- **Options**: 21 | -- 22 | -- * `comments`: Include comments. 23 | -- * `loc`: Include nodes' location as start and end absolute position in 24 | -- the `subject`. 
25 | -- * `loc2`: Include nodes' location as start and end row, column and 26 | -- absolute position in the `subject`. 27 | -- * `source`: Include raw source in all nodes except the root node. 28 | -- 29 | -- @function __call 30 | -- @tparam string subject The parsed input. 31 | -- @tparam {[string]=bool,...} opts The options map. 32 | -- @return `create_node` 33 | return function (subject, opts) 34 | opts = opts or {} 35 | local comments = opts.comments and {} or nil 36 | local with_loc = opts.loc or false 37 | local with_loc2 = opts.loc2 or false 38 | local with_source = opts.source or false 39 | 40 | local resolve_loc 41 | if with_loc2 then 42 | resolve_loc = loc_resolver(subject) 43 | end 44 | 45 | --- Creates a new AST node. 46 | -- 47 | -- @tparam string node_type The node type. 48 | -- @tparam table captures A list of captures. 49 | -- @tparam int start_pos Start position in the parsed input. 50 | -- @tparam int end_pos End position in the parsed input. 51 | -- @treturn table|nil An AST node, or nil. 
52 | local function create_node (node_type, captures, start_pos, end_pos) 53 | 54 | local node = name_captures(node_type, captures) 55 | node.type = node_type 56 | 57 | if with_loc2 then 58 | node.loc = { 59 | ['start'] = named_loc(resolve_loc(start_pos)), 60 | ['end'] = named_loc(resolve_loc(end_pos)), 61 | } 62 | elseif with_loc then 63 | node.loc = { 64 | ['start'] = start_pos, 65 | ['end'] = end_pos, 66 | } 67 | end 68 | 69 | if with_source and node_type ~= 'Program' then 70 | node.source = subject:sub(start_pos, end_pos) 71 | end 72 | 73 | if comments ~= nil then 74 | if node_type == 'Comment' then 75 | push(comments, node) 76 | return nil 77 | elseif node_type == 'Program' then 78 | node.comments = comments 79 | end 80 | end 81 | 82 | return node 83 | end 84 | 85 | return create_node 86 | end 87 | -------------------------------------------------------------------------------- /script/deploy-docs: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # vim: set ts=4: 3 | # 4 | # Generates docs using ldoc and deploys them to the gh-pages branch in 5 | # the repository. 6 | # 7 | set -eu 8 | 9 | DEPLOY_BRANCH='gh-pages' 10 | SOURCE_BRANCH='master' 11 | SSH_DEPLOY_KEY='.deploykey' 12 | 13 | 14 | build_docs() { 15 | local build_dir="$1" 16 | 17 | # This is needed for cleaning stale files; all docs will be regenerated. 18 | rm -Rf -- "$build_dir"/ldoc/* 19 | 20 | einfo 'Running ldoc' 21 | ldoc --dir "$build_dir"/ldoc --verbose . 22 | 23 | # Remove Last updated timestamp. 24 | LANG=C find "$build_dir"/ldoc -name '*.html' \ 25 | -exec sed -i.BAK 's/Last updated.*//' {} \; \ 26 | -exec rm {}.BAK \; # sed -i behaves differently on BSD and GNU... 
27 | } 28 | 29 | has_changes() { 30 | test -n "$(git status --porcelain)" 31 | } 32 | 33 | remote_origin_url() { 34 | if [ -r "$SSH_DEPLOY_KEY" ]; then 35 | git config remote.origin.url \ 36 | | sed -E 's|^https://([^/]+)/(.*)$|git@\1:\2|' 37 | else 38 | git config remote.origin.url 39 | fi 40 | } 41 | 42 | skip_push() { 43 | [ -n "${TRAVIS:-}" ] || return 1 44 | 45 | [ "$TRAVIS_PULL_REQUEST" != 'false' ] \ 46 | || [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ] \ 47 | || [ "$TRAVIS_BUILD_NUMBER.1" != "$TRAVIS_JOB_NUMBER" ] 48 | } 49 | 50 | 51 | #======================== Main =========================# 52 | 53 | cd "$(dirname "$0")/.." 54 | . script/bootstrap 55 | 56 | einfo 'Updating ldoc documentation' 57 | 58 | commit_rev="$(git rev-parse --short HEAD)" 59 | commit_author="$(git log -n 1 --format='%aN <%aE>')" 60 | commit_date="$(git log -n 1 --format='%aD')" 61 | remote_url="$(remote_origin_url)" || die 'Failed to get remote.origin.url' 62 | build_dir="$(mktemp -q -d "${TMPDIR:-"/tmp"}/docs.XXXX")" 63 | 64 | # Use the specified SSH key for cloning/pushing repository, if it's readable. 65 | if [ -r "$SSH_DEPLOY_KEY" ]; then 66 | export SSH_DEPLOY_KEY="$(pwd)/$SSH_DEPLOY_KEY" 67 | export GIT_SSH="$(pwd)/script/git-ssh-wrapper" 68 | fi 69 | 70 | if [ -n "${TRAVIS:-}" ]; then 71 | git config --global user.name 'Travis CI' 72 | git config --global user.email 'travis-ci@local.host' 73 | fi 74 | 75 | git clone --progress "$remote_url" "$build_dir" 76 | git -C "$build_dir" checkout "$DEPLOY_BRANCH" \ 77 | || die "Branch $DEPLOY_BRANCH does not exist" 78 | 79 | build_docs "$build_dir" 80 | 81 | cd "$build_dir" 82 | 83 | if ! 
has_changes; then 84 | ewarn 'No changes'; exit 0 85 | fi 86 | if skip_push; then 87 | ewarn 'Skipping push'; exit 0 88 | fi 89 | 90 | einfo 'Commiting changes' 91 | git add --all 92 | git commit \ 93 | --message="Built from $commit_rev" \ 94 | --author="$commit_author" \ 95 | --date="$commit_date" 96 | 97 | einfo "Pushing changes to $DEPLOY_BRANCH branch" 98 | git push --progress "$remote_url" "$DEPLOY_BRANCH:$DEPLOY_BRANCH" 99 | 100 | rm -Rf -- "$build_dir" 101 | -------------------------------------------------------------------------------- /bin/sh-parser: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | local optarg = require 'optarg' 4 | local fun = require 'sh-parser.fun_ext' 5 | local parser = require 'sh-parser.parser' 6 | 7 | local json 8 | for _, name in ipairs {'rapidjson', 'cjson', 'lunajson'} do 9 | local ok, res = pcall(require, name) 10 | if ok then 11 | json = res 12 | break 13 | end 14 | end 15 | if not json then 16 | error 'No json module found! Install rapidjson, cjson, or lunajson.' 17 | end 18 | 19 | local exit = os.exit 20 | local iter = fun.iter 21 | 22 | local HELP_MSG = [[ 23 | Usage: 24 | ${progname} [options] FILE 25 | ${progname} (-h | -V) 26 | 27 | Parse POSIX sh compliant script and produce its AST as JSON. 28 | 29 | Arguments: 30 | FILE Path of script to parse or "-" to read from stdin. 31 | 32 | Options: 33 | -c, --comments Include comments. 34 | -l, --loc Include nodes' location as start and end absolute position 35 | in the source script. 36 | -m, --loc2 Include nodes' location as start and end row, column and 37 | absolute position in the source script. 38 | -p, --pretty Print pretty formatted JSON. 39 | -t, --trace Trace parser (needs pegdebug module). 40 | -s, --source Include raw source in all nodes except the root node. 
41 | -V, --version Show version information and exit 42 | -h, --help Show this message and exit 43 | 44 | Homepage: https://github.com/jirutka/sh-parser]] 45 | 46 | 47 | local function parse_args () 48 | return optarg.from_opthelp(HELP_MSG:match('Options:\n(.*)\n\n')) 49 | end 50 | 51 | local function printf_err (str, ...) 52 | io.stderr:write((str..'\n'):format(...)) 53 | end 54 | 55 | local function print_help () 56 | local msg, _ = HELP_MSG:gsub('${progname}', _G.arg[0]) 57 | print(msg) 58 | end 59 | 60 | 61 | --------- M a i n --------- 62 | 63 | local opts, args = parse_args() 64 | 65 | if opts.help then 66 | print_help() 67 | exit(0) 68 | 69 | elseif opts.version then 70 | print('sh-parser TODO') 71 | exit(0) 72 | 73 | elseif #args ~= 1 then 74 | print_help() 75 | exit(2) 76 | end 77 | 78 | if opts.pretty and json._NAME ~= 'rapidjson' then 79 | printf_err('Warning: Option --pretty is supported only with rapidjson.') 80 | end 81 | 82 | local file 83 | if args[1] == '-' then 84 | file = io.stdin 85 | else 86 | local err; file, err = io.open(args[1], 'r') 87 | if err then 88 | printf_err('ERROR: %s', err) 89 | exit(2) 90 | end 91 | end 92 | 93 | local script = file:read('*all') 94 | 95 | local parser_opts = iter(opts) 96 | :filter(function(k) return #k > 1 end) 97 | :map(function(k) return k:gsub('-', '_') end) 98 | :tomap() 99 | 100 | local ast = parser.parse(script, parser_opts) 101 | if not ast then 102 | printf_err('ERROR: Failed to parse the script') 103 | exit(1) 104 | end 105 | 106 | if opts.pretty and json._NAME == 'rapidjson' then 107 | print(json.encode(ast, { pretty = true, sort_keys = true })) 108 | else 109 | print(json.encode(ast)) 110 | end 111 | -------------------------------------------------------------------------------- /sh-parser/location_resolver.lua: -------------------------------------------------------------------------------- 1 | --------- 2 | -- Location resolver 3 | -- 4 | -- Resolves row (line) and column from position in multiline 
local byte = string.byte
local floor = math.floor
local inf = math.huge

local NL = byte('\n')


--- Builds an index of line starts for the given `text`.
-- Note: only LF (`\n`) is considered as a line separator.
--
-- @tparam string text The string to index.
-- @treturn {int,...} A list where the item at index *n* is the position in
--   `text` at which line *n* starts. The first item is always 1.
local function index_lines_pos (text)
  -- The first line always starts at position 1.
  local starts = { 1 }

  for offset = 1, #text do
    if byte(text, offset) == NL then
      -- A new line starts right behind each line feed.
      starts[#starts + 1] = offset + 1
    end
  end

  return starts
end

--- Returns an index of item in the given sorted `list` that is largest from
-- the items smaller or equal to the specified numeric `value`.
-- This implementation uses modified binary search algorithm.
--
-- @tparam {number,...} list A sorted list of numbers.
-- @tparam number value The reference value.
-- @tparam ?int start_idx (default is 1)
-- @tparam ?int end_idx (default is `#list`)
-- @treturn number|nil An index of the item closest to the `value`, or nil if
--   not found.
local function find_nearest_lower (list, value, start_idx, end_idx)
  -- Binary search over list[lo..hi] for the last item that is <= value.
  local lo = start_idx or 1
  local hi = end_idx or #list  -- FIXME do not calculate list size each time

  while lo <= hi do
    local mid = math.floor((lo + hi) / 2)
    local current = list[mid]

    if value < current then
      hi = mid - 1
    elseif value == current
        or value < (list[mid + 1] or math.huge) then
      -- `value` lies in the half-open interval [list[mid], list[mid + 1]);
      -- the last item has no upper bound.
      return mid
    else
      lo = mid + 1
    end
  end
  -- Falls through (returns nothing) when `value` is lower than every item in
  -- the searched range.
end

--- Resolves row and column of the given position.
--
-- @tparam {int,...} lines_index A sorted list of positions where lines start.
-- @tparam int pos The position to resolve.
-- @tparam int skip_lines Number of leading lines to exclude from the search.
-- @treturn int A 1-based row (line) number.
-- @treturn int A 1-based column number.
local function resolve_row_col (lines_index, pos, skip_lines)
  local first_candidate = skip_lines + 1
  local row = find_nearest_lower(lines_index, pos, first_candidate)

  return row, pos - lines_index[row] + 1
end
92 | local skip_lines = pos >= last_pos and last_row - 1 or 0 93 | local row, col = resolve_row_col(lines_index, pos, skip_lines) 94 | 95 | last_pos, last_row = pos, row 96 | 97 | return row, col, pos 98 | end 99 | 100 | return resolve 101 | end 102 | -------------------------------------------------------------------------------- /sh-parser/utils.lua: -------------------------------------------------------------------------------- 1 | --------- 2 | -- General utility functions 3 | -- 4 | -- **Note: This module is not part of the stable API!** 5 | ---- 6 | local fun = require 'sh-parser.fun_ext' 7 | 8 | local insert = table.insert 9 | local ipairs = ipairs 10 | local map = fun.map 11 | local type = type 12 | 13 | -- unpack is not global since Lua 5.3 14 | local unpack = table.unpack or unpack --luacheck: ignore 15 | 16 | 17 | --- Calls the function `func` with the given arguments. This is equivalent to: 18 | -- 19 | -- func(unpack(args), ...) 20 | -- 21 | -- but in a form that can be highly optimized by LuaJIT (~20x faster) when 22 | -- called with less than 4 arguments in the `args` table. If `#args > 3`, then 23 | -- it fallbacks to `unpack` (that is not JIT-compiled in LuaJIT 2.0). 24 | local function call (func, args, ...) 25 | local n = #args 26 | 27 | if n == 1 then 28 | return func(args[1], ...) 29 | elseif n == 2 then 30 | return func(args[1], args[2], ...) 31 | elseif n == 3 then 32 | return func(args[1], args[2], args[3], ...) 33 | else 34 | return func(unpack(args), ...) 35 | end 36 | end 37 | 38 | 39 | local M = {} 40 | 41 | M.LUA_V = _VERSION:sub(-3) 42 | 43 | --- Asserts that the given argument is of the correct type. 44 | -- 45 | -- @tparam number n The argument index. 46 | -- @param value The value to assert. 47 | -- @tparam string ttype The expected type. 48 | -- @raise if the argument n is not the correct type. 
function M.assert_arg (n, value, ttype)
  local actual = type(value)

  if actual == ttype then
    return
  end
  -- Level 2 blames the function whose argument is being checked.
  error(("bad argument #%d: expected a '%s', got a '%s'")
        :format(n, ttype, actual), 2)
end

--- Appends all items of the `src` list to the `dest` list.
-- The `dest` list is modified in-place and returned.
--
-- @tparam table dest The destination list to extend.
-- @tparam table src The source list to take items from.
-- @treturn table The given `dest` list.
function M.extend (dest, src)
  for _, element in ipairs(src) do
    insert(dest, element)
  end

  return dest
end

--- Tells whether the given `value` is a string.
--
-- @param value The value to check.
-- @treturn bool
function M.is_string (value)
  local kind = type(value)
  return kind == 'string'
end

--- Tells whether the given `str` is equal to its upper-cased form.
-- Note that this also holds for strings without any letter (e.g. `"_"`).
--
-- @tparam string str
-- @treturn bool
function M.is_upper (str)
  local upper = str:upper()
  return upper == str
end

--- Partial application.
-- Takes a function `fn` and some leading arguments, and returns a new
-- function. When that function is called, it calls `fn` with the arguments
-- given here followed by the arguments it has received itself, and returns
-- the result.
--
-- @tparam function fn
-- @param ... Arguments to pass to the `fn`.
-- @treturn function A partially applied function.
function M.partial (fn, ...)
  local bound = {...}

  local function applied (...)
    return call(fn, bound, ...)
  end

  return applied
end

--- Puts the `value` at the front of the `list`, moving other items upwards.
-- A shortcut for `table.insert(list, 1, value)`.
--
-- @tparam table list The list to modify (in-place).
-- @param value The value to insert.
function M.unshift (list, value)
  insert(list, 1, value)
end

--- Returns values from the given table.
--
-- NOTE(review): other modules chain iterator methods on the result, so it is
-- presumably an iterator produced by `fun.map` rather than a plain list -
-- confirm against `fun_ext`.
--
-- @function values
-- @tparam table tab
-- @treturn table A list of values.
M.values = M.partial(map, function(_, val)
  -- Drop the key, keep just the value.
  return val
end)

return M
--------------------------------------------------------------------------------
/sh-parser/lpeg_sugar.lua:
--------------------------------------------------------------------------------
---------
-- Syntactic sugar for building grammar with LPeg

local lpeg = require 'lpeg'

local utils = require 'sh-parser.utils'
local fun = require 'sh-parser.fun_ext'

local iter = fun.iter
local assert_arg = utils.assert_arg
local is_upper = utils.is_upper
local lpeg_type = lpeg.type
local LUA_V = utils.LUA_V

local Carg = lpeg.Carg
local Cc = lpeg.Cc
local Cp = lpeg.Cp
local Ct = lpeg.Ct
local V = lpeg.V


-- Adapter between the captures produced by the pattern built in
-- `on_define_rule` and the node-creating function `func`: it reorders the
-- arguments and shifts the position captured behind the match one character
-- back.
local function create_node (func, name, start_pos, captures, end_pos, subject)
  local last_pos = end_pos - 1
  return func(name, captures, start_pos, last_pos, subject)
end


local F = {}

--- Handler called when the *pattern* (rule) is being assigned to
-- the variable *name*. It stores the pattern in the `env.grammar` table.
--
-- A rule whose name starts with a character that is equal to its upper-cased
-- form (except `_`) is wrapped, so that its captures are collected into
-- a table and handed over, together with the rule name and the positions
-- around the match, to the function given as the first extra argument of the
-- match (`Carg(1)`); the second extra argument (`Carg(2)`) is passed along as
-- the subject.
--
-- @tparam string name
-- @tparam lpeg.Pattern pattern
-- @tparam table env Environment of the function given to `build_grammar`.
function F.on_define_rule (name, pattern, env)
  local first_char = name:sub(1, 1)
  local creates_node = first_char ~= '_' and is_upper(first_char)

  if creates_node then
    local captured = Carg(1) * Cc(name) * Cp() * Ct(pattern) * Cp() * Carg(2)
    pattern = captured / create_node
  end

  env.grammar[name] = pattern
end

--- Handler called when an undeclared variable is accessed inside the function
-- given to `build_grammar`. It should return `lpeg.V`.
--
-- @tparam string name
-- @tparam table env Environment of the function given to `build_grammar`.
-- @treturn lpeg.Pattern
function F.on_get_variable (name, env)
  -- Count the references; `on_grammar_built` reports the ones left undefined.
  env.used_vars[name] = (env.used_vars[name] or 0) + 1
  return V(name)
end

--- Handler called right before the resulting grammar is returned.
-- Raises an error if a name recorded in `env.used_vars` has no rule in
-- `env.grammar`.
--
-- @tparam table env Environment of the function given to `build_grammar`.
-- @raise if an undefined non-terminal has been referenced.
function F.on_grammar_built (env)
  for name, cnt in pairs(env.used_vars) do
    if not env.grammar[name] then
      error(('Undefined non-terminal "%s" referenced %d times'):format(name, cnt))
    end
  end
end


local M = {}

--- Builds a grammar table from the rules defined by the function `func`.
--
-- The `func` is called with a special environment table `env` as its first
-- argument; on Lua 5.1 the table is also installed with `setfenv`. Inside
-- this environment:
--
-- * every LPeg pattern assigned to a variable is registered as a rule via the
--   `on_define_rule` handler, and the name of the first one is stored in
--   `grammar[1]`; values of other types are stored in `env` itself;
-- * a name that is found neither in `env`, nor among the entries of `lpeg`
--   whose key starts with an "upper" character, the default handlers and
--   `defs`, is looked up in `global_env`; when that lookup yields nil or
--   false, the name is resolved via the `on_get_variable` handler.
--
-- @tparam function func The function that defines the rules.
-- @tparam ?table defs Additional entries for the environment (default is
--   `{}`). NOTE(review): they are chained after the default handlers, so
--   presumably they can override them - confirm against `fun_ext`.
-- @tparam ?table global_env The table to look up unknown names in. Default is
--   `getfenv(func)` on Lua 5.1, `_G` otherwise.
-- @treturn table The grammar table (`env.grammar`).
-- @raise if `func` is not a function, or when the `on_grammar_built` handler
--   raises.
function M.build_grammar (func, defs, global_env)
  assert_arg(1, func, 'function')
  defs = defs or {}

  if not global_env then
    global_env = LUA_V == '5.1' and getfenv(func) or _G  --luacheck: std lua51
  end

  -- First fallback of `env`: LPeg entries with "upper" initial, handlers from
  -- `F` and the given `defs`.
  local env_index = iter(lpeg)
      :filter(function(k) return is_upper(k:sub(1, 1)) end)
      :chain(F, defs)
      :tomap()

  -- Whether the initial rule (`grammar[1]`) has been set already.
  local init_defined = false

  -- Declared ahead, because the metamethods below refer to it.
  local env
  env = setmetatable({
    grammar = {},
    used_vars = {},
  }, {
    __index = setmetatable(env_index, {
      -- Last resort: a global, or a reference to a (maybe not yet defined)
      -- rule.
      __index = function(_, name)
        return global_env[name] or env.on_get_variable(name, env)
      end
    }),
    __newindex = function(tab, name, value)
      if lpeg_type(value) == 'pattern' then
        if not init_defined then
          -- The first assigned pattern becomes the initial rule.
          env.grammar[1] = name
          init_defined = true
        end
        env.on_define_rule(name, value, env)
      else
        -- Not a rule; store it as an ordinary variable.
        rawset(tab, name, value)
      end
    end
  })

  -- Call passed function with custom environment (5.1- and 5.2-style).
  if LUA_V == '5.1' then
    setfenv(func, env)  --luacheck: std lua51
  end
  func(env)

  env.on_grammar_built(env)

  return assert(env.grammar)
end

return M
--------------------------------------------------------------------------------
/AST.adoc:
--------------------------------------------------------------------------------
1 | = AST 2 | :toc: 3 | :toclevels: 4 4 | // custom 5 | :uri-posix: http://pubs.opengroup.org/onlinepubs/9699919799 6 | :uri-posix-scl: {uri-posix}/utilities/V3_chap02.html 7 | 8 | TODO: Explain two types of representation (low-level and high-level API). 9 | 10 | 11 | == Program 12 | 13 | The root node of the AST tree. 14 | 15 | .Captures 16 | [source, subs="+macros"] 17 | { ( <> 18 | | <> 19 | | <> 20 | | <> 21 | | <> 22 | | <> )* } 23 | 24 | .Node 25 | [source, subs="+macros"] 26 | { 27 | type: 'Program', 28 | body: [ ( <> 29 | | <> 30 | | <> 31 | | <> 32 | | <> 33 | | <> )* ], 34 | comments: [ <>* ] 35 | } 36 | 37 | 38 | == CompoundList 39 | 40 | .Captures 41 | [source, subs="+macros"] 42 | { ( <> 43 | | <> 44 | | <> 45 | | <> 46 | | <> 47 | | <> )+ } 48 | 49 | .Node 50 | [source, subs="+macros"] 51 | { 52 | type: 'CompoundList', 53 | cmds: [ ( <> 54 | | <> 55 | | <> 56 | | <> 57 | | <> 58 | | <> )+ ] 59 | } 60 | 61 | 62 | == SequentialList 63 | 64 | > Commands that are separated by a semicolon `;` shall be executed sequentially. 65 | > {uri-posix-scl}#tag_02_09_03_04[Read more…] 66 | 67 | .Format 68 | [source] 69 | command1; command2 [; command3 ] ... [;] 70 | 71 | .Captures 72 | [source, subs="+macros"] 73 | { ( <> 74 | | <> 75 | | <> 76 | | <> 77 | | <> )+ } 78 | 79 | 80 | .Node 81 | [source, subs="+macros"] 82 | { 83 | type: 'SequentialList', 84 | cmds: [ ( <> 85 | | <> 86 | | <> 87 | | <> 88 | | <> )+ ] 89 | } 90 | 91 | 92 | == AsyncCommand 93 | 94 | > If a command is terminated by the control operator `&`, the shell shall execute the command asynchronously in a subshell.
95 | > {uri-posix-scl}#tag_18_09_03_02[Read more…] 96 | 97 | .Format 98 | [source] 99 | command & 100 | 101 | .Captures 102 | [source, subs="+macros"] 103 | { ( <> 104 | | <> 105 | | <> 106 | | <> ) } 107 | 108 | .Node 109 | [source, subs="+macros"] 110 | { 111 | type: 'AsyncCommand', 112 | cmd: ( <> 113 | | <> 114 | | <> 115 | | <> ) 116 | } 117 | 118 | 119 | == AndOrList 120 | 121 | > An _AND-OR list_ is a sequence of one or more pipelines separated by the operators `&&` and `||`. 122 | > They shall have equal precedence and shall be evaluated with left associativity. 123 | > {uri-posix-scl}#tag_02_09_03[Read more…] 124 | 125 | .Format 126 | [source] 127 | command1 && command2 [ || command3] ... 128 | command1 || command2 [ && command3] ... 129 | 130 | 131 | === AndList 132 | 133 | .Captures 134 | [source, subs="+macros"] 135 | { 136 | ( <> 137 | | <> 138 | | <> 139 | | <> ), 140 | ( 141 | ( <> 142 | | <> 143 | | <> ) 144 | )+ 145 | } 146 | 147 | .Node 148 | [source, subs="+macros"] 149 | { 150 | type: 'AndList', 151 | cmds: [ ( <> 152 | | <> 153 | | <> 154 | | <> ), 155 | ( 156 | ( <> 157 | | <> 158 | | <> ) 159 | )+ ] 160 | } 161 | 162 | 163 | === OrList 164 | 165 | .Captures 166 | [source, subs="+macros"] 167 | { 168 | ( <> 169 | | <> 170 | | <> 171 | | <> ), 172 | ( 173 | ( <> 174 | | <> 175 | | <> ) 176 | )+ 177 | } 178 | 179 | .Node 180 | [source, subs="+macros"] 181 | { 182 | type: 'OrList', 183 | cmds: [ ( <> 184 | | <> 185 | | <> 186 | | <> ), 187 | ( 188 | ( <> 189 | | <> 190 | | <> ) 191 | )+ ] 192 | } 193 | 194 | 195 | == Not 196 | 197 | .Format 198 | [source] 199 | ! command 200 | 201 | .Captures 202 | [source, subs="+macros"] 203 | { ( <> 204 | | <> ) } 205 | 206 | .Node 207 | [source, subs="+macros"] 208 | { 209 | type: 'Not', 210 | cmd: ( <> 211 | | <> ) 212 | } 213 | 214 | 215 | == PipeSequence 216 | 217 | > A _pipeline_ is a sequence of one or more commands separated by the control operator `|`. 
218 | > For each command but the last, the shell shall connect the standard output of the command to the standard input of the next command as if by creating a pipe and passing the write end of the pipe as the standard output of the command and the read end of the pipe as the standard input of the next command. 219 | > {uri-posix-scl}#tag_18_09_02[Read more…] 220 | 221 | .Format 222 | [source] 223 | command1 | command2 [| command3 ] ... 224 | 225 | .Captures 226 | [source, subs="+macros"] 227 | { <>+ } 228 | 229 | .Node 230 | [source, subs="+macros"] 231 | { 232 | type: 'PipeSequence', 233 | cmds: [ <>+ ] 234 | } 235 | 236 | 237 | == Command 238 | 239 | 240 | === SimpleCommand 241 | 242 | > A “simple command” is a sequence of optional variable assignments and redirections, in any sequence, optionally followed by words and redirections, terminated by a control operator. 243 | > {uri-posix-scl}#tag_18_09_01[Read more…] 244 | 245 | .Captures 246 | [source, subs="+macros"] 247 | ---- 248 | { { ( <> | <> )* }, <> | nil, { ( <> | <> )* } | nil } 249 | ---- 250 | <1> Redirections and assignments (prefix). 251 | <2> Command name. If nil, then (1) is not empty and (3) is nil. 252 | <3> Redirections and command arguments (suffix). 253 | 254 | .Nodes 255 | TODO 256 | 257 | 258 | === Assignments 259 | 260 | .Format 261 | [source] 262 | [modifier] varname=value ... 263 | 264 | .Nodes 265 | [source, subs="+macros"] 266 | { 267 | type: 'Assignments', 268 | assignments: [ <>+ ], 269 | modifier: 'export' | 'local' | 'readonly' | null 270 | } 271 | 272 | 273 | === CompoundCommand 274 | 275 | 276 | ==== BraceGroup 277 | 278 | > Execute _compound-list_ in the current process environment. 
279 | > {uri-posix-scl}#tag_18_09_04[Read more…] 280 | 281 | .Format 282 | [source] 283 | { compound-list ; } 284 | 285 | .Captures 286 | [source, subs="+macros"] 287 | { <>, { <>* } } 288 | 289 | .Node 290 | [source, subs="+macros"] 291 | { 292 | type: 'BraceGroup', 293 | body: <>, 294 | redirs: [ <>* ] 295 | } 296 | 297 | 298 | ==== Subshell 299 | 300 | > Execute _compound-list_ in a subshell environment; see {uri-posix-scl}#tag_18_12[Shell Execution Environment]. 301 | > {uri-posix-scl}#tag_18_09_04[Read more…] 302 | 303 | .Format 304 | [source] 305 | ( compound-list ) 306 | 307 | .Captures 308 | [source, subs="+macros"] 309 | { <> | nil, { <>* } } 310 | 311 | .Node 312 | [source, subs="+macros"] 313 | { 314 | type: 'Subshell', 315 | body: <> | null, 316 | redirs: [ <>* ] 317 | } 318 | 319 | 320 | ==== If 321 | 322 | > The *if* command shall execute a _compound-list_ and use its exit status to determine whether to execute another _compound-list_. 323 | > {uri-posix-scl}#tag_18_09_04_07[Read more…] 324 | 325 | .Format 326 | [source] 327 | if compound-list 328 | then 329 | compound-list 330 | [elif compound-list 331 | then 332 | compound-list] ... 333 | [else 334 | compound-list] 335 | fi 336 | 337 | .Captures 338 | [source, subs="+macros"] 339 | If : { { IfClause, ElifClause*, ElseClause? }, { <>* } } 340 | IfClause : { <>, <> } 341 | ElifClause : { <>, <> } 342 | ElseClause : { <> } 343 | 344 | .Node 345 | [source, subs="+macros"] 346 | { 347 | type: 'If', 348 | clauses: [ IfClause, ElifClause*, ElseClause? ], 349 | redirs: [ <>* ] 350 | } 351 | { 352 | type: 'IfClause', 353 | cond: <>, 354 | body: <> 355 | } 356 | { 357 | type: 'ElifClause', 358 | cond: <>, 359 | body: <> 360 | } 361 | { 362 | type: 'ElseClause', 363 | body: <> 364 | } 365 | 366 | 367 | ==== For 368 | 369 | > The *for* loop shall execute a sequence of commands for each member in a list of _items_. 
370 | > {uri-posix-scl}#tag_18_09_04_03[Read more…] 371 | 372 | .Format 373 | [source] 374 | for name [ in [word ... ]] 375 | do 376 | compound-list 377 | done 378 | 379 | .Captures 380 | [source, subs="+macros"] 381 | ---- 382 | { <>, { <>* }, <>, { <>* } } 383 | ---- 384 | <1> Variable name. 385 | <2> List of items to loop over. 386 | <3> Body of the for loop. 387 | <4> Redirections. 388 | 389 | .Node 390 | [source, subs="+macros"] 391 | { 392 | type: 'For', 393 | var: <>, 394 | items: [ <>* ], 395 | body: <>, 396 | redirs: [ <>* ] 397 | } 398 | 399 | 400 | ==== Case 401 | 402 | > The conditional construct *case* shall execute the _compound-list_ corresponding to the first one of several _patterns_ (see {uri-posix-scl}#tag_18_13[Pattern Matching Notation]) that is matched by the string resulting from the tilde expansion, parameter expansion, command substitution, arithmetic expansion, and quote removal of the given word. 403 | > {uri-posix-scl}#tag_18_09_04_05[Read more…] 404 | 405 | .Format 406 | [source] 407 | case word in 408 | [(] pattern1 ) compound-list ;; 409 | [[(] pattern[ | pattern] ... ) compound-list ;;] ... 410 | [[(] pattern[ | pattern] ... ) compound-list] 411 | esac 412 | 413 | .Captures 414 | [source, subs="+macros"] 415 | Case : { <>, { CaseItem* }, { <>* } } 416 | CaseItem : { { <>+ }, <> } 417 | 418 | .Node 419 | [source, subs="+macros"] 420 | { 421 | type: 'Case', 422 | var: Name, 423 | cases: [ CaseItem* ], 424 | redirs: [ <>* ] 425 | } 426 | { 427 | type: 'CaseItem', 428 | pattern: [ <>+ ], 429 | body: <> 430 | } 431 | 432 | 433 | ==== While 434 | 435 | > The *while* loop shall continuously execute one _compound-list_ as long as another _compound-list_ has a zero exit status. 
436 | > {uri-posix-scl}#tag_18_09_04_09[Read more…] 437 | 438 | .Format 439 | [source] 440 | while compound-list-1 441 | do 442 | compound-list-2 443 | done 444 | 445 | .Captures 446 | [source, subs="+macros"] 447 | { <>, { <>* } } 448 | 449 | .Node 450 | [source, subs="+macros"] 451 | { 452 | type: 'While', 453 | cond: <>, 454 | body: <>, 455 | redirs: [ <>* ] 456 | } 457 | 458 | 459 | ==== Until 460 | 461 | > The **until** loop shall continuously execute one _compound-list_ as long as another _compound-list_ has a non-zero exit status. 462 | > {uri-posix-scl}#tag_18_09_04_11[Read more…] 463 | 464 | .Format 465 | [source] 466 | until compound-list-1 467 | do 468 | compound-list-2 469 | done 470 | 471 | .Captures 472 | [source, subs="+macros"] 473 | { <>, { <>* } } 474 | 475 | .Node 476 | [source, subs="+macros"] 477 | { 478 | type: 'Until', 479 | cond: <>, 480 | body: <>, 481 | redirs: [ <>* ] 482 | } 483 | 484 | 485 | === FunctionDef 486 | 487 | > A function is a user-defined name that is used as a simple command to call a compound command with new positional parameters. 488 | > {uri-posix-scl}#tag_18_09_05[Read more…] 489 | 490 | 491 | .Format 492 | [source] 493 | fname ( ) compound-command [io-redirect ...] 494 | 495 | .Captures 496 | [source, subs="+macros"] 497 | { <>, <>, { <>* } } 498 | 499 | .Node 500 | [source, subs="+macros"] 501 | { 502 | type: 'FunctionDef', 503 | name: <>, 504 | body: <>, 505 | redirs: [ <>* ] 506 | } 507 | 508 | 509 | == Redirect 510 | 511 | > Redirection is used to open and close files for the current shell execution environment (see {uri-posix-scl}#tag_18_12[Shell Execution Environment]) or for any command. 512 | > {uri-posix-scl}#tag_18_07[Read more…] 513 | 514 | 515 | === RedirectFile 516 | 517 | .Format 518 | [source] 519 | [n]redir-file-op word 520 | 521 | .Captures 522 | [source, subs="+macros"] 523 | ---- 524 | { number | nil, redir-file-op, <> } 525 | ---- 526 | <1> File descriptor number (or _nil_ if not specified). 
527 | <2> Operator. 528 | <3> File path, or file descriptor number. 529 | 530 | .Node 531 | [source, subs="+macros"] 532 | { 533 | type: 'RedirectFile', 534 | fd: number | null, 535 | op: redir-file-op, 536 | file: <> 537 | } 538 | 539 | .Terminals 540 | [source] 541 | redir-file-op: '<' | '<&' | '>' | '>|' | '>>' | '>&' | '<>' 542 | 543 | 544 | === RedirectHereDoc 545 | 546 | > The redirection operators `<<` and `<\<-` both allow redirection of subsequent lines read by the shell to the input of a command. 547 | > The redirected lines are known as a “here-document”. 548 | > {uri-posix-scl}#tag_18_07_04[Read more…] 549 | 550 | .Format 551 | [source] 552 | [n] <<[-] word 553 | here-document 554 | delimiter 555 | 556 | .Captures 557 | [source, subs="+macros"] 558 | ---- 559 | { number | nil, '<<' | '<<-', <>, <> (!), number } 560 | ---- 561 | <1> File descriptor number (or _nil_ if not specified). 562 | <2> Operator. 563 | <3> The delimiter word. 564 | <4> TODO 565 | <5> ID of this here-document. 566 | 567 | .Nodes 568 | [source, subs="+macros"] 569 | { 570 | type: 'RedirectHereDoc', 571 | fd: number | null, 572 | op: '<<' | '<<-', 573 | delimiter: <>, 574 | content: <> 575 | } 576 | 577 | 578 | == HereDocContent 579 | 580 | .Captures 581 | [source, subs="+macros"] 582 | ---- 583 | { { <>* }, number } 584 | ---- 585 | <1> Content of the here-document. 586 | <2> ID of this here-document. 587 | 588 | .Nodes 589 | [source, subs="+macros"] 590 | { 591 | type: 'HereDocContent', 592 | content: [ <>* ] 593 | } 594 | 595 | 596 | == Assignment 597 | 598 | .Format 599 | [source] 600 | varname=[value] 601 | 602 | .Captures 603 | [source, subs="+macros"] 604 | ---- 605 | { <>, <> | nil } 606 | ---- 607 | <1> Variable name. 608 | <2> Assigned value.
609 | 610 | .Nodes 611 | [source, subs="+macros"] 612 | { 613 | type: 'Assignment', 614 | name: <>, 615 | value: <> | null 616 | } 617 | 618 | 619 | == Name 620 | 621 | .Captures 622 | [source, subs="+macros"] 623 | { string } 624 | 625 | .Node 626 | [source, subs="+macros"] 627 | { 628 | type: 'Name', 629 | text: string 630 | } 631 | 632 | 633 | == Word 634 | 635 | .Captures 636 | [source, subs="+macros"] 637 | { ( string | <> )+ } 638 | 639 | .Node 640 | [source, subs="+macros"] 641 | { 642 | type: 'Word', 643 | content: [ ( string | <> )+ ] 644 | } 645 | 646 | 647 | === Expansion 648 | 649 | 650 | ==== ParameterExpansion 651 | 652 | See {uri-posix-scl}#tag_18_06_02[Parameter Expansion] for more information. 653 | 654 | .Format 655 | [source] 656 | ${[prefix-op] variable [infix-op [ word ]]} 657 | 658 | .Captures 659 | [source, subs="+macros"] 660 | { prefix-op?, param-name, ( infix-op, string )? } 661 | 662 | .Node 663 | [source, subs="+macros"] 664 | { 665 | type: 'ParameterExpansion', 666 | op_pre: prefix-op | null, 667 | param: string, 668 | op_in: infix-op | null, 669 | word: string | null 670 | } 671 | 672 | .Terminals 673 | [source] 674 | prefix-op: '#' 675 | infix-op: ':-' | '-' | ':=' | '=' | ':?' | '?' | ':+' | '+' | '%%' | '%' | '##' | '#' | ':' | '//' | '/' 676 | 677 | 678 | ==== ArithmeticExpansion 679 | 680 | > Arithmetic expansion provides a mechanism for evaluating an arithmetic expression and substituting its value. 681 | > {uri-posix-scl}#tag_18_06_04[Read more…] 682 | 683 | .Format 684 | [source] 685 | $((expression)) 686 | 687 | .Captures 688 | [source, subs="+macros"] 689 | { string } 690 | 691 | .Node 692 | [source, subs="+macros"] 693 | { 694 | type: 'ArithmeticExpansion', 695 | text: string 696 | } 697 | 698 | 699 | ==== CommandSubstitution 700 | 701 | > Command substitution allows the output of a command to be substituted in place of the command name itself. 
702 | > {uri-posix-scl}#tag_18_06_03[Read more…] 703 | 704 | .Format 705 | [source] 706 | $(command) 707 | 708 | .Captures 709 | [source, subs="+macros"] 710 | { <>* } 711 | 712 | .Node 713 | [source, subs="+macros"] 714 | { 715 | type: 'CommandSubstitution', 716 | cmds: [ <>* ] 717 | } 718 | 719 | 720 | ==== CommandSubBackquote 721 | 722 | .Format 723 | [source] 724 | `command` 725 | 726 | .Captures 727 | [source, subs="+macros"] 728 | { string } 729 | 730 | .Node 731 | [source, subs="+macros"] 732 | { 733 | type: 'CommandSubBackquote', 734 | text: string 735 | } 736 | 737 | 738 | == Comment 739 | 740 | .Format 741 | [source] 742 | ---- 743 | # foo bar 744 | ---- 745 | 746 | .Captures 747 | [source, subs="+macros"] 748 | { string } 749 | 750 | .Node 751 | [source, subs="+macros"] 752 | { 753 | type: 'Comment', 754 | text: string 755 | } 756 |
--------------------------------------------------------------------------------
/sh-parser/grammar.lua:
--------------------------------------------------------------------------------
---------
-- LPeg grammar for POSIX shell

local lpeg = require 'lpeg'
local fun = require 'sh-parser.fun_ext'
local lpeg_sugar = require 'sh-parser.lpeg_sugar'
local utils = require 'sh-parser.utils'

-- Shortcuts for the helper functions used below.
local build_grammar = lpeg_sugar.build_grammar
local chain = fun.chain
local extend = utils.extend
local iter = fun.iter
local op = fun.op
local par = utils.partial
local unshift = utils.unshift
local values = utils.values

-- Shortcuts for the LPeg pattern and capture constructors.
local B = lpeg.B
local C = lpeg.C
local Carg = lpeg.Carg
local Cb = lpeg.Cb
local Cc = lpeg.Cc
local Cf = lpeg.Cf
local Cg = lpeg.Cg
local Cp = lpeg.Cp
local Cs = lpeg.Cs
local P = lpeg.P
local R = lpeg.R
local S = lpeg.S


-- Terminals
local ALPHA = R('AZ', 'az')
local ANY = P(1)
-- Succeeds (without consuming anything) only at position 1 of the subject.
local BOF = P(function(_, pos) return pos == 1 end)  -- Beginning Of File
local BQUOTE = P('`')
local DIGIT = R('09')
local DOLLAR = P('$')
local DQUOTE = P('"')
local EOF = P(-1)  -- End Of File
local EQUALS = P('=')
local ESC = P('\\')  -- escape character
local HASH = P('#')
local LBRACE = P('{')
local LDPAREN = P('((')
local LF = P('\n')
local LPAREN = P('(')
local RBRACE = P('}')
local RDPAREN = P('))')
local RPAREN = P(')')
local SEMI = P(';')
local SQUOTE = P("'")
-- ASCII letter, digit, or underscore.
local WORD = R('AZ', 'az', '09') + P('_')
-- Space or horizontal tab.
local WSP = S(' \t')

-- Shell operators containing single character.
-- Maps the name of a terminal symbol to the operator string.
local operators1 = {
  AND_OP = '&',
  GREAT_OP = '>',
  LESS_OP = '<',
  LPAREN_OP = '(',
  PIPE_OP = '|',
  RPAREN_OP = ')',
  SEMI_OP = ';',
}
-- Shell operators containing more than one character.
-- Maps the name of a terminal symbol to the operator string.
local operators2 = {
  AND_IF_OP = '&&',
  CLOBBER_OP = '>|',
  DGREAT_OP = '>>',
  DLESS_OP = '<<',
  DLESSDASH_OP = '<<-',
  DSEMI_OP = ';;',
  GREATAND_OP = '>&',
  LESSAND_OP = '<&',
  LESSGREAT_OP = '<>',
  OR_IF_OP = '||',
}

-- Shell reserved words.
-- Maps the name of a terminal symbol to the reserved word.
local reserved_words = {
  CASE = 'case',
  DO = 'do',
  DONE = 'done',
  ELIF = 'elif',
  ELSE = 'else',
  ESAC = 'esac',
  FI = 'fi',
  FOR = 'for',
  IF = 'if',
  THEN = 'then',
  UNTIL = 'until',
  WHILE = 'while',

  BANG_R = '!',
  IN = 'in',
  LBRACE_R = '{',
  RBRACE_R = '}',
}

-- Pattern for Special parameters.
local SPECIAL_PARAM = S('@*#?-$!0')

-- Pattern that matches any parameter expansion "operator" that may be used
-- between the parameter name and the word (see "infix-op" in AST.adoc).
-- The alternatives are folded into one ordered choice; every longer operator
-- is listed before the shorter one it starts with (e.g. ':-' before ':').
-- NOTE(review): this relies on `iter` yielding the list in order - confirm.
local PARAM_EXP_OP = iter({
    ':-', '-', ':=', '=', ':?', '?', ':+', '+', '%%', '%', '##', '#',  -- POSIX
    ':', '//', '/'  -- non-POSIX
  }):map(P):reduce(op.add, P(false))

-- Pattern that matches any character used in shell operators.
local OPERATOR_CHARS = values(operators1)
    :map(P)
    :reduce(op.add, P(false))  -- P(false) is the neutral seed of the choice

-- XXX: is this correct?
local WORD_BOUNDARY = S(' \t\n') + BOF + EOF + OPERATOR_CHARS

-- Pairs of a terminal name and a pattern that matches the reserved word only
-- when it is followed by a word boundary (which is not consumed).
local reserved_words_patt = iter(reserved_words)
    :map(function(name, word)
      return name, P(word) * #WORD_BOUNDARY
    end)

-- Pattern that matches any shell reserved word.
-- XXX: sort them?
local RESERVED_WORD = values(reserved_words_patt)
    :reduce(op.add, P(false))

-- Map of special terminal symbols (patterns).
local terminals = chain(
    iter(operators1):map(function(name, sym)
      -- Ensure that operator x does not match xx when xx is valid operator.
      if values(operators2):index_of(sym..sym) then
        return name, P(sym) * -P(sym)
      end
      return name, P(sym)
    end),
    iter(operators2):map(function(name, sym)
      return name, P(sym)
    end),
    reserved_words_patt
  ):tomap()


--- Creates a pattern that matches the given `patt` preceded by the escape
-- character and captures it in its unescaped form.
--
-- @tparam lpeg.Pattern patt The pattern to escape. It is compared with the
--   `LF` terminal by identity.
-- @treturn lpeg.Pattern
local function escaped (patt)
  if patt == LF then
    -- Escaped line feed: produce empty capture.
    return (ESC * patt) / ''
  end
  -- Omit escaping char from capture, keep what follows it.
  return (ESC / '') * patt
end

--- Creates a pattern that matches any single character except the given
-- patterns, unless they are preceded by the escape character.
--
-- @usage any_except(P' ')  --> escaped(P' ') + (ANY - P' ')
--
-- @tparam lpeg.Pattern ... The patterns to *not* capture.
-- @treturn lpeg.Pattern
local function any_except (...)
  local excluded = iter({...})

  -- Escaped form of any of the excluded patterns...
  local escaped_any = excluded:map(escaped):reduce(op.add, P(false))
  -- ...or any character that does not start an excluded pattern.
  local unescaped_rest = excluded:reduce(op.sub, ANY)

  return escaped_any + unescaped_rest
end

--- Transforms captures from *and\_or\_list* into left-associative tree of n-ary
-- nodes *AndList* and *OrList*.
--
-- It works around the fact that `&&` and `||` are left-associative operators
-- of the same precedence: a new node is created every time the operator
-- changes (and at the end of the list), and it becomes the first child of the
-- node that follows.
--
-- @usage
--   subject = "a && b && c || d || e && f"
--   captures = { {a}, 2, "&&", {b}, 7, "&&", {c}, 12, "||",
--                {d}, 17, "||", {e}, 22, "&&", {f}, 27 }
--
--   capture_and_or(create_node, 1, captures, subject)  --> Z
--     ~> create_node("AndList", {a, b, c}, 1, 12, subject)  --> X
--     ~> create_node("OrList", {X, d, e}, 1, 22, subject)   --> Y
--     ~> create_node("AndList", {Y, f}, 1, 27, subject)     --> Z
--
--          Z        Y       X
--      (AndList (OrList (AndList a b c) d e) f)
--
-- @tparam func create_node The function to be called to create AST nodes; it
--   is called with the node name, the list of children, `start_pos`, the end
--   position and the `subject`.
-- @tparam int start_pos Index of the first character of the captured substring.
-- @tparam table captures Table with shape `{table,int,string, table,int,string, ...}`.
--   Element *i* is table of children nodes (pipeline and optional comments),
--   *i + 1* is position of the end of the last child node (int), *i + 2* is
--   operator ("&&", or "||"); the last triple has no operator.
-- @tparam string subject The entire subject (i.e. input text).
-- @return Result of the last call of `create_node` (nil if it has not been
--   called at all, i.e. when the `captures` contain no operator).
local function capture_and_or (create_node, start_pos, captures, subject)
  local names_by_op = { ['&&'] = 'AndList', ['||'] = 'OrList' }
  local result, prev_op
  local operands = {}
  local idx, total = 1, #captures

  while idx <= total do
    local group, group_end, following_op =
        captures[idx], captures[idx + 1], captures[idx + 2]

    extend(operands, group)

    -- The run of the same operator ends here: wrap what has been collected
    -- so far and continue with the new node as the first operand.
    if prev_op and prev_op ~= following_op then
      local node_type = assert(names_by_op[prev_op], 'invalid operator '..prev_op)
      result = create_node(node_type, operands, start_pos, group_end, subject)
      operands = { result }
    end

    prev_op = following_op
    idx = idx + 3
  end

  return result
end

--- Predicate function that matches at the position where content of some
-- here-document starts, and produces the corresponding HereDocInfo table.
--
-- This match-time capture function is called by the parser each time when
-- a new line is consumed (see the rule *newline_list*).
--
-- @usage
--   Cg(Cmt(_heredocs_stack, find_heredoc), 'heredoc')
--
-- @tparam string _ The entire subject (unused).
-- @tparam int pos The current position.
-- @tparam {HereDocInfo,...} heredocs (see `capture_heredoc`)
-- @treturn[1] false (no match)
-- @treturn[2] true (match)
-- @treturn[2] HereDocInfo
local function find_heredoc (_, pos, heredocs)
  -- Heredocs list is ordered from latest to earliest to optimize this lookup:
  -- once an entry starts before `pos`, the remaining ones are not examined.
  for _, candidate in ipairs(heredocs) do
    local start = candidate.cont_start

    if pos >= start then
      if pos == start then
        return true, candidate
      end
      return false
    end
  end

  return false
end

--- Captures here-document redirection.
--
-- @tparam bool strip_tabs Whether to strip leading tabs (for `<<-`).
-- @tparam string subject The entire subject (i.e. input text).
-- @tparam int pos The current position.
-- @tparam string delimiter The captured delimiter string.
-- @tparam bool quoted Is any character in the delimiter word quoted?
-- @tparam {HereDocInfo,...} heredocs The list of parsed here-document
--   redirections into which a new HereDocInfo will be added.
-- @treturn true Match and do not consume any input.
-- @treturn string The delimiter word.
-- @treturn table A "placeholder" for future content.
-- @treturn int ID of the here-document.
-- @raise Error if here-document is not terminated.
local function capture_heredoc (strip_tabs, subject, pos, delimiter, quoted, heredocs)
  local delim_pat = '\n'..(strip_tabs and '\t*' or '')
                  ..delimiter:gsub('%p', '%%%1')  -- escape punctuation chars
                  ..'\n'

  local nl_pos = subject:find('\n', pos, true)
  local cont_start = nl_pos and nl_pos + 1 or #subject
  -- Position of the newline from which the closing delimiter is searched.
  local search_from = nl_pos or #subject

  -- Skip overlapping heredocs (multiple heredoc redirects on the same line).
  -- This must be done BEFORE looking for the closing delimiter; otherwise
  -- a line inside the content of a preceding here-document that happens to be
  -- equal to our delimiter would be taken for the end of this one.
  while true do
    local _, entry = find_heredoc(nil, cont_start, heredocs)
    if entry then
      cont_start = entry.delim_end + 1
      search_from = entry.delim_end
    else
      break
    end
  end

  -- The subject is padded with a newline, so that the delimiter is found even
  -- on the last line without a trailing newline.
  local cont_end, delim_end = (subject..'\n'):find(delim_pat, search_from)
  if not cont_end then
    -- This is somehow valid in shell implementations, but we can't parse it.
    -- Since this is most likely an error in the script, just raise an error.
    error(('%d: Here-document with delimiter "%s" is not terminated'):format(pos, delimiter))
  end

  local content = {}
  local id = #heredocs + 1

  --- @table HereDocInfo
  local heredoc = {
    id = id,                  -- int: ID of this heredoc.
    cont_start = cont_start,  -- int: Position of the first character of the heredoc's content.
    cont_end = cont_end,      -- int: Position of trailing newline of the heredoc's content.
                              --   It's `cont_start - 1` if there's no content (not even blank line)!
    delim_end = delim_end,    -- int: Position of a newline after the closing delimiter.
    quoted = quoted,          -- bool: false if word expansions should be parsed, true otherwise.
    content = content,        -- table: An empty table that will be mutated into *HereDocContent*.
  }
  -- The newest here-document goes first (see `find_heredoc`).
  unshift(heredocs, heredoc)

  return true, delimiter, content, id
end

--- Predicate function that matches when the `pos` is inside the here-document
-- specified by the `heredoc`. It does not consume any input.
--
-- The position where the content starts is always considered to be inside,
-- even if the here-document has no content (i.e. `cont_end < cont_start`).
--
-- @usage
--   Cg(Cmt(_heredocs_stack, find_heredoc), 'heredoc')
--   Cmt(Cb'heredoc', inside_heredoc)
--
-- @tparam string _ The entire subject (unused).
-- @tparam int pos The current position.
-- @tparam HereDocInfo heredoc (see `capture_heredoc`)
-- @treturn bool Whether the `pos` is inside the here-document.
local function inside_heredoc (_, pos, heredoc)
  local first, last = heredoc.cont_start, heredoc.cont_end

  return pos == first or (pos > first and pos <= last)
end

--- Captures content of the specified quoted or empty here-document.
--
-- This function is used as a match-time capture to match and capture content
-- of a here-document with quoted delimiter (which means that the content is
-- not expanded).
--
-- @usage
--   Cg(Cmt(_heredocs_stack, find_heredoc), 'heredoc')
--   Cmt(Cb'heredoc', capture_nonexp_heredoc)
--
-- @tparam string subject The entire subject (i.e. input text).
-- @tparam int pos The current position.
-- @tparam HereDocInfo heredoc (see `capture_heredoc`)
-- @treturn[1] boolean true to match without consuming any input,
--   false to not match.
337 | -- @treturn[2] int A new position (at the end of the content). 338 | -- @treturn[2] string The here-document's content. 339 | local function capture_nonexp_heredoc (subject, pos, heredoc) 340 | local cont_end = heredoc.cont_end 341 | 342 | -- If there's no content (e.g. `<