├── test ├── tests │ ├── tilde_arith │ ├── redir_indirect │ ├── diverge.sh │ ├── escaping │ ├── test.sh │ ├── backslash │ ├── single_quoted_dollar_sign.sh │ ├── empty_case │ ├── nested_shell_in_subshell.sh │ ├── builtin.trap.exitcode.test │ ├── for_spaces.sh │ ├── braces_amp.sh │ ├── grab_submissions.sh │ ├── send_emails.sh │ ├── weird_tilde.sh │ ├── aaaa │ ├── aaaa_single │ ├── run_grader.sh │ ├── grade.sh │ ├── run_lda.sh │ └── syntax ├── failing │ ├── array.sh │ ├── test-exclam.sh │ ├── safe6.sh │ ├── superoptimize.sh │ ├── invalidate1.sh │ ├── invalidate0.sh │ ├── heredoc2.sh │ ├── incomplete-arith.sh │ ├── test-shlex-aux.sh │ ├── pay_respects.sh │ ├── run_parser_on_scripts.sh │ ├── async.sh │ ├── 3.resiz.sh │ ├── split_pipe.sh │ ├── 1.tomp3.sh │ ├── remote_exec_graph.sh │ ├── quickcheck.sh │ ├── wc.2.sh │ ├── mk_meta.sh │ ├── split-unix50.sh │ ├── sieve.sh │ ├── append_nl_if_not.sh │ ├── unzip-1.sh │ ├── buggy_comm_script.sh │ ├── timing-JSON.sh │ ├── pash_set_from_to.sh │ ├── multiply.sh │ ├── exit_error.sh │ ├── process-aliases.sh │ ├── bio3.sh │ ├── dgsh_tee.sh │ ├── pash_source_declare_vars.sh │ ├── maximal.sh │ ├── auto-split.sh │ ├── run_alias.sh │ ├── timing.sh │ ├── up.sh │ ├── sq.sh │ ├── with-ec2.sh │ ├── statistics.sh │ ├── bio.sh │ ├── test_rt_py.sh │ ├── test_ast2shell_py.sh │ ├── test_rt.sh │ └── test_JSON_to_shell2.sh ├── pash_tests │ ├── unsafe0.sh │ ├── unsafe1.sh │ ├── safe0.sh │ ├── safe1.sh │ ├── set-v.sh │ ├── cat-redir-fail.sh │ ├── echo_args.sh │ ├── heredoc1.sh │ ├── safe3.sh │ ├── safe4.sh │ ├── args_with_spaces.sh │ ├── concat.sh │ ├── expand-u-positional.sh │ ├── redirect_wrapper.sh │ ├── safe2.sh │ ├── unsafe2.sh │ ├── grep.sh │ ├── no_in_script.sh │ ├── redirect.sh │ ├── star-escape.sh │ ├── add.sh │ ├── cmd_sbst_subscript.sh │ ├── deadlock_test.sh │ ├── expand-u.sh │ ├── diverge.sh │ ├── export_var_script.sh │ ├── loop1.sh │ ├── sort-opt_env.sh │ ├── sort_env.sh │ ├── test.sh │ ├── wf_env_test.sh │ ├── bigrams_env_test.sh │ ├── 
grep_env_test.sh │ ├── micro_10_env_test.sh │ ├── minimal_sort.sh │ ├── sort_env_small.sh │ ├── sort_env_test.sh │ ├── alt_bigrams_env_test.sh │ ├── diff_env_test.sh │ ├── double_sort.sh │ ├── micro_1000_env_test.sh │ ├── minimal_grep_env_test.sh │ ├── minimal_sort_env_test.sh │ ├── set-dash-v-x.sh │ ├── deadlock_test_env_test.sh │ ├── double_sort_env_test.sh │ ├── set-diff_env_test.sh │ ├── sort-opt.sh │ ├── uniq.sh │ ├── comm-par-test2_env_test.sh │ ├── comm-par-test_env_test.sh │ ├── minimal_grep_stdin.sh │ ├── set-e-2.sh │ ├── shortest_scripts_env_test.sh │ ├── topn_env_test.sh │ ├── call_distrib_planner_example.sh │ ├── escape-madness.sh │ ├── for_loop_simple.sh │ ├── head.sh │ ├── tilde.sh │ ├── wc.sh │ ├── remote_read.sh │ ├── tail.sh │ ├── dfs_split_reader.sh │ ├── exit_code.sh │ ├── for_loop_simple_env_test.sh │ ├── remote_write.sh │ ├── for-echo.sh │ ├── cat_output_files.sh │ ├── var_assgn.sh │ ├── count.sh │ ├── safe7.sh │ ├── distrotest_env.sh │ ├── safe5.sh │ ├── exec-redirections.sh │ ├── topn.sh │ ├── spell_env_test.sh │ ├── autogen.sh │ ├── trap.sh │ ├── for_spaces.sh │ ├── redirect_stdin_to.sh │ ├── set_bug.sh │ ├── sort.sh │ ├── longest-man.sh │ ├── max-temp-process.sh │ ├── readonly.sh │ ├── archive.sh │ ├── count_packets.sh │ ├── for-loop.sh │ ├── install-deps.sh │ ├── redir-var-test.sh │ ├── sort-sort.sh │ ├── pretty_print_json.sh │ ├── sine.sh │ ├── 1.sh │ ├── braces_amp.sh │ ├── pash_ptempfile_name.sh │ ├── temp_test.sh │ ├── 2.sh │ ├── 18.sh │ ├── 24.sh │ ├── 3.sh │ ├── 5.sh │ ├── 7.sh │ ├── eager-no-task-par.sh │ ├── remove_adapter.sh │ ├── micro_10.sh │ ├── 27.sh │ ├── 4.sh │ ├── bam_to_sam.sh │ ├── get_hash.sh │ ├── head_deadlock.sh │ ├── 13.sh │ ├── 15.sh │ ├── 29.sh │ ├── 22.sh │ ├── 25.sh │ ├── 8.sh │ ├── 8.varlog.sh │ ├── 35.sh │ ├── alt_bigrams.sh │ ├── gen_pl.sh │ ├── grep-test.sh │ ├── set.sh │ ├── nfa-regex.sh │ ├── 21.sh │ ├── search.sh │ ├── 33.sh │ ├── comm-par-test.sh │ ├── head_deadlock_fixed_2.sh │ ├── 32.sh │ ├── 
comm-par-test2.sh │ ├── 17.sh │ ├── 6.sh │ ├── head_deadlock_fixed.sh │ ├── wf.sh │ ├── 2.unrtf.sh │ ├── 20.sh │ ├── 23.sh │ ├── 11.sh │ ├── 16.sh │ ├── 26.sh │ ├── 31.sh │ ├── 34.sh │ ├── 9.sh │ ├── top-n.sh │ ├── 36.sh │ ├── hello-world.sh │ ├── 19.sh │ ├── 30.sh │ ├── cmd_sbst.sh │ ├── incr.sh │ ├── merge-wc.sh │ ├── trim_primers.sh │ ├── 12.sh │ ├── eager_test.sh │ ├── fun-def.sh │ ├── newline_in_var.sh │ ├── ann-agg.sh │ ├── bi-grams.sh │ ├── shortest_scripts.sh │ ├── 28.sh │ ├── setup.sh │ ├── 10.sh │ ├── grab_submissions.sh │ ├── p2.sh │ ├── drain_stream.sh │ ├── encrypt_files.sh │ ├── ldconfig.sh │ ├── demo-spell.sh │ ├── 14.sh │ ├── get_type_count.sh │ ├── merge-uniq.sh │ ├── ann-agg-2.sh │ ├── convert_to_fast.sh │ ├── pash_declare_vars.sh │ ├── max-temp-preprocess.sh │ ├── send_emails.sh │ ├── sed-test.sh │ ├── 7.nginx.sh │ ├── circus.sh │ ├── compress_files.sh │ ├── img_convert.sh │ ├── buggy_non_newline_input.sh │ ├── clone_compress_repo.sh │ ├── 4.gitkernel.sh │ ├── 5.apachelog.sh │ ├── page-per-line.sh │ ├── 1_1.sh │ ├── 7_1.sh │ ├── 3_1.sh │ ├── 6_1_1.sh │ ├── 3_2.sh │ ├── 2_2.sh │ ├── 6_1_2.sh │ ├── 6_3.sh │ ├── 3_3.sh │ ├── mk_dot_install.sh │ ├── 7_2.sh │ ├── alt_bigrams_funs.sh │ ├── 2_1.sh │ ├── 6_4.sh │ ├── 6_5.sh │ ├── 8.2_1.sh │ ├── max-temp.sh │ ├── set-e.sh │ ├── tee_web_index_bug.sh │ ├── compile.sh │ ├── p1.sh │ ├── unparsing-special-chars.sh │ ├── set-diff.sh │ ├── symtab-sha.sh │ ├── page-count.sh │ ├── parse.sh │ ├── test-common.sh │ ├── diff.sh │ ├── run_grader.sh │ ├── to_mp3.sh │ ├── innefficient_auto_split.sh │ ├── proginf.sh │ ├── 6_2.sh │ ├── uniq-c.2.sh │ ├── round_trip.sh │ ├── run_tests.sh │ ├── pcap_bench.sh │ ├── trigrams.sh │ ├── spell-grep.sh │ ├── 4_3.sh │ ├── make-ec2.sh │ ├── 8.3_2.sh │ ├── 8_1.sh │ ├── wrap_cat.sh │ ├── 6_7.sh │ ├── set-e-3.sh │ ├── tr_cs_wc_test.sh │ ├── 4_3b.sh │ ├── 6.msg.sh │ ├── identity.sh │ ├── 8.3_3.sh │ ├── eager.sh │ ├── spell.sh │ ├── get-summary.sh │ ├── tr-test.sh │ ├── r-wc.sh │ ├── 
8.2_2.sh │ ├── temp-analytics.sh │ ├── grep_f_script.sh │ ├── pcap.sh │ ├── split_pcap.sh │ ├── update-img.sh │ ├── suggest-ec2.sh │ ├── dgsh-wc.sh │ ├── 6_1.sh │ ├── distrotest_funs.sh │ ├── tailprogs.sh │ ├── dgsh-raw-sort.sh │ ├── raw-r-sort.sh │ ├── worker.sh │ ├── grade.sh │ ├── generate_single_chrom.sh │ ├── pash_runtime_shell_to_pash.sh │ ├── dgsh-sort.sh │ ├── r-bell_grep.sh │ ├── pkg.sh │ ├── r-minimal_grep.sh │ ├── r-sort.sh │ ├── run_lda.sh │ ├── pacaur.sh │ ├── bell_grep.sh │ ├── head_deadlock_fixed3.sh │ ├── nginx.sh │ ├── minimal_grep.sh │ ├── test1.sh │ ├── wait_for_output_and_sigpipe_rest.sh │ ├── get_results.sh │ ├── bio4.sh │ ├── run_all_benchmarks.sh │ └── bigrams.sh ├── .gitignore ├── README.md ├── Makefile ├── round_trip.sh └── test_ocaml_python.sh ├── src ├── bltin │ ├── .gitignore │ └── times.c └── .gitignore ├── libdash ├── .gitignore └── __init__.py ├── python ├── .gitignore ├── Makefile ├── rt.py └── LICENSE ├── Makefile.am ├── ChangeLog.O ├── dune-workspace ├── autogen.sh ├── ocaml ├── .gitignore ├── mk_meta.sh ├── rt.sh ├── json_to_shell.ml ├── LICENSE ├── shell_to_json.ml ├── ast.mli └── function_description.ml ├── TODO.md ├── ldconfig.sh ├── version.sh ├── dune-project ├── mk_dot_install.sh ├── MANIFEST.in ├── pyproject.toml ├── libdash.opam ├── .gitignore ├── .travis.yml ├── dune ├── .dockerignore └── setup.py /test/tests/tilde_arith: -------------------------------------------------------------------------------- 1 | echo $((~10)) 2 | -------------------------------------------------------------------------------- /src/bltin/.gitignore: -------------------------------------------------------------------------------- 1 | .deps 2 | .dirstamp 3 | -------------------------------------------------------------------------------- /test/failing/array.sh: -------------------------------------------------------------------------------- 1 | p=${cmd_array[$i]} 2 | -------------------------------------------------------------------------------- 
/test/pash_tests/unsafe0.sh: -------------------------------------------------------------------------------- 1 | echo ${x=uhoh} 2 | -------------------------------------------------------------------------------- /test/pash_tests/unsafe1.sh: -------------------------------------------------------------------------------- 1 | echo $((x=2)) 2 | -------------------------------------------------------------------------------- /libdash/.gitignore: -------------------------------------------------------------------------------- 1 | libdash.dylib 2 | libdash.so 3 | -------------------------------------------------------------------------------- /test/pash_tests/safe0.sh: -------------------------------------------------------------------------------- 1 | echo nothing to expand 2 | -------------------------------------------------------------------------------- /test/pash_tests/safe1.sh: -------------------------------------------------------------------------------- 1 | echo ~ is always safe 2 | -------------------------------------------------------------------------------- /test/pash_tests/set-v.sh: -------------------------------------------------------------------------------- 1 | set -v 2 | echo hello 3 | -------------------------------------------------------------------------------- /test/tests/redir_indirect: -------------------------------------------------------------------------------- 1 | x=1; echo msg 2>&$x 2 | -------------------------------------------------------------------------------- /test/pash_tests/cat-redir-fail.sh: -------------------------------------------------------------------------------- 1 | cat < no.such.file 2 | -------------------------------------------------------------------------------- /test/pash_tests/echo_args.sh: -------------------------------------------------------------------------------- 1 | echo "$# $@" 2 | echo $0 3 | -------------------------------------------------------------------------------- 
/test/pash_tests/heredoc1.sh: -------------------------------------------------------------------------------- 1 | cat << foo 2 | line one 3 | foo -------------------------------------------------------------------------------- /test/pash_tests/safe3.sh: -------------------------------------------------------------------------------- 1 | echo $((2 + 2)) = 4, safely 2 | -------------------------------------------------------------------------------- /test/pash_tests/safe4.sh: -------------------------------------------------------------------------------- 1 | echo ${PWD} is fine to show 2 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dylib 4 | python.log 5 | -------------------------------------------------------------------------------- /test/failing/test-exclam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "!" 
3 | 4 | -------------------------------------------------------------------------------- /test/pash_tests/args_with_spaces.sh: -------------------------------------------------------------------------------- 1 | echo $1 2 | echo $2 3 | -------------------------------------------------------------------------------- /test/pash_tests/concat.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cat "$@" 3 | -------------------------------------------------------------------------------- /test/pash_tests/expand-u-positional.sh: -------------------------------------------------------------------------------- 1 | set -u 2 | echo $1 3 | -------------------------------------------------------------------------------- /test/pash_tests/redirect_wrapper.sh: -------------------------------------------------------------------------------- 1 | exec $1 redirect.sh 9>&1 -------------------------------------------------------------------------------- /test/pash_tests/safe2.sh: -------------------------------------------------------------------------------- 1 | echo "quoting safe stuff is safe" 2 | -------------------------------------------------------------------------------- /test/pash_tests/unsafe2.sh: -------------------------------------------------------------------------------- 1 | echo ${nonesuch?uhoh} is unsafe 2 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS=-I m4 2 | 3 | SUBDIRS = src 4 | 5 | -------------------------------------------------------------------------------- /test/pash_tests/grep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat $IN | grep 'the' 3 | -------------------------------------------------------------------------------- /test/pash_tests/no_in_script.sh: 
-------------------------------------------------------------------------------- 1 | N=100 2 | seq 1 $N | sort -rn 3 | -------------------------------------------------------------------------------- /test/pash_tests/redirect.sh: -------------------------------------------------------------------------------- 1 | echo hello 1>&9 2 | # ls -laL /dev/fd -------------------------------------------------------------------------------- /test/pash_tests/star-escape.sh: -------------------------------------------------------------------------------- 1 | x=$(echo "*" '*' \*); echo "$x" 2 | -------------------------------------------------------------------------------- /ChangeLog.O: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binpash/libdash/HEAD/ChangeLog.O -------------------------------------------------------------------------------- /test/failing/safe6.sh: -------------------------------------------------------------------------------- 1 | x=5 ; { x=6 ; echo $x; } | { x=7; echo $x; } 2 | -------------------------------------------------------------------------------- /test/pash_tests/add.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | paste -d+ "$@" | bc 3 | -------------------------------------------------------------------------------- /test/pash_tests/cmd_sbst_subscript.sh: -------------------------------------------------------------------------------- 1 | echo 'eval echo $?' 
2 | exit 123 -------------------------------------------------------------------------------- /test/pash_tests/deadlock_test.sh: -------------------------------------------------------------------------------- 1 | cat $IN | tr A-Z a-z | head -n 1 2 | -------------------------------------------------------------------------------- /test/pash_tests/expand-u.sh: -------------------------------------------------------------------------------- 1 | unset foobar 2 | set -u 3 | echo ${foobar} -------------------------------------------------------------------------------- /test/tests/diverge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while true; do true; done 4 | -------------------------------------------------------------------------------- /test/tests/escaping: -------------------------------------------------------------------------------- 1 | ${x=;|&!~*\}\{()\$\' "this is a \"quoted\" string"} -------------------------------------------------------------------------------- /test/tests/test.sh: -------------------------------------------------------------------------------- 1 | # this is a comment 2 | 3 | ls 4 | cd .. 
5 | 6 | -------------------------------------------------------------------------------- /test/pash_tests/diverge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while true; do true; done 4 | -------------------------------------------------------------------------------- /test/pash_tests/export_var_script.sh: -------------------------------------------------------------------------------- 1 | export N=1000 2 | seq 1 $N | sort -rn 3 | -------------------------------------------------------------------------------- /test/pash_tests/loop1.sh: -------------------------------------------------------------------------------- 1 | for idFor1 in A B ; do 2 | echo $idFor1 3 | done 4 | -------------------------------------------------------------------------------- /test/pash_tests/sort-opt_env.sh: -------------------------------------------------------------------------------- 1 | IN=../evaluation/scripts/input/10G.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/sort_env.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/scripts/input/10G.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/test.sh: -------------------------------------------------------------------------------- 1 | # this is a comment 2 | 3 | ls 4 | cd .. 
5 | 6 | -------------------------------------------------------------------------------- /test/pash_tests/wf_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/tests/backslash: -------------------------------------------------------------------------------- 1 | printf %s\\n foobar\|\&\;\<\>\(\)\$\`\\\"\'\ \?\*\[\ 2 | -------------------------------------------------------------------------------- /test/pash_tests/bigrams_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/grep_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/micro_10_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/1M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/minimal_sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat $IN | tr A-Z a-z | sort 3 | -------------------------------------------------------------------------------- /test/pash_tests/sort_env_small.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/scripts/input/1G.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/sort_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 
| -------------------------------------------------------------------------------- /libdash/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import parse 2 | from .printer import to_string 3 | -------------------------------------------------------------------------------- /test/failing/superoptimize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Superotmizer run!" 4 | 5 | -------------------------------------------------------------------------------- /test/pash_tests/alt_bigrams_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/diff_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | 3 | -------------------------------------------------------------------------------- /test/pash_tests/double_sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat $IN | tr A-Z a-z | sort | sort -r 3 | -------------------------------------------------------------------------------- /test/pash_tests/micro_1000_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/1M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/minimal_grep_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/1M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/minimal_sort_env_test.sh: -------------------------------------------------------------------------------- 1 | 
IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/set-dash-v-x.sh: -------------------------------------------------------------------------------- 1 | set - 2 | echo hello 3 | echo $# $1 $2 $3 $4 $5 4 | 5 | -------------------------------------------------------------------------------- /test/pash_tests/deadlock_test_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/double_sort_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | 3 | -------------------------------------------------------------------------------- /test/pash_tests/set-diff_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | 3 | -------------------------------------------------------------------------------- /test/pash_tests/sort-opt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sort --buffer-size=30% --parallel=$1 $IN 4 | -------------------------------------------------------------------------------- /test/pash_tests/uniq.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # simply rerun uniq 3 | cat "$@" | uniq 4 | -------------------------------------------------------------------------------- /dune-workspace: -------------------------------------------------------------------------------- 1 | (lang dune 3.12) 2 | (env 3 | (dev 4 | (flags (:standard -warn-error -27)))) -------------------------------------------------------------------------------- /test/pash_tests/comm-par-test2_env_test.sh: 
-------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | 3 | -------------------------------------------------------------------------------- /test/pash_tests/comm-par-test_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | 3 | -------------------------------------------------------------------------------- /test/pash_tests/minimal_grep_stdin.sh: -------------------------------------------------------------------------------- 1 | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' 2 | -------------------------------------------------------------------------------- /test/pash_tests/set-e-2.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | ( { false; } 3 | { echo one; } ) | cat 4 | echo two 5 | -------------------------------------------------------------------------------- /test/pash_tests/shortest_scripts_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/all_cmds.txt 2 | -------------------------------------------------------------------------------- /test/pash_tests/topn_env_test.sh: -------------------------------------------------------------------------------- 1 | N=1000 2 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 3 | 4 | -------------------------------------------------------------------------------- /test/pash_tests/call_distrib_planner_example.sh: -------------------------------------------------------------------------------- 1 | python3 distr_plan.py "/tmp/dish_temp_ir_file0" 2 | -------------------------------------------------------------------------------- /test/pash_tests/escape-madness.sh: -------------------------------------------------------------------------------- 1 | echo "$(echo *)" 2 | echo "$(echo "*")" 3 | echo "${unset-*}" 4 | 
-------------------------------------------------------------------------------- /test/pash_tests/for_loop_simple.sh: -------------------------------------------------------------------------------- 1 | for _ in $times; do 2 | cat $IN | tr A-Z a-z | sort 3 | done 4 | -------------------------------------------------------------------------------- /test/pash_tests/head.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # FIXME missing head parameters 3 | 4 | cat "${1}" 5 | -------------------------------------------------------------------------------- /test/pash_tests/tilde.sh: -------------------------------------------------------------------------------- 1 | HOME='abc xyz' 2 | printf '%s\n' ~ 3 | HOME='test.*' 4 | printf '%s\n' ~ -------------------------------------------------------------------------------- /test/pash_tests/wc.sh: -------------------------------------------------------------------------------- 1 | # IN=/home/ubuntu/pash/evaluation/scripts/input/10M.txt 2 | 3 | cat $IN | wc -------------------------------------------------------------------------------- /test/tests/single_quoted_dollar_sign.sh: -------------------------------------------------------------------------------- 1 | # TEST: single roundtrip 2 | 3 | echo '$1' 4 | echo $ a 5 | -------------------------------------------------------------------------------- /test/pash_tests/remote_read.sh: -------------------------------------------------------------------------------- 1 | "$PASH_TOP/runtime/dspash/file_reader/datastream_client" --type read "$@" 2 | -------------------------------------------------------------------------------- /test/pash_tests/tail.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # FIXME missing tail parameters 3 | 4 | cat "${@: -1}" 5 | -------------------------------------------------------------------------------- /test/tests/empty_case: 
-------------------------------------------------------------------------------- 1 | case foo in 2 | bar) 3 | ;; 4 | *) 5 | echo hi;; 6 | esac -------------------------------------------------------------------------------- /test/pash_tests/dfs_split_reader.sh: -------------------------------------------------------------------------------- 1 | "$PASH_TOP/runtime/dspash/file_reader/dfs_split_reader" --config "$@" 2 | -------------------------------------------------------------------------------- /test/pash_tests/exit_code.sh: -------------------------------------------------------------------------------- 1 | if read -r && read -r 2 | then 3 | exit 1 4 | fi < log_results/failed.log 5 | -------------------------------------------------------------------------------- /test/pash_tests/for_loop_simple_env_test.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 2 | times="1 2 3" 3 | -------------------------------------------------------------------------------- /test/pash_tests/remote_write.sh: -------------------------------------------------------------------------------- 1 | "$PASH_TOP"/runtime/dspash/file_reader/datastream_client --type write "$@" 2 | -------------------------------------------------------------------------------- /test/pash_tests/for-echo.sh: -------------------------------------------------------------------------------- 1 | N=${N:-100} 2 | for i in $(seq $N) 3 | do 4 | echo $i 5 | done 6 | echo "end" 7 | -------------------------------------------------------------------------------- /test/tests/nested_shell_in_subshell.sh: -------------------------------------------------------------------------------- 1 | ( (echo abc) ) 2 | echo $( (echo abc) ) 3 | echo `(echo abc)` 4 | echo $() 5 | -------------------------------------------------------------------------------- /test/pash_tests/cat_output_files.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | distr_output_dir=$1 4 | 5 | cat "$distr_output_dir"/* 6 | -------------------------------------------------------------------------------- /test/pash_tests/var_assgn.sh: -------------------------------------------------------------------------------- 1 | echo "foobar: ${FOOBAR}" 2 | echo "foobar: ${FOOBAR:=baz}" 3 | echo "foobar: ${FOOBAR}" 4 | -------------------------------------------------------------------------------- /test/failing/invalidate1.sh: -------------------------------------------------------------------------------- 1 | : ${x=a}; echo $x;\ 2 | for x in b c d 3 | do 4 | echo $x 5 | done;\ 6 | x=e;\ 7 | echo $x 8 | -------------------------------------------------------------------------------- /test/pash_tests/count.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$@" 3 | -------------------------------------------------------------------------------- /test/pash_tests/safe7.sh: -------------------------------------------------------------------------------- 1 | echo ${#PWD} is also cool, as is ${x-default} and "${x+alt}" and ${x=set now} and ${x?won\'t run} 2 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | libtoolize \ 4 | && aclocal \ 5 | && autoheader \ 6 | && automake --add-missing \ 7 | && autoconf 8 | -------------------------------------------------------------------------------- /test/failing/invalidate0.sh: -------------------------------------------------------------------------------- 1 | : ${x=a}; echo $x;\ 2 | for x in b c d 3 | do 4 | echo $x 5 | done;\ 6 | : ${x=e};\ 7 | echo $x 8 | 
-------------------------------------------------------------------------------- /test/pash_tests/distrotest_env.sh: -------------------------------------------------------------------------------- 1 | SHELLCHECK_DIR=/home/nikos/shellcheck/ 2 | IN=../evaluation/usecases/shellcheck/temp_input.txt 3 | -------------------------------------------------------------------------------- /test/pash_tests/safe5.sh: -------------------------------------------------------------------------------- 1 | echo ${#PWD} is also cool, as is ${x-default} and ${x+alt} and and ${x%%a*} ${x%a*} ${x#a*} ${x##a*} 2 | -------------------------------------------------------------------------------- /test/pash_tests/exec-redirections.sh: -------------------------------------------------------------------------------- 1 | exec < exec-redirections.in > exec-redirections.out 2> exec-redirections.err 2 | touch 3 | cat 4 | -------------------------------------------------------------------------------- /test/pash_tests/topn.sh: -------------------------------------------------------------------------------- 1 | # Top-N (1000) terms 2 | cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed ${N}q 3 | -------------------------------------------------------------------------------- /test/pash_tests/spell_env_test.sh: -------------------------------------------------------------------------------- 1 | dict=$PASH_TOP/evaluation/tests/input/sorted_words 2 | IN=$PASH_TOP/evaluation/tests/input/10M.txt 3 | 4 | -------------------------------------------------------------------------------- /test/failing/heredoc2.sh: -------------------------------------------------------------------------------- 1 | for i in '#' 2 | do 3 | cat << EOF 4 | x=\`printf '%s' \\$i\`; printf '%s\\n' "\$x" 5 | EOF 6 | done 7 | -------------------------------------------------------------------------------- /test/pash_tests/autogen.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | libtoolize \ 4 | && aclocal \ 5 | && autoheader \ 6 | && automake --add-missing \ 7 | && autoconf 8 | -------------------------------------------------------------------------------- /test/pash_tests/trap.sh: -------------------------------------------------------------------------------- 1 | myfunction() 2 | { 3 | echo myfunction invoked 4 | } 5 | trap myfunction EXIT 6 | echo hello one 7 | echo hello two 8 | -------------------------------------------------------------------------------- /test/failing/incomplete-arith.sh: -------------------------------------------------------------------------------- 1 | cat=1 2 | EOH=2 3 | echo $((cat < "${1?No file to redirect to}" <&3 3<&- & } 3<&0 5 | -------------------------------------------------------------------------------- /test/pash_tests/set_bug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | f() { 4 | echo "f: $@" 5 | } 6 | 7 | set -- a b c 8 | echo "$@" 9 | f 10 | echo "$@" 11 | 12 | -------------------------------------------------------------------------------- /test/pash_tests/sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Sort input 3 | 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | 6 | cat $IN | sort 7 | 8 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.cmxa 3 | test 4 | *.native 5 | *~ 6 | *.o 7 | *.cmx 8 | *.cmi 9 | test.err 10 | test.byte 11 | test.cmo 12 | ocaml_python.log 13 | -------------------------------------------------------------------------------- /test/pash_tests/longest-man.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Find the 10 
largest man pages 4 | 5 | find /usr/share/man -type f | xargs du -scb | sort -rn | head -n 10 6 | -------------------------------------------------------------------------------- /test/pash_tests/max-temp-process.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Processing 4 | cat $IN | 5 | cut -c 89-92 | 6 | grep -v 999 | 7 | sort -rn | 8 | head -n1 9 | -------------------------------------------------------------------------------- /test/pash_tests/readonly.sh: -------------------------------------------------------------------------------- 1 | var1=value1 2 | readonly var1 var2=value2 3 | var1=foo 4 | var2=foo 5 | echo $var1 $var2 6 | unset var1 7 | unset var2 8 | echo $var1 $var2 9 | -------------------------------------------------------------------------------- /test/pash_tests/archive.sh: -------------------------------------------------------------------------------- 1 | timestamp=`date +"%Y%m%d%H%M%S"` 2 | 3 | tar cf cdash.tar \ 4 | *.c *.h *.sh *.py Makefile 5 | 6 | cp -p cdash.tar "cdash-${timestamp}.tar" 7 | -------------------------------------------------------------------------------- /test/pash_tests/count_packets.sh: -------------------------------------------------------------------------------- 1 | # count the packet number in a pcap file 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} 3 | tcpdump -nn -r ${INPUT} | wc -l 4 | -------------------------------------------------------------------------------- /test/pash_tests/for-loop.sh: -------------------------------------------------------------------------------- 1 | mkdir -p temp-out 2 | 3 | for i in $(seq 100) 4 | do 5 | cat $PASH_TOP/README.md | grep pash | grep pash > temp-out/$i.out 6 | done 7 | 8 | echo done 9 | -------------------------------------------------------------------------------- /test/pash_tests/install-deps.sh: -------------------------------------------------------------------------------- 
1 | # install dependencies 2 | pkgs='ffmpeg unrtf imagemagick' 3 | if ! dpkg -s $pkgs >/dev/null 2>&1; then 4 | sudo apt-get install $pkgs -y 5 | fi 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/redir-var-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | func_emit_tests_Makefile_am () 3 | { 4 | ofd=3 5 | { 6 | echo hi 7 | } >&$ofd 8 | } 9 | fd=1 10 | echo hi >&$fd 11 | -------------------------------------------------------------------------------- /test/pash_tests/sort-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Calculate sort twice 3 | 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | 6 | cat $IN | tr A-Z a-z | sort | sort -r 7 | -------------------------------------------------------------------------------- /test/pash_tests/pretty_print_json.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sed 's//}/g' | \ 5 | sed 's/(/[/g' | \ 6 | sed 's/)/]/g' | \ 7 | python -m json.tool 8 | -------------------------------------------------------------------------------- /test/pash_tests/sine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | F="temp.txt" 3 | [ -f $F ] && (rm $F && echo 1 >$F ) 4 | tail -f temp.txt | while read n; do echo "1+s(3*$n)" | bc -l; sleep 1; done | tee -a temp.txt 5 | -------------------------------------------------------------------------------- /test/pash_tests/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN1=$IN_PRE/1.txt 4 | # 1.0: extract the last name 5 | cat $IN1 | cut -d ' ' -f 2 6 | 7 | -------------------------------------------------------------------------------- 
/test/tests/braces_amp.sh: -------------------------------------------------------------------------------- 1 | for x in foo; do a & b & c & d & done 2 | echo a & echo b 3 | for y in foo; do a & b & done 4 | while false; do a & b & done 5 | until true; do forever & ever & ever & done 6 | -------------------------------------------------------------------------------- /test/failing/test-shlex-aux.sh: -------------------------------------------------------------------------------- 1 | comment_fun() 2 | { 3 | cat > /dev/null #Consume data from pipe so writers don't get SIGPIPE 4 | } 5 | 6 | bad_quote_fun() 7 | { 8 | echo ${asf"asd} 9 | } 10 | -------------------------------------------------------------------------------- /test/pash_tests/braces_amp.sh: -------------------------------------------------------------------------------- 1 | for x in foo; do a & b & c & d & done 2 | echo a & echo b 3 | for y in foo; do a & b & done 4 | while false; do a & b & done 5 | until true; do forever & ever & ever & done 6 | -------------------------------------------------------------------------------- /test/pash_tests/pash_ptempfile_name.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | distro=${1??Distro not given} 4 | # echo "$PASH_TMP_PREFIX/pash_$RANDOM$RANDOM$RANDOM" 5 | mktemp -u "$PASH_TMP_PREFIX/pash_XXXXXXXXXX" 6 | -------------------------------------------------------------------------------- /test/pash_tests/temp_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | func() { read -r distro setup; echo $distro $setup; } 4 | 5 | export -f func 6 | 7 | cat ../evaluation/usecases/shellcheck/temp_input.txt | 8 | func 9 | -------------------------------------------------------------------------------- /test/pash_tests/2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export 
IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN1=$IN_PRE/1.txt 4 | # 1.1: extract names and sort 5 | cat $IN1 | cut -d ' ' -f 2 | sort 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/18.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN8=$IN_PRE/8.txt 4 | # 8.1: count unix birth-year 5 | cat $IN8 | tr ' ' '\n' | grep 1969 | wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/24.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN92=$IN_PRE/9.2.txt 4 | # 9.2: extract the word BELL 5 | cat $IN92 | cut -c 1-1 | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN1=$IN_PRE/1.txt 4 | # 1.2: extract names and sort 5 | cat $IN1 | head -n 2 | cut -d ' ' -f 2 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN2=$IN_PRE/2.txt 4 | # 2.1: get all Unix utilities 5 | cat $IN2 | cut -d ' ' -f 4 | tr -d ',' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/7.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 
4.1: find number of rounds 5 | cat $IN4 | tr ' ' '\n' | grep '\.' | wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/eager-no-task-par.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # $input="${1}" 4 | # $output="${2}" 5 | # $temp="${3}" 6 | 7 | touch "$3" 8 | 9 | cat "$1" > "$3" 10 | cat "$3" > "$2" 11 | rm "$3" 12 | -------------------------------------------------------------------------------- /test/pash_tests/remove_adapter.sh: -------------------------------------------------------------------------------- 1 | INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} 2 | # remove adapter 3 | find ${INPUT} -name "*.fastq" | sort | uniq | xargs -I {} cutadapt -a AGATCGGAAGAGCACAC {} > /dev/null 4 | -------------------------------------------------------------------------------- /test/pash_tests/micro_10.sh: -------------------------------------------------------------------------------- 1 | cat $IN | 2 | tr " " " " | 3 | tr " " " " | 4 | tr " " " " | 5 | tr " " " " | 6 | tr " " " " | 7 | tr " " " " | 8 | tr " " " " | 9 | tr " " " " | 10 | tr " " " " | 11 | tr " " " " 12 | -------------------------------------------------------------------------------- /test/pash_tests/27.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN95=$IN_PRE/9.5.txt 4 | # # 9.5: backwards running clock, in a backwards poem 5 | # cat $IN95 > /dev/null 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN1=$IN_PRE/1.txt 4 | # 1.3: sort top first names 5 | cat $IN1 | cut -d ' ' -f 1 | sort | uniq -c | sort -r 6 
| 7 | -------------------------------------------------------------------------------- /test/pash_tests/bam_to_sam.sh: -------------------------------------------------------------------------------- 1 | INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/bam} 2 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} 3 | cd ${INPUT} 4 | find . -name "*.bam" | xargs -I {} samtools view -h -o ${OUTPUT} {} 5 | -------------------------------------------------------------------------------- /test/pash_tests/get_hash.sh: -------------------------------------------------------------------------------- 1 | # calculate a hash? can we change it to calculate hashes for all the files? 2 | head -c32 /dev/urandom | openssl dgst -sha256 -binary -hmac $(xxd -p -l32 -c32 /dev/urandom) | base64 | cut -b-32 3 | -------------------------------------------------------------------------------- /test/pash_tests/head_deadlock.sh: -------------------------------------------------------------------------------- 1 | mkfifo s1 s2 2 | 3 | cat ../evaluation/scripts/input/1M.txt > s1 & 4 | cat ../evaluation/scripts/input/1M.txt > s2 & 5 | cat s1 s2 | head -n 1 & 6 | 7 | wait 8 | 9 | rm s1 s2 10 | -------------------------------------------------------------------------------- /ocaml/.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.cmxa 3 | test 4 | *.native 5 | *~ 6 | *.o 7 | *.cmx 8 | *.cmi 9 | *.cmo 10 | *.cma 11 | META 12 | ast_json.ml 13 | ast_json.mli 14 | json_to_shell 15 | shell_to_json 16 | *-e 17 | ocaml.log 18 | -------------------------------------------------------------------------------- /test/pash_tests/13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN5=$IN_PRE/5.txt 4 | # 5.1: extract hello world 5 | cat $IN5 | grep 'print' | cut -d "\"" -f 2 | cut -c 1-12 6 | 7 | 
-------------------------------------------------------------------------------- /test/pash_tests/15.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN7=$IN_PRE/7.txt 4 | # 7.1: identify number of AT&T unix versions 5 | cat $IN7 | cut -f 1 | grep 'AT&T' | wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/29.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN97=$IN_PRE/9.7.txt 4 | # 9.7: Four corners 5 | cat $IN97 | sed 2d | sed 2d | tr -c '[A-Z]' '\n' | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/failing/pay_respects.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | cat < /dev/null 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/25.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN93=$IN_PRE/9.3.txt 4 | # 9.3: animal that used to decorate the Unix room 5 | cat $IN93 | cut -c 1-2 | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 4.2: find pieces captured by Belle 5 | cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' 
| wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/8.varlog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | # 1308; or line above, w/ -vE 4 | # Doesn't do much 5 | find /var/log -type f -exec file {} \; | grep 'text' | cut -d' ' -f1 | sed -e's/:$//g' | grep -v '[0-9]$' | xargs tail 6 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | builtins.[ch] 3 | builtins.def 4 | dash 5 | init.c 6 | mkinit 7 | mknodes 8 | mksignames 9 | mksyntax 10 | nodes.[ch] 11 | signames.c 12 | syntax.[ch] 13 | token.h 14 | .libs 15 | libdash.la 16 | dlldash.la 17 | -------------------------------------------------------------------------------- /test/pash_tests/35.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN11=$IN_PRE/11.txt 4 | # 11.1: year Ritchie and Thompson receive the Hamming medal 5 | cat $IN11 | grep 'UNIX' | cut -f 1 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/alt_bigrams.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Find all 2-grams in a piece of text 4 | # FIXME: does not calculate frequencies 5 | 6 | cat $IN | 7 | tr -cs A-Za-z '\n' | 8 | tr A-Z a-z | 9 | alt_bigrams_aux 10 | 11 | -------------------------------------------------------------------------------- /test/pash_tests/gen_pl.sh: -------------------------------------------------------------------------------- 1 | # generate a playlist 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} 3 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} 4 | find $1 -type f -name *.mp3 -o -name *.wav 
| sort > $OUTPUT/playlist.pls 5 | -------------------------------------------------------------------------------- /test/pash_tests/grep-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This test contains all occurences of tr (to test the annotation) 4 | 5 | FILE="$PASH_TOP/evaluation/tests/input/1M.txt" 6 | 7 | cat $FILE | grep "the" 8 | cat $FILE | grep -c "the" 9 | -------------------------------------------------------------------------------- /test/pash_tests/set.sh: -------------------------------------------------------------------------------- 1 | dotFile=set.sh.tempfile 2 | variable="value value" 3 | 4 | # the problem is that this returns more things (we have functions that are exported in set) 5 | set | grep variable > $dotFile 6 | . ./$dotFile 7 | # set 8 | -------------------------------------------------------------------------------- /test/pash_tests/nfa-regex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Match complex regular-expression over input 3 | 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | 6 | cat $IN | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' 7 | -------------------------------------------------------------------------------- /test/pash_tests/21.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN8=$IN_PRE/8.txt 4 | # 8.4: find longest words without hyphens 5 | cat $IN8 | tr -c "[a-z][A-Z]" '\n' | sort | awk "length >= 16" 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Complicated grep expression 4 | 5 | IN=./input/1G.txt # Change G to M for small input 6 | 
OUT=./output/out.txt 7 | 8 | cat $IN | 9 | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $OUT 10 | -------------------------------------------------------------------------------- /test/pash_tests/33.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN10=$IN_PRE/10.txt 4 | # 10.2: list Turing award recipients while working at Bell Labs 5 | cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 6 | 7 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | - [x] clear out old C stuff 2 | - [x] get roundtrips correct (fix OCaml pretty printing) 3 | - [x] correct libdash.so installation (locally) 4 | - [x] pip setup 5 | - [x] testpypi setup 6 | - [ ] pash pull request 7 | - [ ] smoosh pull request 8 | 9 | -------------------------------------------------------------------------------- /test/pash_tests/comm-par-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Tests the parallelization of comm with a configuration input and a stream input. 
4 | mkfifo s1 s2 5 | 6 | cat $IN > s1 & 7 | cat $IN | grep "king" > s2 & 8 | comm -23 s1 s2 9 | 10 | rm s1 s2 11 | -------------------------------------------------------------------------------- /test/pash_tests/head_deadlock_fixed_2.sh: -------------------------------------------------------------------------------- 1 | mkfifo s1 s2 2 | 3 | cat ../evaluation/scripts/input/1M.txt > s1 & 4 | cat ../evaluation/scripts/input/1M.txt > s2 & 5 | (cat s1 s2; head -n 1 s2 > /dev/null) | head -n 1 & 6 | 7 | wait 8 | 9 | rm s1 s2 10 | -------------------------------------------------------------------------------- /test/pash_tests/32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN10=$IN_PRE/10.txt 4 | # 10.1: count Turing award recipients while working at Bell Labs 5 | cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 | wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/comm-par-test2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Tests the parallelization of comm with a configuration input and a stream input. 
4 | mkfifo s1 s2 5 | 6 | cat $IN > s1 & 7 | cat $IN | grep "king" > s2 & 8 | comm -23 - s2 < s1 9 | 10 | rm s1 s2 11 | -------------------------------------------------------------------------------- /test/pash_tests/17.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN7=$IN_PRE/7.txt 4 | # 7.3: all the decades in which a unix version was released 5 | cat $IN7 | cut -f 4 | sort -n | cut -c 3-3 | uniq | sed s/\$/'0s'/ 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/6.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN3=$IN_PRE/3.txt 4 | # 3.1: get lowercase first letter of last names (awk) 5 | cat $IN3 | cut -d ' ' -f 2 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/head_deadlock_fixed.sh: -------------------------------------------------------------------------------- 1 | mkfifo s1 s2 2 | 3 | cat ../evaluation/scripts/input/1M.txt > s1 & 4 | cat ../evaluation/scripts/input/1M.txt > s2 & 5 | cat s1 s2 | (head -n 1; ../evaluation/tools/drain_stream.sh) & 6 | 7 | wait 8 | 9 | rm s1 s2 10 | -------------------------------------------------------------------------------- /test/pash_tests/wf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Calculate the frequency of each word in the document, and sort by frequency 3 | 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | 6 | cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn 7 | -------------------------------------------------------------------------------- /test/pash_tests/2.unrtf.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #tag: rtf-to-txt 4 | set -e 5 | IN=${RTF:-$PASH_TOP/evaluation/benchmarks/aliases/input/rtf} 6 | OUT=${OUT:-PASH_TOP/evaluation/benchmarks/aliases/input/out} 7 | find $IN -name '*.rtf' | xargs -I {} unrtf {} --text > /dev/null 8 | -------------------------------------------------------------------------------- /test/pash_tests/20.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN8=$IN_PRE/8.txt 4 | # 8.3: find names of the four people most involved with unix 5 | cat $IN8 | grep '(' | cut -d '(' -f 2 | cut -d ')' -f 1 | head -n 1 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/23.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN91=$IN_PRE/9.1.txt 4 | # 9.1: extract the word PORT 5 | cat $IN91 | tr ' ' '\n' | grep '[A-Z]' | tr '[a-z]' '\n' | grep '[A-Z]' | tr -d '\n' | cut -c 1-4 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/11.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 4.5: 4.4 + pawns 5 | cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' 
-f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort | uniq -c | sort -nr 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN7=$IN_PRE/7.txt 4 | # 7.2: find most frequently occurring machine 5 | cat $IN7 | cut -f 2 | sort -n | uniq -c | sort -nr | head -n 1 | tr -s ' ' '\n' | tail -n 1 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/26.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN94=$IN_PRE/9.4.txt 4 | # 9.4: four corners with E centered, for an "X" configuration 5 | cat $IN94 | tr ' ' '\n' | grep "\"" | sed 4d | cut -d "\"" -f 2 | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/31.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN99=$IN_PRE/9.9.txt 4 | # 9.9: 5 | cat $IN99 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 1d | sed 2d | sed 3d | sed 5d | tr -c '[A-Z]' '\n' | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/34.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN10=$IN_PRE/10.txt 4 | # 10.3: extract Ritchie's username 5 | cat $IN10 | grep 'Bell' | cut -f 2 | head -n 1 | fmt -w1 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/9.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 4.3: find pieces captured by Belle with a pawn 5 | cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep -v '[KQRBN]' | wc -l 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/top-n.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Top-N (1000) terms 3 | # from https://dl.acm.org/doi/10.1145/5948.315654 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 6 | 7 | cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed 100q 8 | 9 | -------------------------------------------------------------------------------- /test/pash_tests/36.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN11=$IN_PRE/11.txt 4 | # 11.2: most repeated first name in the list? 5 | cat $IN11 | cut -f 2 | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | head -n 1 | fmt -w1 | sed 1d 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/hello-world.sh: -------------------------------------------------------------------------------- 1 | [ $(uname) = 'Darwin' ] && a=/usr/share/dict/web2 || a=/usr/share/dict/words 2 | 3 | if [ -f $a ]; then 4 | cat $a $a $a $a $a $a $a $a | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' | wc -l 5 | else 6 | echo "Dictionary file $a not found.." 
7 | fi 8 | 9 | -------------------------------------------------------------------------------- /test/pash_tests/19.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN8=$IN_PRE/8.txt 4 | # 8.2: find Bell Labs location where Dennis Ritchie had his office 5 | cat $IN8 | grep 'Bell' | awk 'length <= 45' | cut -d ',' -f 2 | awk "{\$1=\$1};1" 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/30.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN98=$IN_PRE/9.8.txt 4 | # 9.8: TELE-communications 5 | cat $IN98 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 2d | sed 3d | sed 4d | tr -c '[A-Z]' '\n' | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/cmd_sbst.sh: -------------------------------------------------------------------------------- 1 | echo $(Testvar=set 2 | unset Testvar 3 | echo $Testvar${Testvar-sh_352.10}${Testvar+set} 4 | ) 5 | x=$(set one two three; echo sh_352.11 $1 $2 $3 $# $* "$@"); echo "$x" 6 | x=$(set one "twoA twoB"; echo sh_352.12 $1 "$2" $3 $# $* "$@"); echo "$x" -------------------------------------------------------------------------------- /test/failing/run_parser_on_scripts.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | SCRIPTS_DIR="../scripts/" 4 | 5 | for script in "$SCRIPTS_DIR"*.sh 6 | do 7 | echo "Parsing $script..." 
8 | output=${script/"scripts"/"scripts/json"}.json 9 | ./parse_to_json.native "$script" > "$output" 10 | done 11 | -------------------------------------------------------------------------------- /test/pash_tests/incr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands 3 | F="temp.txt" 4 | [ -f $F ] && (rm $F && echo 1 >$F ) 5 | tail -f $F | while read n; do echo $((n+1)); sleep 1; done | tee -a $F 6 | -------------------------------------------------------------------------------- /test/pash_tests/merge-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | A="paste -d '+' " 4 | for i in "$@"; do 5 | # cat "$i" | tr -s ' ' '\n' | tail -n +2 6 | A="$A <(cat $i | tr -s ' ' '\n' | tail -n +2) " 7 | done 8 | A="$A | head -n +3 | bc | tr -s '\n' ' ' | sed 's/$/\ /'" 9 | 10 | eval $A 11 | 12 | -------------------------------------------------------------------------------- /test/pash_tests/trim_primers.sh: -------------------------------------------------------------------------------- 1 | # trim primers 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} 3 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} 4 | cd ${INPUT} 5 | find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -a TCCTCCGCTTATTGATAGC -o ${OUTPUT}/{}\_trimmed.fastq {}; 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/12.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 4.6: piece used the most by Belle 5 | cat $IN4 | tr ' ' '\n' | grep '\.' | cut -d '.' 
-f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort -r | uniq | head -n 3 | tail -n 1 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/eager_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkfifo s1 s2 4 | 5 | # IN=test_in.txt 6 | IN=../scripts/input/1G.txt 7 | 8 | cat "$IN" > s1 & 9 | cat s2 > test_out.txt & 10 | ./eager s1 s2 intermediate & 11 | 12 | wait 13 | 14 | rm s1 s2 15 | 16 | # diff -s $IN test_out.txt 17 | -------------------------------------------------------------------------------- /test/pash_tests/fun-def.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | custom_sort() { 4 | sort $@ 5 | } 6 | 7 | custom_tr() { 8 | tr A-Z a-z 9 | } 10 | 11 | export -f custom_tr 12 | 13 | FILES="$PASH_TOP/evaluation/tests/input/1M.txt ../evaluation/tests/input/1M.txt" 14 | 15 | cat $FILES | custom_tr | custom_sort 16 | -------------------------------------------------------------------------------- /test/pash_tests/newline_in_var.sh: -------------------------------------------------------------------------------- 1 | x=$(for i in sh_352.09 one two 2 | do 3 | if [ "$i" != "one" ] 4 | then echo "$i" 5 | fi 6 | done 7 | ); echo "$x" 8 | 9 | x=$(echo sh_352.18 line 1 > sh_352.18tmp && echo sh_352.18 line 2 >> sh_352.18tmp && cat sh_352.18tmp ); echo "$x" -------------------------------------------------------------------------------- /test/pash_tests/ann-agg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Test contains command aliases with annotations that point to custom aggregators 4 | 5 | FILE="$PASH_TOP/evaluation/tests/input/1M.txt" 6 | 7 | test_one() { 8 | cat 9 | } 10 | 11 | test_two() { 12 | cat 13 | } 14 | 15 | cat $FILE | test_one | test_two 16 | 
-------------------------------------------------------------------------------- /test/pash_tests/bi-grams.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Find all 2-grams in a piece of text 3 | 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | 6 | . bi-gram.aux.sh 7 | 8 | cat $IN | 9 | tr -cs A-Za-z '\n' | 10 | tr A-Z a-z | 11 | bigrams_aux | 12 | sort | 13 | uniq 14 | 15 | 16 | -------------------------------------------------------------------------------- /ldconfig.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | cd _build/lib 6 | 7 | trylink() { 8 | [ -f "$2" ] || ln -sf $1 $2 9 | } 10 | 11 | trylink dlldash.so.0.0.0 dlldash.so 12 | trylink dlldash.so.0.0.0 dlldash.so.0 13 | 14 | trylink libdash.so.0.0.0 libdash.so 15 | trylink libdash.so.0.0.0 libdash.so.0 16 | 17 | -------------------------------------------------------------------------------- /test/pash_tests/shortest_scripts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # A bash script for finding the shortest scripts 3 | # From "Wicked Cool Shell Scripts", 2nd Ed., pg. 
7 4 | # +p.95 multiple sed 5 | # +p.XX crawler 6 | 7 | cat $IN | xargs file | grep "shell script" | cut -d: -f1 | xargs -L 1 wc -l | grep -v '^0$' | sort -n | head -15 8 | -------------------------------------------------------------------------------- /test/pash_tests/28.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN96=$IN_PRE/9.6.txt 4 | # 9.6: Follow the directions for grep 5 | cat $IN96 | tr ' ' '\n' | grep '[A-Z]' | sed 1d | sed 3d | sed 3d | tr '[a-z]' '\n' | grep '[A-Z]' | sed 3d | tr -c '[A-Z]' '\n' | tr -d '\n' 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | setup_dataset() { 3 | echo 'This experiment is expected to fetch data from a remote server' 4 | echo 'To fetch the original dataset, use an FTP client' 5 | echo 'e.g., "lftp ftp://ftp.ncdc.noaa.gov/pub/data/noaa"' 6 | } 7 | 8 | source_var() { 9 | export IN= 10 | } 11 | -------------------------------------------------------------------------------- /test/tests/grab_submissions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" != "1" ]; then 4 | echo "Usage: $0 [hwXX]" 5 | exit 1 6 | fi 7 | 8 | if [ -d "$1" ]; then 9 | echo "Grading directory already exists" 10 | exit 2 11 | fi 12 | 13 | mkdir $1 14 | mkdir $1/submissions 15 | cp ../dropbox/$1/* $1/submissions 16 | -------------------------------------------------------------------------------- /test/pash_tests/10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN4=$IN_PRE/4.txt 4 | # 4.4: histogram of Belle's captures (-pawns) by each type of piece 5 | cat 
$IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep '[KQRBN]' | cut -c 1-1 | sort | uniq -c | sort -nr 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/grab_submissions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" != "1" ]; then 4 | echo "Usage: $0 [hwXX]" 5 | exit 1 6 | fi 7 | 8 | if [ -d "$1" ]; then 9 | echo "Grading directory already exists" 10 | exit 2 11 | fi 12 | 13 | mkdir $1 14 | mkdir $1/submissions 15 | cp ../dropbox/$1/* $1/submissions 16 | -------------------------------------------------------------------------------- /test/pash_tests/p2.sh: -------------------------------------------------------------------------------- 1 | sed "s#^#$HOME/wikipedia/#" | 2 | xargs cat | 3 | iconv -c -t ascii//TRANSLIT | 4 | pandoc +RTS -K64m -RTS --from html --to plain --quiet | 5 | tr -cs A-Za-z '\n' | 6 | tr A-Z a-z | 7 | grep -vwFf ../evaluation/scripts/web-index/stopwords.txt | 8 | ../evaluation/scripts/web-index/stem-words.js 9 | -------------------------------------------------------------------------------- /test/pash_tests/drain_stream.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ## This command drains a stream. It is used if we want a prefix of a 4 | ## stream that was written by tee. Since tee writes in both streams 5 | ## "almost" in lockstep, if we get a prefix on one side, the other 6 | ## side cannot progress. 7 | dd of=/dev/null > /dev/null 2>&1 8 | -------------------------------------------------------------------------------- /test/pash_tests/encrypt_files.sh: -------------------------------------------------------------------------------- 1 | # compress and encrypt all files in a directory 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} 3 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} 4 | cd $INPUT 5 | find . 
-name "*.rtf" | xargs -I {} sh -c "tar -czf - {} | openssl enc -e -pbkdf2 -out {}.enc; mv {}.enc $OUTPUT" sh {} 6 | -------------------------------------------------------------------------------- /test/pash_tests/ldconfig.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | cd _build/lib 6 | 7 | trylink() { 8 | [ -f "$2" ] || ln -sf $1 $2 9 | } 10 | 11 | trylink dlldash.so.0.0.0 dlldash.so 12 | trylink dlldash.so.0.0.0 dlldash.so.0 13 | 14 | trylink libdash.so.0.0.0 libdash.so 15 | trylink libdash.so.0.0.0 libdash.so.0 16 | 17 | -------------------------------------------------------------------------------- /test/pash_tests/demo-spell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd "$(dirname $0)" 4 | 5 | [ -z $PASH_TOP ] && { 6 | echo "PASH_TOP not set, maybe $(git rev-parse --show-toplevel)?" 7 | exit 8 | } 9 | FILE="input/100M.txt" 10 | DICT="input/sorted_words" 11 | 12 | cat "$FILE" | tr A-Z a-z | tr -cs A-Za-z '\n' | sort | uniq | comm -13 $DICT - 13 | -------------------------------------------------------------------------------- /test/pash_tests/14.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} 3 | IN6=$IN_PRE/6.txt 4 | # 6.1: order the bodies by how easy it would be to land on them in Thompson's Space Travel game when playing at the highest simulation scale 5 | cat $IN6 | awk "{print \$2, \$0}" | sort -nr | cut -d ' ' -f 2 6 | 7 | -------------------------------------------------------------------------------- /test/pash_tests/get_type_count.sh: -------------------------------------------------------------------------------- 1 | # count how many times each file type exist in a directory 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} 3 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} 4 
| find $INPUT -type f | while read f; do echo ""${f##*.}""; done | sed ""/^\s*$/d"" | sort | uniq -c | sort -rn > $OUTPUT/get_type_count_res 5 | -------------------------------------------------------------------------------- /test/pash_tests/merge-uniq.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Merge the results of `uniq -c` stored in three files (defaults: {1,2,3}.txt; override with $1, $2, $3) 4 | # I am using 3 inputs to stress it works with more than just pairs:-) 5 | 6 | A=${1:-1.txt} 7 | B=${2:-2.txt} 8 | C=${3:-3.txt} 9 | awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$A" "$B" "$C" 10 | -------------------------------------------------------------------------------- /test/pash_tests/ann-agg-2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Test contains command aliases with annotations that point to custom aggregators 4 | 5 | FILE="$PASH_TOP/evaluation/tests/input/ab.txt" 6 | 7 | test_uniq_1() { 8 | uniq 9 | } 10 | 11 | test_uniq_2() { 12 | uniq -c 13 | } 14 | 15 | cat $FILE | sort | test_uniq_1 | tr 'a' 'b' | test_uniq_2 16 | 17 | -------------------------------------------------------------------------------- /test/pash_tests/convert_to_fast.sh: -------------------------------------------------------------------------------- 1 | # convert fastq to fasta format 2 | # It recognizes the extension .fasta and it converts the input to fasta.gz format 3 | INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} 4 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} 5 | cd ${INPUT} 6 | find .
-maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -o ${OUTPUT}/{}.fasta.gz {} 7 | -------------------------------------------------------------------------------- /test/failing/async.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A script that showcases truly async pipes (via fs) 4 | # Note to self: remember | { lambda } 5 | 6 | fz () { sleep $0; echo "1-"$0; } 7 | 8 | export -f fz 9 | 10 | : > f1 11 | 12 | tail -f ./f1 | xargs -n 1 bash -c 'fz "$@"' & 13 | 14 | # {seq 5; echo 'yay!' >&2 ; } > ./f1 15 | seq 5 > ./f1 16 | echo 'yay!' 17 | -------------------------------------------------------------------------------- /test/failing/3.resiz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # tag: resize image 4 | set -e 5 | 6 | 7 | IN=${JPG:-$PASH_TOP/evaluation/benchmarks/aliases/input/jpg} 8 | OUT=${OUT:-PASH_TOP/evaluation/benchmarks/aliases/input/out} 9 | 10 | find $IN -name "*.jpg" | 11 | xargs -n1 basename | 12 | sed "s;\(.*\);-resize 70% $IN/\1 $OUT/\1.70;" | 13 | xargs -L1 convert 14 | -------------------------------------------------------------------------------- /test/pash_tests/pash_declare_vars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | vars_file="${1?File not given}" 4 | 5 | pash_redir_output echo "Writing vars to: $vars_file" 6 | 7 | declare -p > "$vars_file" 8 | ## KK 2021-11-23 We don't actually need to export functions in the vars file. 9 | ## We never expand them in the compiler 10 | ## declare -f >> "$vars_file" 11 | -------------------------------------------------------------------------------- /test/tests/send_emails.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" != "1" ]; then 4 | echo "Usage: $0 [hwXX]" 5 | exit 1 6 | fi 7 | 8 | if [ ! 
-d "$1/mail" ]; then 9 | echo "Couldn't find mail directory (looked in $1/mail)" 10 | exit 2 11 | fi 12 | 13 | cd $1/mail 14 | 15 | for s in `ls`; do 16 | ../../mail.scpt "[cs131] $1 grade" $s 17 | done 18 | -------------------------------------------------------------------------------- /test/pash_tests/max-temp-preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sed 's;^;http://ndr.md/data/noaa/;' | 4 | sed 's;$;/;' | 5 | xargs -r -n 1 curl -s | 6 | grep gz | 7 | tr -s ' \n' | 8 | cut -d ' ' -f9 | 9 | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | 10 | sed 's;^;http://ndr.md/data/noaa/;' | 11 | xargs -n1 curl -s | 12 | gunzip 13 | -------------------------------------------------------------------------------- /test/pash_tests/send_emails.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" != "1" ]; then 4 | echo "Usage: $0 [hwXX]" 5 | exit 1 6 | fi 7 | 8 | if [ ! -d "$1/mail" ]; then 9 | echo "Couldn't find mail directory (looked in $1/mail)" 10 | exit 2 11 | fi 12 | 13 | cd $1/mail 14 | 15 | for s in `ls`; do 16 | ../../mail.scpt "[cs131] $1 grade" $s 17 | done 18 | -------------------------------------------------------------------------------- /test/pash_tests/sed-test.sh: -------------------------------------------------------------------------------- 1 | cat $PASH_TOP/evaluation/tests/input/1M.txt | 2 | sed 's;^d;da;' | 3 | sed 's;^;http://ndr.md/data/noaa/;' | 4 | sed 's;$;/;' | 5 | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | 6 | sed 's;^;http://ndr.md/data/noaa/;' | 7 | sed "s#^#$WIKI#" | 8 | sed s/\$/'0s'/ | 9 | sed 1d | 10 | sed 4d | 11 | sed "\$d" -------------------------------------------------------------------------------- /test/pash_tests/7.nginx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # fetch hit count for each ip ?
3 | set -e 4 | IN=${IN:-${PASH_TOP}/evaluation/benchmarks/aliases/input} 5 | # original command tail -10000 /var/log/nginx/access.log | cut -d "" "" -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1 | awk 6 | cat ${IN}/access.log | cut -d ' ' -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1 7 | -------------------------------------------------------------------------------- /test/pash_tests/circus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands 4 | 5 | echo 1 >file 6 | 7 | rm s1 8 | mkfifo s1 9 | tail -f file | 10 | sed -u 's/^/1 + /' | 11 | tee -a s1 > /dev/null & 12 | 13 | cat s1 | 14 | xargs -0 -n 1 -d '\n' expr | 15 | tee -a file 16 | -------------------------------------------------------------------------------- /python/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test clean 2 | 3 | test: rt.py ../libdash/*.py 4 | @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.py "$$f"; done | tee python.log 5 | @cat python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c 6 | @grep ':' python.log && echo "FAILED" && exit 1 || exit 0 7 | 8 | clean: 9 | rm *.o *.so *.log 10 | -------------------------------------------------------------------------------- /test/failing/split_pipe.sh: -------------------------------------------------------------------------------- 1 | BATCH_SIZE=$1 2 | VIRTUAL_DIR=$2 3 | OUTPUT1=$3 4 | OUTPUT2=$4 5 | 6 | tee >( 7 | head -n "$BATCH_SIZE" > "${VIRTUAL_DIR}/${OUTPUT1}"; 8 | "$PASH_TOP"/evaluation/tools/drain_stream.sh & 9 | cat "${VIRTUAL_DIR}/${OUTPUT1}" > "${OUTPUT1}") | 10 | ( tail -n $((BATCH_SIZE+1)) > "${OUTPUT2}"; 11 | "$PASH_TOP"/evaluation/tools/drain_stream.sh) 12 | 
-------------------------------------------------------------------------------- /test/failing/1.tomp3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # tag: wav-to-mp3 4 | set -e 5 | 6 | IN=${WAV:-$PASH_TOP/evaluation/benchmarks/aliases/input/wav} 7 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out} 8 | 9 | find $IN -name '*.wav' | 10 | xargs -n1 basename | 11 | sed "s;\(.*\);-i $IN/\1 -ab 192000 $OUT/\1.mp3;" | 12 | xargs -L1 ffmpeg -y -loglevel quiet -hide_banner 13 | -------------------------------------------------------------------------------- /test/pash_tests/compress_files.sh: -------------------------------------------------------------------------------- 1 | # compress all the files in a directory using dd and tar 2 | INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} 3 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} 4 | cd $INPUT 5 | # get all rtf and compress them 6 | find . -name "*.rtf" | xargs -P16 -I {} sh -c "dd if={} bs=1 status=none > '{}f'; tar -zcf {}.tar.gz {}f; rm {}f; mv {}.tar.gz $OUTPUT" sh {} 7 | -------------------------------------------------------------------------------- /test/pash_tests/img_convert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: resize image 3 | IN=${JPG:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/jpg} 4 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/jpg} 5 | mkdir -p ${OUT} 6 | for i in $IN/*.jpg; 7 | do 8 | out=$OUT/$(basename -- $i) 9 | convert -resize 70% "$i" "$out"; 10 | done 11 | 12 | echo 'done'; 13 | -------------------------------------------------------------------------------- /test/pash_tests/buggy_non_newline_input.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## No newline before EOF bug 4 | echo -n "popo" > /tmp/in 5 | IN=/tmp/in 6 | 7 | cat 
$IN $IN | grep "popopopo" > /tmp/seq.out 8 | 9 | rm -f s1 s2 10 | mkfifo s1 s2 11 | 12 | cat $IN | grep "popopopo" > s1 & 13 | cat $IN | grep "popopopo" > s2 & 14 | cat s1 s2 > /tmp/buggy.out 15 | 16 | rm -f s1 s2 17 | 18 | diff /tmp/seq.out /tmp/buggy.out -------------------------------------------------------------------------------- /test/pash_tests/clone_compress_repo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | OUTPUT=${1:-"pash.tar.gz"} # ':-' rather than ':=': positional parameters cannot be assigned via expansion 4 | BRANCH=${2:-"master"} 5 | 6 | # TODO: Make the temp_repo_dir be variable and random named 7 | 8 | mkdir temp_repo_dir 9 | cd temp_repo_dir 10 | git clone --recursive git@github.com:andromeda/pash.git 11 | cd pash 12 | git checkout $BRANCH 13 | cd ../ 14 | tar -czf ../$OUTPUT pash 15 | cd ../ 16 | rm -rf temp_repo_dir 17 | -------------------------------------------------------------------------------- /test/failing/remote_exec_graph.sh: -------------------------------------------------------------------------------- 1 | ir_file=$1 2 | 3 | # pash_redir_output echo "Sending msg to worker manager: $message" 4 | response=($(echo "Exec-Graph: $ir_file $declared_functions" | nc -U "$DSPASH_SOCKET")) 5 | # pash_redir_output echo "Got response from worker manager: $response" 6 | 7 | status=${response[0]} #do something if false 8 | script_to_execute=${response[1]} 9 | 10 | source "$script_to_execute" 11 | -------------------------------------------------------------------------------- /test/pash_tests/4.gitkernel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # First command is almost always a generator 3 | set -e 4 | IN=${GIT:-$PASH_TOP/evaluation/benchmarks/aliases/input/linux} 5 | 6 | #FIXME define a complex expression 7 | COMPLEX="" 8 | # linux git 9 | cd ${IN}/linux 10 | git ls-tree --name-only -z -r HEAD | grep -z -Z -E '\.(cc|h|cpp|hpp|c|txt|java)$' | xargs -0 -n1 git blame --line-porcelain | grep
${COMPLEX} 11 | 12 | -------------------------------------------------------------------------------- /test/pash_tests/5.apachelog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # fetch hit count for each ip ? 3 | set -e 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/aliases/input/} 5 | 6 | # 405 original 7 | # cat ${IN}/apache.log | grep "\->" | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr | less 8 | # FIXME need apache error logs .. 9 | cat ${IN}apache.log | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr 10 | -------------------------------------------------------------------------------- /test/failing/quickcheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd $PASH_TOP 4 | 5 | echo confirms the necessary components for running the artifact 6 | echo 7 | echo Git commit ID: $(git rev-parse --short HEAD) 8 | echo \$PASH_TOP: $(echo $PASH_TOP) 9 | echo pash executable: $PASH_TOP/pa.sh 10 | 11 | echo 12 | $PASH_TOP/pa.sh --help 13 | 14 | echo "Testing graph generation" 15 | $PASH_TOP/pa.sh -c 'echo Pash Installation is complete!' 
16 | -------------------------------------------------------------------------------- /test/failing/wc.2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Part of a distributed-`wc` wrapper, merging two `wc` results 4 | # FIXME needs correct padding 5 | 6 | paste -d '+' 7 | <(cat "$1" | 8 | wc | 9 | tr -s ' ' '\n' | 10 | tail -n +2) 11 | <(cat "$2" | 12 | wc | 13 | tr -s ' ' '\n' | 14 | tail -n +2) | 15 | bc | 16 | tr -s '\n' ' ' | 17 | sed 's/^/ /' | 18 | sed 's/$/\ /' 19 | -------------------------------------------------------------------------------- /python/rt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | import libdash 6 | 7 | sys.setrecursionlimit (9001) 8 | 9 | def print_asts(new_asts): 10 | for (ast, lines, linno_before, linno_after) in new_asts: 11 | print(libdash.to_string(ast)) 12 | 13 | if (len(sys.argv) == 1): 14 | new_asts = libdash.parse("-", True) 15 | else: 16 | new_asts = libdash.parse(sys.argv[1], True) 17 | 18 | print_asts(new_asts) 19 | -------------------------------------------------------------------------------- /test/pash_tests/page-per-line.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Squash all HTML for each URL into a single line, streaming fashion 4 | # It also prefixes with the URL 5 | 6 | page_per_line () { 7 | curl -s "$1" | tr -d "\n\r" | tr -d '\n' | sed "s/^/$0 /" | sed -e '/.$/a\' 8 | } 9 | 10 | export -f page_per_line 11 | 12 | # xargs: 13 | # add `-t` for debugging 14 | cat ./urls.txt | xargs -0 -d '\n' -n 1 bash -c 'page_per_line "$@"' _ 15 | -------------------------------------------------------------------------------- /test/pash_tests/1_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: count_words 3 | 4 | 
IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 5 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/1_1/} 6 | ENTRIES=${ENTRIES:-1060} 7 | mkdir -p "$OUT" 8 | 9 | for input in $(ls ${IN} | head -n ${ENTRIES}) 10 | do 11 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out 12 | done 13 | 14 | echo 'done'; 15 | rm -rf "${OUT}" 16 | -------------------------------------------------------------------------------- /version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') 4 | 5 | PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") 6 | 7 | [ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$PYTHON_VERSION" && exit 0 8 | 9 | echo "Version numbers don't match!" 10 | echo " Python is '$PYTHON_VERSION' in pyproject.toml" 11 | echo " Python is '$PYTHON_VERSION2' in setup.py" 12 | exit 1 13 | -------------------------------------------------------------------------------- /ocaml/mk_meta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | LIB="$1" 6 | : ${LIB:=$(opam var lib)/libdash} 7 | 8 | cat >META < ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm ${OUT} 17 | -------------------------------------------------------------------------------- /test/tests/weird_tilde.sh: -------------------------------------------------------------------------------- 1 | case $nm_file_list_spec~$to_tool_file_cmd in 2 | *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) 3 | try_normal_branch=yes 4 | eval cmd=\"$cmd1\" 5 | func_len " $cmd" 6 | len=$func_len_result 7 | ;; 8 | *) 9 | try_normal_branch=no 10 | ;; 11 | esac 12 | -------------------------------------------------------------------------------- /test/README.md: 
-------------------------------------------------------------------------------- 1 | There are three directories of tests: 2 | 3 | - `tests` are the original libdash tests, mostly handwritten 4 | - `pash_tests` are shell scripts taken from [`pash`](https://github.com/binpash/pash) 5 | - `failing` are shell scripts that aren't working right now (which is probably a bug) 6 | 7 | Both the OCaml and Python bindings use the `round_trip.sh` script to test round-tripping. The `test_ocaml_python.sh` script compares the output from Python and OCaml. 8 | -------------------------------------------------------------------------------- /test/failing/mk_meta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | LIB="$1" 6 | : ${LIB:=$(opam var lib)/libdash} 7 | 8 | cat >META < ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf "${OUT}" 17 | -------------------------------------------------------------------------------- /test/pash_tests/6_1_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: uppercase_by_token 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_1/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -c '^[A-Z]' > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf ${OUT} 17 | -------------------------------------------------------------------------------- /test/pash_tests/3_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: sort_words_by_folding 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_2/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in
$(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | sort -f > ${OUT}/${input} 13 | done 14 | 15 | echo 'done'; 16 | rm -rf ${OUT} 17 | -------------------------------------------------------------------------------- /test/pash_tests/2_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: count_vowel_seq 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_2/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr 'a-z' '[A-Z]' | tr -sc 'AEIOU' '[\012*]'| sort | uniq -c > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf "${OUT}" 17 | -------------------------------------------------------------------------------- /test/pash_tests/6_1_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: uppercase_by_type 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_2/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u | grep -c '^[A-Z]' > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf ${OUT} 17 | -------------------------------------------------------------------------------- /test/pash_tests/6_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: words_no_vowels 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_3/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr 
-sc '[A-Z][a-z]' '[\012*]' | grep -vi '[aeiou]' | sort | uniq -c > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf "${OUT}" 17 | -------------------------------------------------------------------------------- /test/pash_tests/3_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: sort_words_by_rhyming.sh 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_3/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | rev | sort | rev > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf "${OUT}" 17 | -------------------------------------------------------------------------------- /test/pash_tests/mk_dot_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | libdash_files=$(ls _build/lib) 6 | bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" 7 | 8 | files= 9 | for f in ${libdash_files} 10 | do 11 | files="${files} \"_build/lib/${f}\"" 12 | done 13 | 14 | for f in ${bindings_files} 15 | do 16 | files="${files} \"ocaml/${f}\"" 17 | done 18 | 19 | cat >libdash.install < ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf ${OUT} 17 | -------------------------------------------------------------------------------- /test/pash_tests/alt_bigrams_funs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alt_bigrams_aux() 4 | { 5 | s2=$(mktemp -u) 6 | ( mkfifo $s2 > /dev/null ) ; 7 | 8 | tee $s2 | 9 | tail -n +2 | 10 | paste $s2 - | 11 | sed '$d' | 12 | sort | 13 | uniq 14 | rm $s2 15 | } 16 | 17 | alt_bigram_aux_reduce() 18 | { 19 | IN1=$1 20 | IN2=$2 21 | 22 | sort -m $IN1 
$IN2 | 23 | uniq 24 | } 25 | 26 | export -f alt_bigrams_aux 27 | export -f alt_bigram_aux_reduce 28 | -------------------------------------------------------------------------------- /test/pash_tests/2_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: merge_upper 3 | # set -e 4 | 5 | # Merge upper and lower counts 6 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 7 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_1/} 8 | ENTRIES=${ENTRIES:-1060} 9 | mkdir -p "$OUT" 10 | 11 | for input in $(ls ${IN} | head -n ${ENTRIES}) 12 | do 13 | cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc '[A-Z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out 14 | done 15 | 16 | echo 'done'; 17 | rm -rf "${OUT}" 18 | -------------------------------------------------------------------------------- /test/failing/split-unix50.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | awk -v RS= '{print > (NR ".txt")}' unix50.sh 4 | 5 | for file in *.txt; do 6 | fname=$(basename -- "$file") 7 | fscript="${fname%.*}".sh 8 | echo $fscript 9 | echo '#!/bin/bash' > $fscript 10 | 11 | echo 'export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}' >> $fscript 12 | input=$(grep -o 'IN..' 
$file) 13 | grep "^$(echo $input | xargs)=" unix50.sh >> $fscript 14 | cat $file >> $fscript 15 | echo '' >> $fscript 16 | done 17 | 18 | -------------------------------------------------------------------------------- /test/tests/aaaa: -------------------------------------------------------------------------------- 1 | eval "\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" 2 | -------------------------------------------------------------------------------- /test/pash_tests/6_4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: 1-syllable words 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_4/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | for input in $(ls ${IN} | head -n ${ENTRIES}) 11 | do 12 | cat ${IN}/${input} | tr -sc '[A-Z][a-z]' '[\012*]' | grep -i '^[^aeiou]*[aeiou][^aeiou]*$' | sort | uniq -c | sed 5q > ${OUT}/${input}.out 13 | done 14 | 15 | echo 'done'; 16 | rm -rf "${OUT}" 17 | -------------------------------------------------------------------------------- /test/tests/aaaa_single: -------------------------------------------------------------------------------- 1 | eval 
'"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' 2 | -------------------------------------------------------------------------------- /test/failing/sieve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Doug McIlroy's implementation of Sieve of Eratosthenes 5 | 6 | # A combination of: 7 | # https://swtch.com/~rsc/thread/ 8 | # https://stackoverflow.com/questions/14927895/sieve-of-eratosthenes-unix-script 9 | 10 | OUT=./output/out.txt 11 | 12 | limit=10000 13 | sieve="$(seq 2 $limit | sort)" 14 | 15 | for n in 2 $(seq 3 2 $limit) 16 | do 17 | sieve="$(comm -23 <(echo "$sieve") <(seq $(($n * $n)) $n $limit|sort))" 18 | done 19 | 20 | echo "$sieve" | sort -n > $OUT 21 | -------------------------------------------------------------------------------- /test/failing/append_nl_if_not.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Adds a newline at the end of a file if it doesn't already end in a newline. 4 | ## Used to prepare inputs for PaSh. 5 | 6 | if [ -z "$1" ]; then 7 | echo "No file argument given!" 8 | exit 1 9 | else 10 | if [ ! -f "$1" ]; then 11 | echo "File $1 doesn't exist!" 12 | exit 1 13 | else 14 | tail -c 1 "$1" | od -ta | grep -q nl 15 | if [ $? 
#!/bin/bash
# tag: 2-syllable words
# set -e

IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_5/}
ENTRIES=${ENTRIES:-1060}
mkdir -p "$OUT"

# Read text on stdin, split it into words (one per line), keep the words that
# contain exactly two vowel letters, and print the first five rows of the
# sorted `uniq -c` histogram.
#
# The trailing `[^aeiou]*` must carry the `*`: without it only words ending in
# exactly one consonant matched, so words such as "cake" were dropped.
two_syllable_words() {
    tr -sc '[A-Z][a-z]' '[\012*]' |
        grep -i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]*$' |
        sort |
        uniq -c |
        sed 5q
}

# Process at most ENTRIES files from IN; reading names line by line keeps
# file names containing spaces intact.
ls "${IN}" | head -n "${ENTRIES}" | while IFS= read -r input
do
    # "${OUT}/" (explicit separator) matches the sibling nlp scripts and also
    # works when OUT is given without a trailing slash.
    cat "${IN}/${input}" | two_syllable_words > "${OUT}/${input}.out"
done

echo 'done';
rm -rf "${OUT}"
1,3,4,5,6,8,12,13,14,15,16,17,18,19,20,21,23,24,25,26,34,35,36,38,40,42,44,45,46,85,86,87,88,89 #| #only select certain columns 8 | pv -N Process -c | 9 | gzip -9 | 10 | pv -N Compress -c > $1.gz 11 | -------------------------------------------------------------------------------- /test/pash_tests/max-temp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FROM=${FROM:-2015} 4 | TO=${TO:-2015} 5 | IN=${IN:-'http://ndr.md/data/noaa/'} 6 | fetch=${fetch:-"curl -s"} 7 | 8 | seq $FROM $TO | 9 | sed "s;^;$IN;" | 10 | sed 's;$;/;' | 11 | xargs -r -n 1 $fetch | 12 | grep gz | 13 | tr -s ' \n' | 14 | cut -d ' ' -f9 | 15 | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | 16 | sed "s;^;$IN;" | 17 | xargs -n1 curl -s | 18 | gunzip | 19 | ## Processing 20 | cut -c 89-92 | 21 | grep -v 999 | 22 | sort -rn | 23 | head -n1 24 | -------------------------------------------------------------------------------- /test/pash_tests/set-e.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | # part of a compound list of a 'while', 'until' or 'if' 3 | while false; do break; done 4 | echo passed while 5 | until false; do break; done 6 | echo passed until 7 | if false; then :; fi 8 | echo passed if 9 | # any command of an AND-OR list other than the last 10 | false && : 11 | echo passed AND list 12 | false || : 13 | echo passed OR list 14 | : && false || false && : 15 | echo passed AND-OR list 16 | # part of a pipeline preceded by the '!' reserved word 17 | ! 
false 18 | echo passed negated pipeline -------------------------------------------------------------------------------- /test/pash_tests/tee_web_index_bug.sh: -------------------------------------------------------------------------------- 1 | IN=$PASH_TOP/evaluation/tests/input/1M.txt 2 | 3 | mkfifo {1,2,3}grams 4 | 5 | cat "$IN" | 6 | tr -cs A-Za-z '\n' | 7 | tr A-Z a-z | 8 | tee 3grams 2grams 1grams > /dev/null & 9 | 10 | cat 1grams | 11 | sort | 12 | uniq -c | 13 | sort -rn > 1-grams.txt & 14 | 15 | cat 2grams | 16 | sort | 17 | uniq -c | 18 | sort -rn > 2-grams.txt & 19 | 20 | cat 3grams | 21 | sort | 22 | uniq -c | 23 | sort -rn # >> 3-grams.txt 24 | 25 | rm {1,2,3}grams {1,2,3}-grams.txt 26 | -------------------------------------------------------------------------------- /test/failing/buggy_comm_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkfifo s1 s2 s3 s4 s5 4 | 5 | PREV_IN=../evaluation/scripts/input/1M.txt 6 | IN=/tmp/1M.txt 7 | 8 | cat $PREV_IN > $IN 9 | echo "end" >> $IN 10 | 11 | cat $IN | grep "king" | tee s4 >s3 & 12 | comm -23 $IN s3 > s1 & 13 | comm -23 $IN s4 > s2 & 14 | { ../runtime/eager s2 s5 "/tmp/eager_intermediate_#file1" & } 15 | cat s1 s5 > /tmp/buggy.out 16 | 17 | comm -23 <(cat $IN $IN) <(cat $IN | grep "king") > /tmp/seq.out 18 | 19 | rm s1 s2 s3 s4 s5 20 | 21 | diff /tmp/buggy.out /tmp/seq.out 22 | -------------------------------------------------------------------------------- /test/pash_tests/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Find markdown files in the current directory tree, compile them to HTML, and 4 | # serve them over the network 5 | 6 | # Requires: pandoc 7 | 8 | IN=./input/ 9 | OUT=./output/out.txt 10 | 11 | find $IN -name '*.md' | # Parallelizable, given a distributed FS 12 | xargs pandoc | # xargs is higher-order, pandoc is third-party 13 | gzip > $OUT # Compress the 
#!/bin/bash
PROXY=$([ "$(hostname)" == "deathstar" ] && echo "gamma.ndr.md" || echo "localhost")
WIKI="$HOME/wikipedia/"
export WIKI

# Squash all HTML of one page into a single line, streaming fashion.
#   $1 - page path relative to $WIKI
# For backward compatibility with the old `bash -c 'page_per_line "$@"' PAGE`
# call style (where PAGE ended up in $0), $0 is used when no argument is given.
# Output: the page content with every CR/LF removed; the sed expression
# appends a final newline when the resulting line does not end with one.
page_per_line () {
  local page=${1-$0}
  cat "$WIKI/$page" | tr -d "\n\r" | sed -e '/.$/a\'
}

export -f page_per_line

# xargs:
# add `-t` for debugging
# -d '\n' : one page name per input line (the former -0 was overridden by -d)
# -r      : do not run the command at all when the index is empty
# `_`     : placeholder for $0 so that the page name arrives as "$1"
cat "$WIKI/index_h_100.txt" | xargs -r -d '\n' -n 1 bash -c 'page_per_line "$@"' _
3 | # https://stackoverflow.com/questions/2509533/bash-linux-set-difference-between-two-text-files 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 6 | 7 | mkfifo s1 s2 8 | 9 | cat $IN | 10 | cut -d ' ' -f 1 | 11 | tr [:lower:] [:upper:] | 12 | sort > s1 & 13 | 14 | cat $IN | 15 | cut -d ' ' -f 1 | 16 | sort > s2 & 17 | 18 | comm -23 s1 s2 19 | 20 | rm s1 s2 21 | -------------------------------------------------------------------------------- /test/pash_tests/symtab-sha.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # To build and sign an SGX enclave, this script extracts the executable's symbol 4 | # table and calculates its SHA256 hashsum. 5 | 6 | # Require 7 | # Data: /usr/lib/libz3.so 8 | 9 | IN=/usr/lib/libz3.so 10 | OUT=./output/out.txt 11 | 12 | readelf -x .symtab $IN | 13 | tail -n +3 | 14 | head -n -1 | # next three implement `awk '{print $2$3$4$5}'` 15 | sed 's/^[[:space:]]*//' | 16 | cut -d ' ' -f2-5 | 17 | tr -d ' ' | 18 | tr -d "\n" | 19 | xxd -r -p | 20 | sha256sum > $OUT 21 | -------------------------------------------------------------------------------- /test/pash_tests/page-count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A bash script for determining how many pages are in a folder of OpenOffice documents 4 | # From "Wicked Cool Shell Scripts", 2nd Ed., pg. 
# Run the benchmark suite, store its summary in log_results/out, print the
# summary, and dump the failure log of every benchmark whose passed count
# differs from its total count.

# Print field $2 of summary line $1, where fields are separated by runs of
# non-digit characters (a line starting with text has an empty field 1, so
# the first number is field 2 and the second number is field 3).
summary_number() {
    printf '%s\n' "$1" | awk -F'[^0-9]+' -v n="$2" '{ print $n }'
}

# Print the benchmark name of summary line $1: the first space-delimited
# word with its last character (the trailing ':') removed.
benchmark_name() {
    printf '%s\n' "${1%% *}" | sed 's/.$//'
}

bash ./get_results.sh > out
# Make sure the destination is a directory so that `mv` moves the summary
# into it instead of renaming the summary to "log_results".
mkdir -p log_results
mv out log_results
cat log_results/out
while IFS= read -r p; do
    PASSED=$(summary_number "$p" 2)
    TOTAL=$(summary_number "$p" 3)
    # Lines without two numbers are not benchmark summaries.
    if [ -z "$PASSED" ] || [ -z "$TOTAL" ]; then
        continue
    fi
    FAILED=$((TOTAL - PASSED))
    # failed, print to stdout
    if [ "$PASSED" -ne "$TOTAL" ]; then
        # get the benchmark name (first word without the trailing ':')
        f=$(benchmark_name "$p")
        # dump the failed tests
        cat "log_results/${f}_failed.log"
    fi
done < log_results/out
#!/bin/bash

# Run `make` in every student directory under <hw>/grading and record the
# students whose build failed in <hw>/grading/errors.log.
#
# Usage: run_grader.sh hwXX
# Exit status: 1 on wrong usage, 2 when the grading directory is missing or
# cannot be entered.

if [ "$#" != "1" ]; then
    echo "Usage: $0 [hwXX]"
    exit 1
fi

grading_dir="$1/grading"

if [ ! -d "$grading_dir" ]; then
    echo "Couldn't find grading directory (looked in $1/grading)"
    exit 2
fi

cd -- "$grading_dir" || exit 2

errors=""
# Iterate over sub-directories only: plain files such as a previous
# errors.log are not student submissions.
for s in */; do
    [ -d "$s" ] || continue
    s=${s%/}
    echo "GRADING $s"
    # `&&` keeps make from running in the grading directory itself when the
    # student directory cannot be entered.
    if ! (cd -- "$s" && make); then
        errors+=" $s"
    fi
done

echo
echo "There were errors for the following students:${errors}"
# The working directory is already the grading directory, so the log is
# written relative to it; "$1/grading/errors.log" would be resolved against
# the new working directory and fail for a relative $1.
printf '%s\n' "${errors# }" > errors.log
!= "0" ]; then 20 | errors+=" $s" 21 | fi 22 | done 23 | 24 | echo 25 | echo "There were errors for the following students:${errors}" 26 | echo ${errors} >"$1/grading/errors.log" 27 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.12) 2 | (name libdash) 3 | (using ctypes 0.3) 4 | 5 | (source (github mgree/libdash)) 6 | (license BSD-3-Clause) 7 | (authors "Michael Greenberg") 8 | (maintainers "michael@greenberg.science") 9 | 10 | (package 11 | (name libdash) 12 | (synopsis "Bindings to the dash shell's parser") 13 | (depends 14 | ("ctypes" (>= "0.21.1")) 15 | ("ctypes-foreign" (>= "0.21.1")) 16 | ("atdgen" (>= "2.15.0")) 17 | ("conf-autoconf" (>= 0.1)) 18 | ("conf-aclocal" (>= 2)) 19 | ("conf-automake" (>= 1)) 20 | ("conf-libtool" (>= 1)))) 21 | 22 | (generate_opam_files) -------------------------------------------------------------------------------- /mk_dot_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | libdash_files=$(ls _build/lib) 6 | bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" 7 | 8 | lib_files= 9 | for f in ${libdash_files} 10 | do 11 | lib_files="${lib_files} \"_build/lib/${f}\"" 12 | done 13 | 14 | for f in ${bindings_files} 15 | do 16 | lib_files="${lib_files} \"ocaml/${f}\"" 17 | done 18 | 19 | bin_files="\"ocaml/shell_to_json\" \"ocaml/json_to_shell\"" 20 | 21 | cat >libdash.install </dev/null 10 | } 11 | 12 | export -f run_tests 13 | 14 | pkg_count=0 15 | for item in ${IN}/*; 16 | do 17 | pkg_count=$((pkg_count + 1)); 18 | run_tests $item > ${LOGS}/${pkg_count}.log 19 | done 20 | 21 | echo 'done'; 22 | -------------------------------------------------------------------------------- /test/pash_tests/innefficient_auto_split.sh: 
#!/bin/bash
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/node_modules}
MIR_BIN=${MIR_BIN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/mir-sa/.bin/mir-sa}
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/mir}
mkdir -p "${OUT}/"
pkg_count=0

# Run the analysis tool inside one package directory.
#   $1 - package directory
# stdout of the tool is passed through; its stderr is appended to
# ${OUT}/error.log.  The body runs in a subshell so the caller's working
# directory is left untouched, and the tool is not started when the
# directory cannot be entered.
# Returns non-zero when the cd or the tool fails.
run_tests() {
    (
        cd -- "$1" || exit 1
        # MIR_BIN is deliberately unquoted so it may hold a command plus
        # leading arguments.
        ${MIR_BIN} -p 2>>"${OUT}/error.log"
    )
}
export -f run_tests
for item in "${IN}"/*;
do
    # Skip the literal pattern left behind when IN has no entries.
    [ -e "$item" ] || continue
    pkg_count=$((pkg_count + 1));
    run_tests "$item" > "${OUT}/${pkg_count}.log"
done

echo 'done';
#!/usr/bin/env bash

# Aggregator for `uniq -c`: concatenates two partial `uniq -c` outputs and,
# when the last entry of the first file and the first entry of the second
# file count the same line, folds them into a single entry with the summed
# count.
#
# Usage: uniq-c.2.sh FILE1 FILE2

# Merge the two partial results named by $1 and $2 and print the combined
# result on stdout.
merge_uniq_counts() {
    local first=$1 second=$2
    local end_line start_line
    local end_num end_word start_num start_word
    # A `uniq -c` row is: optional padding, the count, exactly ONE separator
    # space, then the counted line verbatim.  Only that single space is
    # stripped so lines that differ in leading whitespace stay distinct.
    local row_re='^ *([0-9]+) (.*)$'

    end_line=$(tail -n 1 "$first")
    start_line=$(head -n 1 "$second")

    if [[ $end_line =~ $row_re ]]; then
        end_num=${BASH_REMATCH[1]}
        end_word=${BASH_REMATCH[2]}
    else
        # Empty or malformed boundary row: nothing to fold.
        cat "$first" "$second"
        return
    fi

    if [[ $start_line =~ $row_re ]]; then
        start_num=${BASH_REMATCH[1]}
        start_word=${BASH_REMATCH[2]}
    else
        cat "$first" "$second"
        return
    fi

    if [[ $start_word == "$end_word" ]]; then
        sed '$d' "$first"
        printf "%7s %s\n" "$((start_num + end_num))" "$start_word"
        sed '1d' "$second"
    else
        cat "$first" "$second"
    fi
}

merge_uniq_counts "$@"
#!/usr/bin/env bash

# Runs every PaSh test suite in sequence, tracing each command and stopping
# at the first failing step.
#
# Environment: PASH_TOP - repository root; defaults to the git top level.

# `set -x e` only enabled tracing and replaced the positional parameters
# with the single word "e"; the intent is tracing plus exit-on-error.
set -xe

export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)}

echo "Running intro tests..."
cd "$PASH_TOP/evaluation/intro"
./test.sh

echo "Running interface tests..."
cd "$PASH_TOP/evaluation/tests/interface_tests"
./run.sh

echo "Running compiler tests..."
cd "$PASH_TOP/compiler"
./test_evaluation_scripts.sh

echo "Running aggregator tests..."
cd "$PASH_TOP/evaluation/tests/agg/"
./run.sh

# Second aggregator suite (located under runtime/agg/cpp).
echo "Running aggregator tests..."
cd "$PASH_TOP/runtime/agg/cpp/tests"
./test.sh
#!/usr/bin/env bash

## Multiplies its stdin by -m times
##
## Usage: multiply.sh [-m FACTOR] < input
##   -m FACTOR  number of copies of stdin written to stdout (default 1)
##   -l         reserved for a size limit; not implemented

multiply_factor=1

## TODO: Implement
size_limit=0

while getopts 'm:l' opt; do
    case $opt in
        m) multiply_factor=$OPTARG ;;
        l) echo "Option -l not implemented yet"
           exit 1 ;;
        *) echo 'Error in command line parsing' >&2
           exit 1 ;;
    esac
done
shift "$(( OPTIND - 1 ))"

# echo "Mult by: $multiply_factor"

# Buffer stdin in a private temporary file.  The file is created by mktemp
# itself (no `-u` name race) and removed on every exit path.
temp_file="$(mktemp)" || exit 1
trap 'rm -f -- "$temp_file"' EXIT

# The buffer must be addressed through "$temp_file"; the bare word
# `temp_file` is a literal file name in the current directory.
cat > "$temp_file"

for (( i = 0; i < multiply_factor; i++ )); do
    cat -- "$temp_file"
done
# Print the lines of stdin that do not appear in the word list file $1
# (set difference: stdin minus dictionary).
#   -x  compare whole lines
#   -F  treat dictionary entries as fixed strings, not regular expressions
#   -v  keep only the lines that match no dictionary entry
set_diff()
{
    grep -vxF -f "$1" -
}

dict=$PASH_TOP/evaluation/tests/input/sorted_words
IN=$PASH_TOP/evaluation/tests/input/1M.txt

cat "$IN" |
    # groff -t -e -mandoc -Tascii |  # remove formatting commands
    col -bx |                        # remove backspaces / linefeeds
    tr -cs A-Za-z '\n' |
    tr A-Z a-z |                     # map upper to lower case
    tr -d '[:punct:]' |              # remove punctuation
    sort |                           # put words in alphabetical order
    uniq |                           # remove duplicate words
    set_diff "$dict"                 # report words not in dictionary
/bin/bash 2 | 3 | # Pair with ./suggest-ec2.sh 4 | 5 | main() { 6 | set -x 7 | aws ec2 run-instances \ 8 | --output text \ 9 | --query "Instances[0].InstanceId" \ 10 | --image-id "$PASH_AWS_EC2_AMI" \ 11 | --instance-type "$PASH_AWS_EC2_INSTANCE_TYPE" \ 12 | --key-name "$PASH_AWS_EC2_KEY_NAME" \ 13 | --security-group-ids "$PASH_AWS_EC2_SECURITY_GROUP" \ 14 | --monitoring "Enabled=false" \ 15 | --subnet-id "$PASH_AWS_EC2_SUBNET" \ 16 | --query 'Instances[0].InstanceId' \ 17 | --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=$PASH_AWS_EC2_DISK_SIZE_GB}" \ 18 | --output text 19 | } 20 | 21 | main 22 | -------------------------------------------------------------------------------- /test/pash_tests/8.3_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: find_anagrams.sh 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_2/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | run_tests() { 11 | input=$1 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.types 13 | rev < ${OUT}/${input}.types > ${OUT}/${input}.types.rev 14 | sort ${OUT}/${input}.types ${OUT}/${input}.types.rev | uniq -c | awk "\$1 >= 2 {print \$2}" 15 | } 16 | 17 | export -f run_tests 18 | for input in $(ls ${IN} | head -n ${ENTRIES}) 19 | do 20 | run_tests $input > ${OUT}/${input}.out 21 | done 22 | 23 | echo 'done'; 24 | rm -rf "${OUT}" 25 | -------------------------------------------------------------------------------- /test/pash_tests/8_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: sort_words_by_num_of_syllables 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.1/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | run_tests() { 11 | cat 
$IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.words 12 | tr -sc '[AEIOUaeiou\012]' ' ' < ${OUT}/${input}.words | awk '{print NF}' > ${OUT}/${input}.syl # squeeze every non-vowel run to one space, then count the remaining vowel groups per word 13 | paste ${OUT}/${input}.syl ${OUT}/${input}.words | sort -nr | sed 5q # print the first five lines of the descending sort, then quit 14 | } 15 | export -f run_tests 16 | for input in $(ls ${IN} | head -n ${ENTRIES}) 17 | do 18 | run_tests $input > ${OUT}/${input}.out # NOTE: run_tests reads the loop variable $input directly; the argument is not used 19 | done 20 | 21 | echo 'done'; 22 | rm -rf "${OUT}" 23 | -------------------------------------------------------------------------------- /test/pash_tests/wrap_cat.sh: -------------------------------------------------------------------------------- 1 | file1=1.out 2 | file2=2.out 3 | file3=3.out 4 | file4=4.out 5 | testFile=../../evaluation/scripts/input/10M.txt 6 | batchSize=70000 7 | 8 | mkfifo $file1 9 | mkfifo $file3 10 | 11 | ## 1. TODO: Deadlocks on merge + split (true) 12 | ## 2. Increasing batchsize deadlock 13 | ## 3. Improving wrap performance 14 | 15 | ../r_split $testFile $batchSize $file1 $file3 & 16 | # ../r_wrap cat < $file1 > $file3 & 17 | ../r_merge $file1 $file3 > $file4 18 | 19 | # cat $testFile > $file4 20 | 21 | # if cmp -s "$testFile" "$file4"; then 22 | # printf 'The file "%s" is the same as "%s"\n' "$file1" "$file3" 23 | # else 24 | # printf 'The file "%s" is different from "%s"\n' "$file1" "$file3" 25 | # fi 26 | 27 | rm -rf *.out 28 | -------------------------------------------------------------------------------- /test/pash_tests/6_7.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: verse_2om_3om_2instances 3 | # set -e 4 | # verses with 2 or more, 3 or more, exactly 2 instances of light.
5 | 6 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 7 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_7/} 8 | ENTRIES=${ENTRIES:-1060} 9 | mkdir -p "$OUT" 10 | 11 | for input in $(ls ${IN} | head -n ${ENTRIES}) 12 | do 13 | cat $IN/$input | grep -c 'light.*light' > ${OUT}/${input}.out0 # lines with two or more occurrences 14 | cat $IN/$input | grep -c 'light.*light.*light' > ${OUT}/${input}.out1 # lines with three or more occurrences 15 | cat $IN/$input | grep 'light.*light' | grep -vc 'light.*light.*light' > ${OUT}/${input}.out2 # lines with exactly two occurrences 16 | done 17 | 18 | echo 'done'; 19 | rm -rf "${OUT}" 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include COPYING Makefile.am autogen.sh configure.ac 2 | graft src 3 | exclude src/builtins.c src/builtins.h src/builtins.def 4 | exclude src/dash 5 | exclude src/init.c src/mkinit 6 | exclude src/mknodes src/mksignames src/mksyntax 7 | exclude src/nodes.c src/nodes.h 8 | exclude src/signames.c 9 | exclude src/syntax.c src/syntax.h 10 | exclude src/token.h 11 | prune src/.deps 12 | prune src/bltin/.deps 13 | prune src/mkinit.dSYM 14 | prune src/mknodes.dSYM 15 | prune src/mksignames.dSYM 16 | prune src/mksyntax.dSYM 17 | global-exclude *.dSYM *.o *.lo *.la *.py[cod] __pycache__ Makefile *.log .gitignore 18 | prune src/.libs 19 | prune ocaml/ 20 | prune test/ 21 | prune build/ 22 | exclude python/rt.py 23 | include libdash/libdash.so libdash/libdash.dylib -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "libdash" 3 | version = "0.3.1" 4 | authors = [ 5 | { name="Michael Greenberg", email="michael@greenberg.science" }, 6 | ] 7 | description = "Bindings for the dash shell as a library" 8 | readme = "README.md" 9 | license = { file="COPYING" } 10 | requires-python = ">=3.7" 11 | classifiers = [ 12 |
"Programming Language :: Python :: 3", 13 | "Topic :: System :: System Shells", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: POSIX", 16 | ] 17 | 18 | [project.urls] 19 | "Homepage" = "https://github.com/mgree/libdash" 20 | "Bug Tracker" = "https://github.com/mgree/libdash/issues" 21 | 22 | [build-system] 23 | requires = ["setuptools>=61.0"] 24 | build-backend = "setuptools.build_meta" 25 | -------------------------------------------------------------------------------- /test/failing/process-aliases.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Extracting honest pipelines from all of GitHub's aliases 4 | # N.b.: Noisy data.. 5 | 6 | QUERY=$(cat <<'EOF' 7 | .headers on 8 | .mode csv 9 | SELECT * FROM alias 10 | EOF 11 | ) 12 | 13 | # https://zenodo.org/record/3778825#.X9YamKpKjRZ 14 | curl 'https://zenodo.org/record/3778825/files/results.db?download=1' > results.db # 3.2GB 15 | sqlite3 results.db <(echo $QUERY) | csvcut -c 4 pipelines.csv | awk '{$1=$1};1' | sort | uniq | tee >( 16 | # Schwartzian transform: prefix each line with its field count, sort on it, strip it again 17 | awk -F'|' '{print NF,$0}' | sort -nr | cut -d' ' -f2- > likely-longest-pipelines.txt 18 | ) >( 19 | tr '|' '\n' | awk '{$1=$1};1' | awk '{print $1}' | tr -cs 'A-Za-z' '\n' | sort | uniq -c | sort -rn > freq-commands.txt 20 | ) 21 | -------------------------------------------------------------------------------- /test/failing/bio3.sh: -------------------------------------------------------------------------------- 1 | # **Create the bowtie2 alignment database for the Arabidopsis genome** 2 | # https://bioinformaticsworkbook.org/Appendix/GNUparallel/GNU_parallel_examples.html#gsc.tab=0 3 | cd $PASH_TOP/evaluation/bio/input/bio3 4 | bowtie2-build TAIR10_chr_all.fas tair 5 | #theirs 6 | time parallel -j2 "bowtie2 --threads 4 -x tair -k1 -q -1 {1} -2 {2} -S {1/.}.sam >& {1/.}.log" ::: fastqfiles/*_1.fastq.gz :::+ fastqfiles/*_2.fastq.gz 7 | #ours 8 | paste <(find .
-name "*_1.fastq.gz") <(find . -name "*_2.fastq.gz") | xargs -n \ 9 | 2 sh -c 'bowtie2 --threads 4 -x tair -k1 -q -1 "$1" -2 "$2" -S fifth_R1.sam' argv0 10 | 11 | -------------------------------------------------------------------------------- /test/failing/dgsh_tee.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | input=${1?"ERROR: dgsh-tee: No input file given"} 4 | output=${2?"ERROR: dgsh-tee: No output file given"} 5 | args=("${@:3}") 6 | 7 | # Default PASH_TOP to the git repository root if it is not already set 8 | PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} 9 | 10 | # TODO: Doable check if this is still needed. Turned off for distributed execution. 11 | # PR https://github.com/binpash/pash/pull/495 might've resolved it. 12 | # cleanup() 13 | # { 14 | # kill -SIGTERM $dgsh_tee_pid > /dev/null 2>&1 15 | # } 16 | # trap cleanup EXIT 17 | 18 | # $PASH_TOP/runtime/dgsh-tee -i "$input" -o "$output" $args & 19 | # dgsh_tee_pid=$!
20 | # wait $dgsh_tee_pid 21 | "$PASH_TOP"/runtime/dgsh-tee -i "$input" -o "$output" "${args[@]}" 22 | -------------------------------------------------------------------------------- /test/pash_tests/set-e-3.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | # individual command in a multi-command pipeline 3 | false | : 4 | echo passed pipeline 5 | # part of a compound list of an 'elif' 6 | if false; then :; elif false; then :; fi 7 | echo passed elif 8 | # non-subshell compound command whose exit status was the result 9 | # of a failure while -e was being ignored 10 | { false && : ; } 11 | echo passed compound-brace 12 | for i in a; do false && : ; done 13 | echo passed compound-for 14 | # case x in x) false && : ;; esac 15 | # echo passed compound-case 16 | if :; then false && : ; fi 17 | echo passed compound-if 18 | cont=y; while [ $cont = y ]; do cont=n; false && : ; done 19 | echo passed compound-while 20 | end=n; until [ $end = y ]; do end=y; false && : ; done 21 | echo passed compound-until -------------------------------------------------------------------------------- /test/pash_tests/tr_cs_wc_test.sh: -------------------------------------------------------------------------------- 1 | ## This script is used to experiment with how to get parallelism benefits from a bunch of Unix50 pipelines 2 | 3 | ## You have to run the following before running this script. 4 | ## The output should be 439M long 5 | ## Warning: Takes a long time 6 | ## cat $PASH_TOP/evaluation/unix50/4.txt | $PASH_TOP/runtime/multiply.sh -m 1000000 | pv > $PASH_TOP/evaluation/unix50/big_4.txt 7 | 8 | FILE="${PASH_TOP}/evaluation/unix50/big_4.txt" 9 | 10 | # cat $FILE | tr -s ' ' '\n' | grep 'x' | grep '\.' | wc -l 11 | 12 | cat $FILE | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l 13 | 14 | ## Possible solutions: 15 | ## 1. Make an aggregator for tr -s (This is the best solution) 16 | ## 2.
Remove the -s since it is not actually necessary 17 | ## 3. Make an aggregator for wc (?) -------------------------------------------------------------------------------- /test/pash_tests/4_3b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #tag: count_trigrams.sh 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/4_3b/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | run_tests() { 11 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words 12 | tail -n +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords # word list shifted up by one line 13 | tail -n +3 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2 # word list shifted up by two lines (third word of each trigram) 14 | paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 | 15 | sort | uniq -c 16 | } 17 | export -f run_tests 18 | for input in $(ls ${IN} | head -n ${ENTRIES}) 19 | do 20 | run_tests $input > ${OUT}/${input}.trigrams 21 | done 22 | 23 | echo 'done'; 24 | rm -rf "${OUT}" 25 | -------------------------------------------------------------------------------- /test/pash_tests/6.msg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # First command is almost always a generator 3 | set -e 4 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out} 5 | 6 | 7 | # grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null | grep -iw 'recover[a-z]*\|power[a-z]*\|shut[a-z ]*down\|rsyslogd\|ups' > /tmp/__shutdown.log && echo 'File written to /tmp__shutdown.log' 8 | # doesn't do much :/ 9 | grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null | 10 | grep --regex 'recover[a-z]*\|power[a-z]*\|shut[a-z
]*down\|rsyslogd\|ups' > ${OUT}/shutdown.log 11 | -------------------------------------------------------------------------------- /test/pash_tests/identity.sh: -------------------------------------------------------------------------------- 1 | file1=1.out 2 | file2=2.out 3 | file3=3.out 4 | file4=4.out 5 | file5=5.out 6 | 7 | 8 | batchSize=10000 9 | testFile="/home/ubuntu/pash/evaluation/scripts/input/100M.txt" 10 | if [ "$#" -gt 0 ]; then # optional first argument overrides the input file 11 | testFile=$1 12 | fi 13 | if [ "$#" -gt 1 ]; then # optional second argument overrides the batch size 14 | batchSize=$2 15 | fi 16 | 17 | 18 | mkfifo $file1 19 | mkfifo $file2 20 | mkfifo $file3 21 | mkfifo $file4 22 | 23 | ../r_split $testFile $batchSize $file1 $file2 & 24 | 25 | ../r_wrap cat < $file1 > $file3 & 26 | ../r_wrap cat < $file2 > $file4 & 27 | 28 | ../r_merge $file3 $file4 > $file5 29 | 30 | if cmp -s "$testFile" "$file5"; then 31 | printf 'The file "%s" is the same as "%s"\n' "$testFile" "$file5" 32 | else 33 | printf 'The file "%s" is different from "%s"\n' "$testFile" "$file5" 34 | fi 35 | 36 | rm -rf *.out -------------------------------------------------------------------------------- /test/failing/pash_source_declare_vars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This sources variables that were produced from `declare -p` 4 | 5 | ## TODO: Fix this to not source read only variables 6 | ## TODO: Does this work with arrays 7 | 8 | ## TODO: Fix this to not source pash variables so as to not invalidate PaSh progress 9 | 10 | ## TODO: Fix this filtering 11 | 12 | filter_vars_file() 13 | { 14 | cat "$1" | grep -v "^declare -\([A-Za-z]\|-\)* \(pash\|BASH\|LINENO\|EUID\|GROUPS\)" 15 | } 16 | 17 | ## TODO: Error handling if the argument is empty?
18 | if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then 19 | > /dev/null 2>&1 "$@" 20 | else 21 | if [ "$PASH_REDIR" == '&2' ]; then 22 | >&2 source <(filter_vars_file "$1") 23 | else 24 | >>"$PASH_REDIR" 2>&1 source <(filter_vars_file "$1") 25 | fi 26 | fi 27 | -------------------------------------------------------------------------------- /test/pash_tests/8.3_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: compare_exodus_genesis.sh 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | INPUT2=${INPUT2:-$PASH_TOP/evaluation/benchmarks/nlp/input/exodus} 7 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_3/} 8 | ENTRIES=${ENTRIES:-1060} 9 | mkdir -p $OUT 10 | 11 | run_tests() { 12 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}1.types 13 | tr -sc '[A-Z][a-z]' '[\012*]' < ${INPUT2} | sort -u > ${OUT}/${input}2.types 14 | sort $OUT/${input}1.types ${OUT}/${input}2.types ${OUT}/${input}2.types | uniq -c | head # the second list is given twice on purpose: count 1 = only in the input, 2 = only in INPUT2, 3 = in both 15 | 16 | } 17 | export -f run_tests 18 | for input in $(ls ${IN} | head -n ${ENTRIES}) 19 | do 20 | run_tests $input > ${OUT}/${input}.out 21 | done 22 | 23 | echo 'done'; 24 | rm -rf "${OUT}" 25 | -------------------------------------------------------------------------------- /test/pash_tests/eager.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | input=${1?"ERROR: Eager: No input file given"} 4 | output=${2?"ERROR: Eager: No output file given"} 5 | intermediate_file=${3?"ERROR: Eager: No intermediate file given"} 6 | 7 | # Default PASH_TOP to the git repository root if it is not already set 8 | PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} 9 | # TODO: Doable check if this is still needed. Turned off for distributed execution. 10 | # PR https://github.com/binpash/pash/pull/495 might've resolved it.
11 | # cleanup() 12 | # { 13 | # kill -SIGPIPE $eager_pid > /dev/null 2>&1 14 | # } 15 | # trap cleanup EXIT 16 | 17 | # $PASH_TOP/runtime/eager "$input" "$output" "$intermediate_file" & 18 | # eager_pid=$! 19 | # wait $eager_pid 20 | "$PASH_TOP"/runtime/eager "$input" "$output" "$intermediate_file" 21 | rm "$intermediate_file" 22 | -------------------------------------------------------------------------------- /test/pash_tests/spell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Calculate misspelled words in an input 3 | # https://dl.acm.org/doi/10.1145/3532.315102 4 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} 5 | dict=${dict:-$PASH_TOP/evaluation/benchmarks/oneliners/input/dict.txt} 6 | 7 | cat $IN | 8 | iconv -f utf-8 -t ascii//translit | # transliterate non-ASCII characters to ASCII 9 | # groff -t -e -mandoc -Tascii | # remove formatting commands 10 | col -bx | # remove backspaces / linefeeds 11 | tr -cs A-Za-z '\n' | 12 | tr A-Z a-z | # map upper to lower case 13 | tr -d '[:punct:]' | # remove punctuation 14 | sort | # put words in alphabetical order 15 | uniq | # remove duplicate words 16 | comm -23 - $dict # report words not in dictionary 17 | -------------------------------------------------------------------------------- /test/pash_tests/get-summary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo 'GNU Coreutils ('$(cat coreutils-summary.txt | wc -l | awk '{$1=$1};1') 'commands):' 4 | echo ' S:' $(cat coreutils-summary.txt | grep ' S ' | wc -l) 5 | echo ' P:' $(cat coreutils-summary.txt | grep ' P ' | wc -l) 6 | echo ' N:' $(cat coreutils-summary.txt | grep ' N ' | wc -l) 7 | echo ' E:' $(cat coreutils-summary.txt | grep ' E ' | wc -l) 8 | 9 | echo 'POSIX ('$(( $(cat posix-summary.txt | wc -l | awk '{$1=$1};1') + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) 'commands):' 10 | echo ' S:' $(cat posix-summary.txt |
grep ' S ' | wc -l) 11 | echo ' P:' $(cat posix-summary.txt | grep ' P ' | wc -l) 12 | echo ' N:' $(cat posix-summary.txt | grep ' N ' | wc -l) 13 | echo ' E:' $(( $(cat posix-summary.txt | grep ' E ' | wc -l) + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) 14 | 15 | -------------------------------------------------------------------------------- /test/pash_tests/tr-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This test contains all occurrences of tr (to test the annotation) 4 | 5 | FILE="$PASH_TOP/evaluation/tests/input/1M.txt" 6 | 7 | cat $FILE | tr -d ',' 8 | cat $FILE | tr '[A-Z]' '[a-z]' 9 | cat $FILE | tr -s ' ' '\n' 10 | cat $FILE | tr '[a-z]' 'P' 11 | cat $FILE | tr -c "[a-z][A-Z]" '\n' 12 | cat $FILE | tr ' ' '\n' 13 | cat $FILE | tr '[a-z]' '\n' 14 | ## This is a bit tricky but `tr -d '\n'` is pure because after it is done there is only one line. 15 | cat $FILE | tr -d '\n' | grep "the" 16 | cat $FILE | tr -c '[A-Z]' '\n' 17 | cat $FILE | tr " " " " 18 | cat $FILE | tr -cs A-Za-z '\n' 19 | cat $FILE | tr A-Z a-z 20 | cat $FILE | tr -d '[:punct:]' 21 | cat $FILE | tr [:lower] [:upper] 22 | cat $FILE | tr [:lower:] [:upper:] 23 | cat $FILE | tr -s ' ' 24 | cat $FILE | tr -s ' \n' 25 | cat $FILE | tr -d '\012' | sort 26 | -------------------------------------------------------------------------------- /test/pash_tests/r-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 | rm -f *.out 12 | 13 | testFile=$PASH_TOP/evaluation/scripts/input/1G.txt 14 | batchSize=10000000 15 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 16 | if [ "$#" -gt "0" ] 17 | then 18 | testFile=$1 19 | fi 20 | if [ "$#" -gt "1" ]; then 21 | batchSize=$2 22 | fi 23 | 24 | mkfifo $file1 25 | mkfifo $file2 26 |
mkfifo $file3 27 | mkfifo $file4 28 | mkfifo $file5 29 | mkfifo $file6 30 | 31 | 32 | $PASH_TOP/runtime/r_split -r $testFile $batchSize $file3 $file4 & 33 | 34 | 35 | # $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & 36 | # $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & 37 | 38 | wc $file3 > $file5 & 39 | wc $file4 > $file6 & 40 | 41 | ./merge-wc.sh $file5 $file6 42 | 43 | # remove only the *.out FIFOs created above 44 | rm -rf *.out -------------------------------------------------------------------------------- /test/failing/maximal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compatibility test, contains most of the features we'll need 4 | 5 | # Cute intro to brackets, for anyone interested 6 | # https://www.assertnotmagic.com/2018/06/20/bash-brackets-quick-reference/ 7 | 8 | OUT=./output/maximal.txt 9 | touch $OUT 10 | 11 | exec 3<&0 12 | exec 0< <(cat ./README.md) 13 | cat 14 | exec 0<&3 15 | exec 3<&- 16 | 17 | echo <(true) >(true) 18 | 19 | echo "start"; ls -l . | grep '.sh' | wc -l; echo "..scripts found here" > $OUT 20 | { echo "start"; 21 | echo $(ls -l .)
| grep '.sh' | wc -l; 22 | echo "..scripts found here" 23 | } > $OUT 24 | 25 | { ls -R ../ | sort -rn | uniq | head; } > /dev/null 2>&1 & 26 | 27 | tee >(wc -l >&2) < $( echo $OUT ) | gzip > $OUT.gz 28 | 29 | # "optional" AND and OR composition operators 30 | [ -f 'pizza.123' ] && ( echo 'exists' >$OUT ) || { echo 'does not' >$OUT; } 31 | 32 | wait 33 | 34 | -------------------------------------------------------------------------------- /test/pash_tests/8.2_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: bigrams_appear_twice.sh 3 | # set -e 4 | 5 | # Calculate the bigrams (based on 4_3.sh script) 6 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 7 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.2_2/} 8 | ENTRIES=${ENTRIES:-1060} 9 | mkdir -p "$OUT" 10 | 11 | run_tests() { 12 | input=$1 13 | cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.input.words 14 | tail -n +2 ${OUT}/${input}.input.words > ${OUT}/${input}.input.nextwords # same word list shifted up by one line 15 | paste ${OUT}/${input}.input.words ${OUT}/${input}.input.nextwords | sort | uniq -c > ${OUT}/${input}.input.bigrams 16 | awk "\$1 == 2 {print \$2, \$3}" ${OUT}/${input}.input.bigrams # print the bigrams whose count is exactly two 17 | } 18 | 19 | export -f run_tests 20 | for input in $(ls ${IN} | head -n ${ENTRIES}) 21 | do 22 | run_tests $input > ${OUT}/${input}.out 23 | done 24 | 25 | echo 'done'; 26 | rm -rf "${OUT}" 27 | -------------------------------------------------------------------------------- /test/pash_tests/temp-analytics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FROM=${FROM:-2015} 4 | TO=${TO:-2015} 5 | IN=${IN:-'http://ndr.md/data/noaa/'} 6 | fetch=${fetch:-"curl -s"} 7 | 8 | data_file=temperatures.txt 9 | 10 | ## Downloading and extracting 11 | seq $FROM $TO | 12 | sed "s;^;$IN;" | 13 | sed 's;$;/;' | 14 | xargs -r -n 1 $fetch | 15 | grep gz | 16 | tr -s ' \n' | 17 | cut -d ' ' -f9 | 18 | sed
's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | 19 | sed "s;^;$IN;" | 20 | xargs -n1 $fetch | # use the same overridable fetch command as the listing step above 21 | gunzip > "${data_file}" 22 | 23 | ## Processing 24 | cat "${data_file}" | 25 | cut -c 89-92 | 26 | grep -v 999 | 27 | sort -rn | 28 | head -n1 > max.txt 29 | 30 | cat "${data_file}" | 31 | cut -c 89-92 | 32 | grep -v 999 | 33 | sort -n | 34 | head -n1 > min.txt 35 | 36 | cat "${data_file}" | 37 | cut -c 89-92 | 38 | grep -v 999 | 39 | awk "{ total += \$1; count++ } END { print total/count }" > average.txt 40 | -------------------------------------------------------------------------------- /test/pash_tests/grep_f_script.sh: -------------------------------------------------------------------------------- 1 | mkfifo s1 s2 s3 s4 s5 2 | 3 | export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} 4 | 5 | IN="$PASH_TOP/evaluation/tests/input/1M.txt" 6 | 7 | sorted_in="/tmp/sorted.in" 8 | 9 | sort $IN > $sorted_in 10 | 11 | echo " king" | tee s4 >s3 & 12 | grep -vx -f s3 - > s1 < $sorted_in & 13 | grep -vx -f s4 - > s2 < $sorted_in & 14 | ## The eager is essential here or after tee to ensure non-deadlocks 15 | { "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } 16 | cat s1 s5 > grep-f.out 17 | 18 | echo " king" | tee s4 >s3 & 19 | comm -13 s3 - > s1 < $sorted_in & 20 | comm -13 s4 - > s2 < $sorted_in & 21 | ## The eager is essential here or after tee to ensure non-deadlocks 22 | { "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } 23 | cat s1 s5 > comm.out 24 | 25 | rm s1 s2 s3 s4 s5 26 | 27 | diff grep-f.out comm.out 28 | 29 | -------------------------------------------------------------------------------- /libdash.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Bindings to the dash shell's parser" 4 | maintainer: ["michael@greenberg.science"] 5 |
authors: ["Michael Greenberg"] 6 | license: "BSD-3-Clause" 7 | homepage: "https://github.com/mgree/libdash" 8 | bug-reports: "https://github.com/mgree/libdash/issues" 9 | depends: [ 10 | "dune" {>= "3.12"} 11 | "ctypes" {>= "0.21.1"} 12 | "ctypes-foreign" {>= "0.21.1"} 13 | "atdgen" {>= "2.15.0"} 14 | "conf-autoconf" {>= "0.1"} 15 | "conf-aclocal" {>= "2"} 16 | "conf-automake" {>= "1"} 17 | "conf-libtool" {>= "1"} 18 | "odoc" {with-doc} 19 | ] 20 | build: [ 21 | ["dune" "subst"] {dev} 22 | [ 23 | "dune" 24 | "build" 25 | "-p" 26 | name 27 | "-j" 28 | jobs 29 | "@install" 30 | "@runtest" {with-test} 31 | "@doc" {with-doc} 32 | ] 33 | ] 34 | dev-repo: "git+https://github.com/mgree/libdash.git" 35 | -------------------------------------------------------------------------------- /ocaml/rt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | : ${SHELL_TO_JSON=shell_to_json} 4 | if ! type "$SHELL_TO_JSON" >/dev/null 2>&1 # fall back to this script's directory when the configured tool is not resolvable 5 | then 6 | SHELL_TO_JSON=$(dirname "$0")/$SHELL_TO_JSON 7 | fi 8 | 9 | : ${JSON_TO_SHELL=json_to_shell} 10 | if ! type "$JSON_TO_SHELL" >/dev/null 2>&1 # same lookup rule as SHELL_TO_JSON above 11 | then 12 | JSON_TO_SHELL=$(dirname "$0")/$JSON_TO_SHELL 13 | fi 14 | 15 | if [ $# -ne 1 ] 16 | then 17 | echo "Usage: $0 testFile" >&2 18 | exit 1 19 | fi 20 | 21 | testFile="$1" 22 | 23 | if [ ! -f "$testFile" ] 24 | then 25 | echo "Error: cannot read '$testFile'!" >&2 26 | exit 1 27 | fi 28 | 29 | json=$(mktemp) 30 | 31 | "$SHELL_TO_JSON" "$testFile" >"$json" 32 | if [ $? -ne 0 ] 33 | then 34 | echo "OCAML_PARSE_ABORT: '$testFile'" >&2 35 | exit 1 36 | fi 37 | 38 | rt=$(mktemp) 39 | 40 | "$JSON_TO_SHELL" "$json" >"$rt" 41 | if [ $?
-ne 0 ] 42 | then 43 | echo "OCAML_UNPARSE_ABORT: '$testFile' -> '$json'" >&2 44 | exit 1 45 | fi 46 | 47 | cat "$rt" 48 | -------------------------------------------------------------------------------- /test/failing/auto-split.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | input="$1" 4 | shift 5 | outputs=("$@") 6 | n_outputs="$#" 7 | 8 | # Default PASH_TOP to the git repository root if it is not already set 9 | PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} 10 | # generate a temp file name (mktemp -u only prints a name; the file itself is created by the cat below) 11 | temp="$(mktemp -u /tmp/pash_XXXXXXXXXX)" 12 | 13 | cat "$input" > "$temp" 14 | total_lines=$(wc -l "$temp" | cut -f 1 -d ' ') 15 | batch_size=$((total_lines / n_outputs)) 16 | # echo "Input: $input" 17 | # echo "Outputs: $outputs" 18 | # echo "Number of outputs: $n_outputs" 19 | # echo "Total Lines: $total_lines" 20 | # echo "Batch Size: $batch_size" 21 | 22 | cleanup() 23 | { 24 | kill -SIGPIPE "$split_pid" > /dev/null 2>&1 25 | } 26 | trap cleanup EXIT 27 | 28 | 29 | # echo "$PASH_TOP/evaluation/tools/split $input $batch_size $outputs" 30 | "$PASH_TOP"/runtime/split "$temp" "$batch_size" "${outputs[@]}" & 31 | split_pid=$!
32 | wait "$split_pid" 33 | rm -f "$temp" 34 | -------------------------------------------------------------------------------- /test/pash_tests/pcap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #tag: pcap analysis 3 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data} 4 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/pcap-analysis} 5 | LOGS=${OUT}/logs 6 | mkdir -p "${LOGS}" 7 | run_tests() { 8 | INPUT=$1 9 | /usr/sbin/tcpdump -nn -r "${INPUT}" -A 'port 53' 2> /dev/null | sort | uniq |grep -Ev '(com|net|org|gov|mil|arpa)' 2> /dev/null 10 | # extract URL 11 | /usr/sbin/tcpdump -nn -r "${INPUT}" -s 0 -v -n -l 2> /dev/null | grep -Ei "POST /|GET /|Host:" 2> /dev/null 12 | # extract passwords 13 | /usr/sbin/tcpdump -nn -r "${INPUT}" -s 0 -A -n -l 2> /dev/null | grep -Ei "POST /|pwd=|passwd=|password=|Host:" 2> /dev/null 14 | } 15 | export -f run_tests 16 | 17 | pkg_count=0 18 | 19 | for item in "${IN}"/*; 20 | do 21 | pkg_count=$((pkg_count + 1)); 22 | run_tests "$item" > "${LOGS}/${pkg_count}.log" # one numbered log per capture file 23 | done 24 | 25 | echo 'done'; 26 | -------------------------------------------------------------------------------- /test/pash_tests/split_pcap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # To process large pcap file, usually it is better to split it into small chunks first, 3 | # then process every chunk in parallel.
4 | INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} 5 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/scripts/input/out.pcap} 6 | split_size=1000 7 | output_index=1 8 | loop_count=10 9 | exit_flag=0 10 | 11 | command() { # placeholder per-chunk action: just prints its two arguments (NOTE: shadows the shell's `command` builtin) 12 | echo "$1" "$2" 13 | } 14 | 15 | tcpdump -r ${INPUT} -w ${OUTPUT} -C ${split_size} 16 | 17 | command ${OUTPUT} 18 | 19 | while : 20 | do 21 | loop_index=0 22 | while test ${loop_index} -lt ${loop_count} 23 | do 24 | if test -e ${OUTPUT}${output_index} # numbered chunk files are probed as OUTPUT1, OUTPUT2, ... 25 | then 26 | command ${OUTPUT} ${output_index} 27 | output_index=$((output_index + 1)) 28 | loop_index=$((loop_index + 1)) 29 | else 30 | exit_flag=1 31 | break 32 | fi 33 | done 34 | wait 35 | 36 | if test ${exit_flag} -eq 1 37 | then 38 | exit 0 39 | fi 40 | done 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore for dash 2 | 3 | # generated by autogen.sh 4 | Makefile.in 5 | /aclocal.m4 6 | /autom4te.cache/ 7 | /compile 8 | /config.h.in 9 | /configure 10 | /depcomp 11 | /install-sh 12 | /missing 13 | 14 | # generated by configure 15 | Makefile 16 | .deps 17 | .dirstamp 18 | /config.cache 19 | /config.h 20 | /config.log 21 | /config.status 22 | /stamp-h1 23 | 24 | # generated by make 25 | /src/builtins.h 26 | /src/nodes.h 27 | /src/syntax.h 28 | /src/token.h 29 | /src/token_vars.h 30 | 31 | # generated by dune 32 | _build 33 | 34 | # Apple debug symbol bundles 35 | *.dSYM/ 36 | 37 | # backups and patch artefacts 38 | *~ 39 | *.bak 40 | *.orig 41 | *.rej 42 | 43 | # OS generated files 44 | .DS_Store 45 | .DS_Store?
46 | ._* 47 | .Spotlight* 48 | .Trash* 49 | *[Tt]humbs.db 50 | ar-lib 51 | config.* 52 | src/libdash.a 53 | *.lo 54 | *.dylib 55 | m4 56 | libtool 57 | ltmain.sh 58 | ocamlprof.dump 59 | __pycache__ 60 | libdash.egg-info 61 | dist 62 | build 63 | -------------------------------------------------------------------------------- /test/pash_tests/update-img.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # Repackage updated pash docker image to latest commit 5 | ### 6 | 7 | cd "$(dirname "$0")" || exit 1 8 | 9 | # Assumes a pash image exists already 10 | # curl img.pash.ndr.md | docker load; docker run --name pash-playground -it pash/18.04 11 | 12 | docker start pash-playground 13 | docker exec pash-playground bash -c 'cd /pash; git pull' 14 | docker stop pash-playground 15 | 16 | 17 | docker commit $(docker ps -a | grep pash-playground | cut -f1 -d' ') pash/18.04:latest 18 | docker save pash/18.04:latest | gzip > pash-docker.tar.gz 19 | 20 | if [[ "$(hostname)" == "beta" ]]; then 21 | # This assumes you're on beta 22 | mv pash-docker.tar.gz /var/www/pash-web/ 23 | fi 24 | 25 | docker build -t pash-play ../ 26 | 27 | if [[ -f ~/token.txt ]]; then # only log in when the token file read below actually exists 28 | cat ~/token.txt | docker login https://docker.pkg.github.com -u nvasilakis --password-stdin 29 | fi 30 | 31 | docker push docker.pkg.github.com/andromeda/pash/play:latest -------------------------------------------------------------------------------- /test/failing/run_alias.sh: -------------------------------------------------------------------------------- 1 | # parses the generated.file, and creates a log of the commands that were executed 2 | # successfully (succ.txt) and the failed ones (err.txt) 3 | 4 | cd $PASH_TOP/evaluation/scripts/input/ 5 | # we could read the file iteratively with IFS, but the environment was affected 6 | IFS=$'\r\n' GLOBIGNORE='*' command eval 'cmd_array=($(cat generated.file))' 7 | lc=$(cat generated.file | wc -l) 8 | for i in $(seq 0 $((${#cmd_array[@]} - 1)))
9 | do 10 | # get the entry from the array 11 | p=${cmd_array[$i]} 12 | # add a timeout to our script 13 | timeout --signal=SIGINT 50s /bin/bash -e $p >> /dev/null 2>&1 #./cmd.sh #eval "bash ./cmd.sh" 14 | ## get status ## 15 | status=$? 16 | if [ $status -eq 0 ]; then 17 | printf '%s\n' "$p" >> $PASH_TOP/evaluation/scripts/input/succ.txt # log the command verbatim (no globbing or re-splitting) 18 | else 19 | printf '%s\n' "$p" >> $PASH_TOP/evaluation/scripts/input/err.txt 20 | fi 21 | if ! ((i % 100)); then 22 | echo $i 23 | fi 24 | done 25 | echo "Done" 26 | -------------------------------------------------------------------------------- /test/pash_tests/suggest-ec2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Suggests envvars for use in ./make-ec2.sh 3 | 4 | main() { 5 | local vpc_id="$(aws ec2 describe-vpcs --output text --query 'Vpcs[0].VpcId')"; 6 | local key_name="$(aws ec2 describe-key-pairs --output text --query 'KeyPairs[0].KeyName')"; 7 | local subnet="$(aws ec2 describe-subnets --output text --query 'Subnets[0].SubnetId' --filter Name=vpc-id,Values=$vpc_id)"; 8 | local sg="$(aws ec2 describe-security-groups --output text --filter Name=vpc-id,Values=$vpc_id --query 'SecurityGroups[0].GroupId')"; 9 | 10 | echo "export PASH_AWS_EC2_AMI='ami-0d739ceed1874f156';"; 11 | echo "export PASH_AWS_EC2_INSTANCE_TYPE='t2.micro';"; 12 | echo "export PASH_AWS_EC2_VPC_ID='$vpc_id';"; 13 | echo "export PASH_AWS_EC2_KEY_NAME='$key_name';"; 14 | echo "export PASH_AWS_EC2_SUBNET='$subnet';"; 15 | echo "export PASH_AWS_EC2_SECURITY_GROUP='$sg';"; 16 | echo "export PASH_AWS_EC2_DISK_SIZE_GB='10';"; 17 | } 18 | 19 | main 20 | -------------------------------------------------------------------------------- /test/pash_tests/dgsh-wc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 | rm -f *.out 12 | 13 |
testFile=$PASH_TOP/evaluation/scripts/input/1G.txt 14 | batchSize=10000000 15 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 16 | if [ "$#" -gt "0" ] 17 | then 18 | testFile=$1 19 | fi 20 | if [ "$#" -gt "1" ]; then 21 | batchSize=$2 22 | fi 23 | 24 | mkfifo $file1 25 | mkfifo $file2 26 | mkfifo $file3 27 | mkfifo $file4 28 | mkfifo $file5 29 | mkfifo $file6 30 | 31 | 32 | $PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & 33 | 34 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 10M & 35 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 10M & 36 | # $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & 37 | # $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & 38 | 39 | wc $file3 > $file5 & 40 | wc $file4 > $file6 & 41 | 42 | ./merge-wc.sh $file5 $file6 43 | 44 | 45 | rm -rf *out -------------------------------------------------------------------------------- /test/pash_tests/6_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tag: trigram_rec 3 | # set -e 4 | 5 | IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 6 | OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1/} 7 | ENTRIES=${ENTRIES:-1060} 8 | mkdir -p "$OUT" 9 | 10 | trigrams() { 11 | input=$1 12 | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words 13 | tail +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords 14 | tail +3 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2 15 | paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 | sort | uniq -c 16 | rm -f ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 17 | } 18 | export -f trigrams 19 | 20 | for input in $(ls ${IN} | head -n ${ENTRIES}) 21 | do 22 | cat $IN"/"$input | grep 'the land of' | trigrams $IN/${input} | sort -nr | sed 5q > ${OUT}/${input}.out0 23 | cat $IN"/"$input | grep 'And he said' | trigrams $IN/${input} | sort -nr | sed 5q > ${OUT}/${input}.out1 24 | done 25 | 26 | echo 'done'; 27 | 
rm -rf "${OUT}" 28 | -------------------------------------------------------------------------------- /test/failing/timing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | input_script='/pash/compiler/parser/libdash/ltmain.sh' 5 | 6 | 7 | if [ $# -eq 1 ] 8 | then 9 | input_script="$1" 10 | fi 11 | 12 | 13 | echo "Input script: $input_script" 14 | echo 15 | 16 | echo "OCaml (dash C AST -> libdash OCaml AST -> JSON -> Pash Python AST -> JSON -> shell:" 17 | time (../parse_to_json.native "$input_script" > /tmp/json.$$; cat /tmp/json.$$ | ../json_to_shell.native | md5sum) 18 | echo 19 | 20 | echo "C (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" 21 | time (./parse_to_json2 "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | ./json_to_shell2 | md5sum) 22 | echo 23 | 24 | echo "Python (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" 25 | time (python3 ./parse_to_json2.py "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | python3 ./json_to_shell2.py | md5sum) 26 | echo 27 | 28 | echo "Python (dash C AST -> Pash Python AST -> shell):" 29 | time (python3 ceda_rt.py "$input_script" | md5sum) 30 | echo 31 | 32 | -------------------------------------------------------------------------------- /test/pash_tests/distrotest_funs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | die() 4 | { 5 | echo "$*" >&4; 6 | exit 1; 7 | } 8 | 9 | distrotest_loop() 10 | { 11 | while read -r distro setup 12 | do 13 | [[ "$distro" = "#"* || -z "$distro" ]] && continue 14 | 15 | printf '%s ' "$distro" # >&3 16 | docker pull "$distro" || die "Can't pull $distro" 17 | printf 'pulled. 
' # >&3 18 | 19 | tmp=$(mktemp -d) || die "Can't make temp dir" 20 | cp -r "${SHELLCHECK_DIR}" "$tmp/" || die "Can't populate test dir" 21 | printf 'Result: ' # >&3 22 | < /dev/null docker run -v "$tmp:/mnt" "$distro" sh -c " 23 | $setup 24 | cd /mnt/shellcheck || exit 1 25 | test/buildtest 26 | " 27 | ret=$? 28 | if [ "$ret" = 0 ] 29 | then 30 | echo "OK" # >&3 31 | else 32 | echo "FAIL with $ret. See $log" # >&3 33 | final=1 34 | fi 35 | rm -rf "$tmp" 36 | done 37 | } 38 | 39 | export -f die 40 | export -f distrotest_loop 41 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | 3 | language: generic 4 | 5 | env: 6 | - OCAML_VERSION=4.09 7 | - OCAML_VERSION=4.10 8 | - OCAML_VERSION=4.11 9 | 10 | os: 11 | - freebsd 12 | - linux 13 | - osx 14 | 15 | jobs: 16 | include: 17 | - os: linux 18 | env: OCAML_VERSION=4.11 INSTALL_LOCAL=1 19 | 20 | cache: 21 | directories: 22 | - $HOME/.opam 23 | 24 | addons: 25 | apt: 26 | packages: 27 | - autoconf 28 | - autotools-dev 29 | - libtool 30 | - pkg-config 31 | - libffi-dev 32 | homebrew: 33 | packages: 34 | - autoconf 35 | - automake 36 | - libtool 37 | - pkg-config 38 | - libffi 39 | 40 | install: 41 | - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh 42 | - bash -ex .travis-ocaml.sh 43 | 44 | script: 45 | - opam pin -y -v -t add . 46 | - ls -l $(opam var libdash:lib) 47 | - cat $(opam var libdash:lib)/META 48 | - opam exec -- make -C test test 49 | - opam uninstall libdash # clear the cache 50 | 51 | -------------------------------------------------------------------------------- /test/pash_tests/tailprogs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A bash script for finding the 10 longest scripts 4 | # (TODO: `group_by` script type?) 
5 | 6 | # From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 7 | 8 | # Data: 9 | # Assumes a full list of commands 10 | # 11 | # # simple, from a single dir: 12 | # echo "$( 13 | # ls /usr/bin/* 14 | # )" > all_cmds.txt 15 | # 16 | # # Or more complicated, from $PATH: 17 | # 18 | # echo "$( 19 | # case "$PATH" in 20 | # (*[!:]:) PATH="$PATH:" ;; 21 | # esac 22 | # 23 | # set -f; IFS=: 24 | # for dir in $PATH; do 25 | # set +f 26 | # [ -z "$dir" ] && dir="." 27 | # for file in "$dir"/*; do 28 | # if [ -x "$file" ] && ! [ -d "$file" ]; then 29 | # printf '%s = %s\n' "${file##*/}" "$file" 30 | # fi 31 | # done 32 | # done 33 | # )" > ./input/allcmds.txt 34 | 35 | IN=./input/cmds10x.txt 36 | OUT=./output/out.txt 37 | 38 | ls /usr/bin/* > $IN 39 | 40 | cat $IN | 41 | xargs file | 42 | grep "shell script" | 43 | cut -d: -f1 | 44 | xargs wc -l | 45 | sort -rn | 46 | head -n 25 > $OUT 47 | -------------------------------------------------------------------------------- /test/failing/up.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # clone and setup pash 4 | # N.b. This is a .sh script 5 | 6 | set -e 7 | 8 | # will install dependencies locally. 9 | PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') 10 | URL='https://github.com/binpash/pash/archive/refs/heads/main.zip' 11 | VERSION='latest' 12 | DL=$(command -v curl >/dev/null 2>&1 && echo curl || echo 'wget -qO-') 13 | 14 | cmd_exists () { 15 | command -v $1 >/dev/null 2>&1 && echo 'true' || echo 'false'; 16 | } 17 | 18 | if [ "$PLATFORM" = "darwin" ]; then 19 | echo 'PaSh is not yet well supported on OS X' 20 | exit 1 21 | fi 22 | 23 | set +e 24 | git clone git@github.com:binpash/pash.git 25 | if [ $? 
-ne 0 ]; then 26 | echo 'SSH clone failed; attempting HTTPS' 27 | git clone https://github.com/andromeda/pash.git 28 | fi 29 | set -e 30 | 31 | cd pash/scripts 32 | # git checkout s3 # FIXME only for testing while PR is up 33 | 34 | if [ $(groups $(whoami) | grep -c "sudo\|root\|admin") -ge 1 ]; then 35 | # only run this if we are in the sudo group (or it's doomed to fail) 36 | bash distro-deps.sh 37 | fi 38 | bash setup-pash.sh 39 | -------------------------------------------------------------------------------- /src/bltin/times.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1999 Herbert Xu 3 | * This file contains code for the times builtin. 4 | */ 5 | 6 | #include <sys/times.h> /* times(), struct tms */ 7 | #include <unistd.h> /* sysconf(), _SC_CLK_TCK */ 8 | #ifdef USE_GLIBC_STDIO 9 | #include <stdio.h> /* printf() */ 10 | #else 11 | #include "bltin.h" 12 | #endif 13 | #include "system.h" 14 | 15 | int timescmd() { 16 | struct tms buf; 17 | long int clk_tck = sysconf(_SC_CLK_TCK); 18 | int mutime, mstime, mcutime, mcstime; 19 | double utime, stime, cutime, cstime; 20 | 21 | times(&buf); 22 | 23 | utime = (double)buf.tms_utime / clk_tck; 24 | mutime = utime / 60; 25 | utime -= mutime * 60.0; 26 | 27 | stime = (double)buf.tms_stime / clk_tck; 28 | mstime = stime / 60; 29 | stime -= mstime * 60.0; 30 | 31 | cutime = (double)buf.tms_cutime / clk_tck; 32 | mcutime = cutime / 60; 33 | cutime -= mcutime * 60.0; 34 | 35 | cstime = (double)buf.tms_cstime / clk_tck; 36 | mcstime = cstime / 60; 37 | cstime -= mcstime * 60.0; 38 | 39 | printf("%dm%fs %dm%fs\n%dm%fs %dm%fs\n", mutime, utime, mstime, stime, 40 | mcutime, cutime, mcstime, cstime); 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /test/pash_tests/dgsh-raw-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 
| rm -f *.out 12 | 13 | batchSize=10000000 14 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 15 | if [ "$#" -gt "0" ] 16 | then 17 | testFile=$1 18 | fi 19 | if [ "$#" -gt "1" ]; then 20 | batchSize=$2 21 | fi 22 | 23 | mkfifo $file1 24 | mkfifo $file2 25 | mkfifo $file3 26 | mkfifo $file4 27 | mkfifo $file5 28 | mkfifo $file6 29 | mkfifo $file7 30 | mkfifo $file8 31 | 32 | $PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & 33 | 34 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file5 & 35 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file6 & 36 | 37 | sort < $file5 > $file7 & 38 | sort < $file6 > $file8 & 39 | 40 | sort -m $file7 $file8 41 | 42 | # cat $testFile | sort > $file8 43 | # if cmp -s "$file7" "$file8"; then 44 | # printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" 45 | # else 46 | # printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" 47 | # fi 48 | 49 | rm -rf *out -------------------------------------------------------------------------------- /ocaml/json_to_shell.ml: -------------------------------------------------------------------------------- 1 | (* This is straight-up copied from the libdash tests *) 2 | open Libdash 3 | 4 | let verbose = ref false 5 | let input_src : string option ref = ref None 6 | 7 | let parse_args () = 8 | Arg.parse 9 | [("-v",Arg.Set verbose,"verbose mode")] 10 | (function | "-" -> input_src := None | f -> input_src := Some f) 11 | "Final argument should be either a filename or empty (for STDIN); only the last such argument is used" 12 | 13 | let read_channel chan = 14 | let lines = ref [] in 15 | try 16 | while true; do 17 | lines := input_line chan :: !lines 18 | done; !lines 19 | with End_of_file -> 20 | close_in chan; 21 | List.rev !lines 22 | 23 | let read_lines () = 24 | match !input_src with 25 | | None -> read_channel stdin 26 | | Some filename -> read_channel (open_in filename) 27 | 28 | let parse_lines () : Ast.t list = 29 | let lines = read_lines () in 30 | 
List.map (fun line -> Ast_json.t_of_string line) lines 31 | 32 | 33 | let main () = 34 | parse_args (); 35 | let cs = parse_lines () in 36 | List.map (fun c -> print_endline (Ast.to_string c)) cs 37 | ;; 38 | 39 | main () 40 | -------------------------------------------------------------------------------- /test/failing/sq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Clever trick that uses the /dev/fd/xx pseudo-file system 3 | # https://stackoverflow.com/questions/40244/how-to-make-a-pipe-loop-in-bash 4 | 5 | # MMG 2022-06-30 the `function` kw is a bash-ism; leaving it in to not disrupt what gets optimized in previous evaluations 6 | function calc() { 7 | # calculate sum of squares of numbers 0,..,10 8 | 9 | sum=0 10 | for ((i=0; i<10; i++)); do 11 | echo $i # "request" the square of i 12 | 13 | read ii # read the square of i 14 | echo "got $ii" >&2 # debug message 15 | 16 | let sum=$sum+$ii 17 | done 18 | 19 | echo "sum $sum" >&2 # output result to stderr 20 | } 21 | 22 | function square() { 23 | # square numbers 24 | 25 | read j # receive first "request" 26 | while [ "$j" != "" ]; do 27 | let jj=$j*$j 28 | echo "square($j) = $jj" >&2 # debug message 29 | 30 | echo $jj # send square 31 | 32 | read j # receive next "request" 33 | done 34 | } 35 | 36 | read | { calc | square; } >/dev/fd/0 37 | -------------------------------------------------------------------------------- /ocaml/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Michael Greenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom 
the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/pash_tests/raw-r-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 | rm -f *.out 12 | 13 | batchSize=10000000 14 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 15 | if [ "$#" -gt "0" ] 16 | then 17 | testFile=$1 18 | fi 19 | if [ "$#" -gt "1" ]; then 20 | batchSize=$2 21 | fi 22 | 23 | mkfifo $file1 24 | mkfifo $file2 25 | mkfifo $file3 26 | mkfifo $file4 27 | mkfifo $file5 28 | mkfifo $file6 29 | mkfifo $file7 30 | mkfifo $file8 31 | 32 | $PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & 33 | 34 | $PASH_TOP/runtime/eager.sh $file1 $file5 "/tmp/pash_eager_intermediate_#file1" & 35 | $PASH_TOP/runtime/eager.sh $file2 $file6 "/tmp/pash_eager_intermediate_#file2" & 36 | 37 | sort < $file5 > $file7 & 38 | sort < $file6 > $file8 & 39 | 40 | sort -m $file7 $file8 41 | 42 | # cat $testFile | sort > $file8 43 | # if cmp -s "$file7" "$file8"; then 44 | # printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" 45 | # else 46 | # printf 'The file "%s" is 
different from "%s"\n' "$file7" "$file8" 47 | # fi 48 | 49 | rm -rf *out -------------------------------------------------------------------------------- /test/pash_tests/worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # trap ctrl-c and call ctrl_c() 4 | trap cleanup INT 5 | 6 | export PASH_TOP=${PASH_TOP:-${BASH_SOURCE%/*}} 7 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" 8 | # point to the local downloaded folders 9 | export PYTHONPATH=${PASH_TOP}/python_pkgs/ 10 | export PASH_TIMESTAMP="$(date +"%y-%m-%d-%T")" 11 | 12 | # add hdfs directory if hdfs command exist 13 | if command -v "hdfs" &> /dev/null 14 | then 15 | datanode_dir=$(hdfs getconf -confKey dfs.datanode.data.dir) 16 | export HDFS_DATANODE_DIR=${datanode_dir#"file://"} # removes file:// prefix 17 | fi 18 | 19 | source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" --distributed_exec 20 | 21 | export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" 22 | 23 | cleanup() { 24 | kill "$FILEREADER_PID" "$DISCOVERY_PID" 25 | wait "$FILEREADER_PID" "$DISCOVERY_PID" 2>/dev/null 26 | rm -rf "$PASH_TMP_PREFIX" 27 | } 28 | 29 | "$PASH_TOP/runtime/dspash/file_reader/filereader_server" & 30 | FILEREADER_PID=$! 31 | "$PASH_TOP/runtime/dspash/file_reader/discovery_server" & 32 | DISCOVERY_PID=$! 
33 | python3 "$PASH_TOP/compiler/dspash/worker.py" "$@" 34 | -------------------------------------------------------------------------------- /test/tests/grade.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | score=0 4 | total=0 5 | 6 | if [ -d output ]; then 7 | echo "output directory already exists, aborting" 8 | exit 1 9 | fi 10 | 11 | mkdir output 12 | 13 | echo "LEXER/PARSER AUTOGRADER RESULTS" 14 | echo 15 | 16 | # check success cases 17 | for i in right/*.lc; do 18 | file=$(basename $i) 19 | output=$(mktemp output/$file.XXXX) 20 | 21 | echo -n "$file: " 22 | 23 | ./Main $i >$output 2>&1 24 | if [ $? -eq 0 ] 25 | then 26 | let score+=1 27 | echo "1/1" 28 | else 29 | echo "0/1" 30 | fi 31 | 32 | let total+=1 33 | done 34 | 35 | # check failure cases 36 | for i in wrong/*.lc; do 37 | file=$(basename $i) 38 | output=$(mktemp output/$file.XXXX) 39 | 40 | echo -n "$file: " 41 | 42 | ./Main $i >$output 2>&1 43 | if [ $? -eq 1 ] 44 | then 45 | let score+=1 46 | echo "1/1" 47 | else 48 | echo "0/1" 49 | fi 50 | 51 | let total+=1 52 | done 53 | 54 | echo 55 | echo "TOTAL: $score / $total" 56 | echo 57 | echo "PROBLEM 1: XXX / 5" 58 | echo 59 | let total=total+5 60 | echo "FINAL GRADE: $score + XXX / $total" 61 | -------------------------------------------------------------------------------- /test/pash_tests/grade.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | score=0 4 | total=0 5 | 6 | if [ -d output ]; then 7 | echo "output directory already exists, aborting" 8 | exit 1 9 | fi 10 | 11 | mkdir output 12 | 13 | echo "LEXER/PARSER AUTOGRADER RESULTS" 14 | echo 15 | 16 | # check success cases 17 | for i in right/*.lc; do 18 | file=$(basename $i) 19 | output=$(mktemp output/$file.XXXX) 20 | 21 | echo -n "$file: " 22 | 23 | ./Main $i >$output 2>&1 24 | if [ $? 
-eq 0 ] 25 | then 26 | let score+=1 27 | echo "1/1" 28 | else 29 | echo "0/1" 30 | fi 31 | 32 | let total+=1 33 | done 34 | 35 | # check failure cases 36 | for i in wrong/*.lc; do 37 | file=$(basename $i) 38 | output=$(mktemp output/$file.XXXX) 39 | 40 | echo -n "$file: " 41 | 42 | ./Main $i >$output 2>&1 43 | if [ $? -eq 1 ] 44 | then 45 | let score+=1 46 | echo "1/1" 47 | else 48 | echo "0/1" 49 | fi 50 | 51 | let total+=1 52 | done 53 | 54 | echo 55 | echo "TOTAL: $score / $total" 56 | echo 57 | echo "PROBLEM 1: XXX / 5" 58 | echo 59 | let total=total+5 60 | echo "FINAL GRADE: $score + XXX / $total" 61 | -------------------------------------------------------------------------------- /test/failing/with-ec2.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Runs a command on the host within the lifespan of an EC2 instance. 4 | # Uses AWS CLI v2 5 | 6 | get-instance-ip() { 7 | aws ec2 describe-instances \ 8 | --instance-ids "$1" \ 9 | --query 'Reservations[0].Instances[0].PublicIpAddress' \ 10 | --output text 11 | } 12 | 13 | # This is necessary because a public IP might not be 14 | # available immediately after the instance starts. 15 | wait-for-instance-ip() { 16 | local variant="$(get-instance-ip "$1")"; 17 | 18 | if [[ "$variant" =~ [0-9] ]]; then 19 | echo "$variant"; 20 | else 21 | sleep 3; 22 | wait-for-instance-ip "$1" 23 | fi 24 | } 25 | 26 | call-with-active-ec2() { 27 | set -e 28 | local instance_id="$1" 29 | aws ec2 start-instances --instance-ids "$instance_id" 30 | local ip=$(wait-for-instance-ip "$instance_id"); 31 | echo "$ip" 32 | trap "aws ec2 stop-instances --instance-ids $instance_id" EXIT 33 | "${@:2}" "$ip" # quoted so each word of the caller's command is passed through without re-splitting or globbing; the instance IP is appended as the last argument 34 | } 35 | 36 | # First expression detects if script is being sourced. 
37 | # https://stackoverflow.com/a/28776166 38 | (return 0 2>/dev/null) || call-with-active-ec2 "$@" 39 | -------------------------------------------------------------------------------- /test/pash_tests/generate_single_chrom.sh: -------------------------------------------------------------------------------- 1 | # Here are sample steps to generate a single paired read from hg19: 2 | # https://www.biostars.org/p/150010/ 3 | INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} 4 | OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} 5 | cd ${INPUT} 6 | # filter out a single chromosome and index it, e.g. 7 | samtools faidx ${INPUT}/human_g1k_v37.fasta 20 > ${OUTPUT}/human_g1k_v37_chr20.fasta 8 | bowtie2-build ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/homo_chr20 9 | #simulate a single read sample, e.g. here is for a single (-N 1) paired read: 10 | ${INPUT}/wgsim/wgsim -N 1 ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/single.read1.fq ${OUTPUT}/single.read2.fq > ${OUTPUT}/wgsim.out 11 | #generate the sam, e.g. 
12 | bowtie2 -x ${OUTPUT}/homo_chr20 -1 ${OUTPUT}/single.read1.fq -2 ${OUTPUT}/single.read2.fq -S ${OUTPUT}/single_pair.sam 13 | #generate a bam 14 | samtools view -b -S -o ${OUTPUT}/single_pair.bam ${OUTPUT}/single_pair.sam 15 | #sort and index it 16 | samtools sort ${OUTPUT}/single_pair.bam -o ${OUTPUT}/single_pair.sorted.bam 17 | # this seems to not affect the file, but in other cases, its indeed needed 18 | samtools index ${OUTPUT}/single_pair.sorted.bam 19 | 20 | -------------------------------------------------------------------------------- /python/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Michael Greenberg, Konstantinos Kallas, and Thurston Dang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /test/pash_tests/pash_runtime_shell_to_pash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | ## This currently performs (5), i.e., reverting bash state to get back to pash mode. 5 | ## 6 | 7 | ## TODO: Use that for (1) too 8 | 9 | output_vars_file=${1?Output var file not given} 10 | output_set_file=${2?Output set file not given} 11 | 12 | pash_exec_status=${internal_exec_status} 13 | pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_exec_status" 14 | 15 | ## Save the current set options to a file so that they can be recovered 16 | pash_final_set_vars=$- 17 | pash_redir_output echo "$$: (5) Writing current BaSh set state to: $output_set_file" 18 | pash_redir_output echo "$$: (5) Current BaSh shell: $-" 19 | echo "$pash_final_set_vars" > "$output_set_file" 20 | 21 | ## Revert to the old set state to avoid spurious fails 22 | source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$pash_current_set_state" 23 | pash_redir_output echo "$$: (5) Reverted to PaSh set state to: $-" 24 | 25 | 26 | ## Save the current variables 27 | source "$RUNTIME_DIR/pash_declare_vars.sh" "$output_vars_file" 28 | # pash_redir_output echo "$$: (5) Exiting from BaSh with BaSh status: $pash_exec_status" 29 | # (exit "$pash_exec_status") 30 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (data_only_dirs src) 2 | 3 | (rule 4 | (deps (source_tree src) configure.ac Makefile.am) 5 | (targets libdash.a dlldash.so 6 | builtins.h nodes.h syntax.h token.h token_vars.h 7 | ) 8 | (action 9 | (setenv CC "%{cc}" 10 | (bash 11 | "\ 12 | \n set -e\ 13 | \n if [ \"$(uname -s)\" = \"Darwin\" ]; then glibtoolize; else libtoolize; fi\ 14 | \n aclocal && autoheader && automake --add-missing && autoconf\ 15 | \n ./configure 
--prefix=\"$(pwd)\"\ 16 | \n %{make}\ 17 | \n %{make} install\ 18 | \n cp lib/libdash.a libdash.a\ 19 | \n cp lib/dlldash.so dlldash.so\ 20 | \n cp src/{builtins,nodes,syntax,token,token_vars}.h .\ 21 | \n")))) 22 | 23 | (subdir src 24 | (rule 25 | (deps ../builtins.h ../nodes.h ../syntax.h ../token.h ../token_vars.h) 26 | (targets builtins.h nodes.h syntax.h token.h token_vars.h) 27 | (action 28 | (progn 29 | (copy ../builtins.h builtins.h) 30 | (copy ../nodes.h nodes.h) 31 | (copy ../syntax.h syntax.h) 32 | (copy ../token.h token.h) 33 | (copy ../token_vars.h token_vars.h))))) 34 | 35 | (library 36 | (name dash) 37 | (foreign_archives dash)) 38 | -------------------------------------------------------------------------------- /test/pash_tests/dgsh-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 | rm -f *.out 12 | 13 | batchSize=10000000 14 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 15 | if [ "$#" -gt "0" ] 16 | then 17 | testFile=$1 18 | fi 19 | if [ "$#" -gt "1" ]; then 20 | batchSize=$2 21 | fi 22 | 23 | mkfifo $file1 24 | mkfifo $file2 25 | mkfifo $file3 26 | mkfifo $file4 27 | mkfifo $file5 28 | mkfifo $file6 29 | mkfifo $file7 30 | mkfifo $file8 31 | 32 | $PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & 33 | 34 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 64K & 35 | $PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 64K & 36 | 37 | $PASH_TOP/runtime/r_unwrap < $file3 > $file5 & 38 | $PASH_TOP/runtime/r_unwrap < $file4 > $file6 & 39 | 40 | sort < $file5 > $file7 & 41 | sort < $file6 > $file8 & 42 | 43 | sort -m $file7 $file8 44 | 45 | # cat $testFile | sort > $file8 46 | # if cmp -s "$file7" "$file8"; then 47 | # printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" 48 | # else 49 | # printf 'The file "%s" is 
different from "%s"\n' "$file7" "$file8" 50 | # fi 51 | 52 | rm -rf *out -------------------------------------------------------------------------------- /test/pash_tests/r-bell_grep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | file9=9.out 11 | rm -f *.out 12 | 13 | testFile="$PASH_TOP/evaluation/scripts/input/100M.txt" 14 | batchSize=10000000 15 | if [ "$#" -gt "0" ] 16 | then 17 | testFile=$1 18 | fi 19 | if [ "$#" -gt "1" ]; then 20 | batchSize=$2 21 | fi 22 | 23 | mkfifo $file1 24 | mkfifo $file2 25 | mkfifo $file3 26 | mkfifo $file4 27 | mkfifo $file5 28 | mkfifo $file6 29 | 30 | # mkfifo $file7 31 | # mkfifo $file8 32 | # mkfifo $file9 33 | 34 | 35 | $PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & 36 | 37 | $PASH_TOP/runtime/r_wrap grep 'Bell' < $file1 > $file3 & 38 | $PASH_TOP/runtime/r_wrap grep 'Bell' < $file2 > $file4 & 39 | # ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & 40 | 41 | $PASH_TOP/runtime/r_merge $file3 $file4 42 | 43 | # cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 44 | # if cmp -s "$file6" "$file5"; then 45 | # printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" 46 | # else 47 | # printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" 48 | # fi 49 | 50 | rm -rf *out 51 | -------------------------------------------------------------------------------- /test/pash_tests/pkg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Package several versions of PaSh: 4 | # * a shallow-clone version for a quick-install from `up` 5 | # * a deep-clone version for other environments TODO 6 | # * a docker image running on ubuntu 18.04 TODO 7 | 8 | set -ex 9 | 10 | echo $(pwd) 11 | REV=0 12 | 13 | REV=$(git rev-parse --short 
HEAD) 14 | cd ../../ 15 | 16 | # # Shallow clone --- might not be ideal for development 17 | # git clone --depth 1 git@github.com:andromeda/pash.git 18 | # mv pash pash-shallow 19 | # tar -cvzf pash-shallow.tar.gz pash-shallow/ > /dev/null 20 | # # uncomment the following line to keep all versions 21 | # # mv pash.tar.gz get/pash-${REV}.tar.gz 22 | # # ln -sf ./pash-${REV}.tar.gz get/latest 23 | # mv pash-shallow.tar.gz get/ 24 | # ln -sf ./pash-shallow.tar.gz get/latest 25 | # rm -rf pash-shallow 26 | 27 | cd pash 28 | git pull 29 | cd .. 30 | tar -cvzf pash.tar.gz ./pash > /dev/null 31 | mv pash.tar.gz get/ 32 | ln -sf ./pash.tar.gz get/latest 33 | 34 | # in the future, we might want to have versions 35 | # ln -s pash.tar.gz latest 36 | 37 | 38 | # TODO: for a clear release, remove all versioning artifacts 39 | # cp -r pash release 40 | # cd release 41 | # rm -rf .gitignore .gitsubmodules .git 42 | # cd .. 43 | 44 | -------------------------------------------------------------------------------- /test/pash_tests/r-minimal_grep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | file9=9.out 11 | rm -f *.out 12 | 13 | testFile="$PASH_TOP/evaluation/scripts/input/10M.txt" 14 | batchSize=1000000 15 | if [ "$#" -gt "0" ] 16 | then 17 | testFile=$1 18 | fi 19 | if [ "$#" -gt "1" ]; then 20 | batchSize=$2 21 | fi 22 | 23 | mkfifo $file1 24 | mkfifo $file2 25 | mkfifo $file3 26 | mkfifo $file4 27 | mkfifo $file5 28 | mkfifo $file6 29 | 30 | $PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & 31 | 32 | $PASH_TOP/runtime/r_wrap tr A-Z a-z < $file1 > $file3 & 33 | $PASH_TOP/runtime/r_wrap tr A-Z a-z < $file2 > $file4 & 34 | 35 | $PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file5 & 36 | $PASH_TOP/runtime/r_wrap grep 
'\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file4 > $file6 & 37 | 38 | $PASH_TOP/runtime/r_merge $file5 $file6 39 | # cat $testFile | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > t2.out 40 | # if cmp -s t1.out t2.out; then 41 | # printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" 42 | # else 43 | # printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" 44 | # fi 45 | 46 | rm -rf *out -------------------------------------------------------------------------------- /test/pash_tests/r-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | 11 | rm -f *.out 12 | 13 | testFile=../../evaluation/scripts/input/100M.txt 14 | batchSize=10000000 15 | testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" 16 | if [ "$#" -gt "0" ] 17 | then 18 | testFile=$1 19 | fi 20 | if [ "$#" -gt "1" ]; then 21 | batchSize=$2 22 | fi 23 | 24 | mkfifo $file1 25 | mkfifo $file2 26 | mkfifo $file3 27 | mkfifo $file4 28 | mkfifo $file5 29 | mkfifo $file6 30 | mkfifo $file7 31 | mkfifo $file8 32 | 33 | $PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & 34 | 35 | $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & 36 | $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & 37 | 38 | $PASH_TOP/runtime/eager.sh $file3 $file5 "/tmp/pash_eager_intermediate_#file1" & 39 | $PASH_TOP/runtime/eager.sh $file4 $file6 "/tmp/pash_eager_intermediate_#file2" & 40 | 41 | sort < $file5 > $file7 & 42 | sort < $file6 > $file8 & 43 | 44 | sort -m $file7 $file8 45 | 46 | # cat $testFile | sort > $file8 47 | # if cmp -s "$file7" "$file8"; then 48 | # printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" 49 | # else 50 | # printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" 51 | # fi 52 | 53 | rm -rf *out -------------------------------------------------------------------------------- 
#!/bin/bash
# Runs `lda est` once per topic count (all runs in parallel), then derives
# topic listings and CSV files from each model directory.
#
# Usage: run_lda.sh [K ...]
#   K  topic counts to fit; without arguments a fixed sweep is used.

export PYTHONIOENCODING=utf8

# Start from the stock sweep and let command-line arguments replace it.
KS="50 75 100 125 150 175 200"
if [ $# -ne 0 ]; then
    KS="$*"
fi

# Timestamped working directory, and the start time for the closing message.
DIR=$(date "+%Y-%m-%d_%H:%M")
START=$(date "+%Y-%m-%d %H:%M")

# TODO error handling

echo "SETTING UP"
mkdir ${DIR}

echo "PARSING"
python parse.py

# Data files (presumably written by parse.py) move into the run directory.
mv abstracts.dat ${DIR}
mv vocab.dat ${DIR}
mv docs.dat ${DIR}

# we don't want to lose this one!
cp stopwords.dat ${DIR}

echo "RUNNING LDA"

ABS=${DIR}/abstracts.dat

# One background lda run per topic count; each model directory name is also
# appended to the run directory's .gitignore.
for topics in ${KS}; do
    lda est 1/50 ${topics} settings.txt ${ABS} seeded ${DIR}/lda${topics} &
    echo lda${topics} >>${DIR}/.gitignore
done

# Barrier: every lda run must finish before its output is read.
wait
echo "PROCESSING TOPICS"

for topics in ${KS}; do
    python debug_topics.py ${DIR} ${topics} > ${DIR}/lda${topics}_topics.txt
done

echo "GENERATING CSV"

# The lda* glob also matches the *_topics.txt files written above, so only
# real model directories are post-processed.
for model in ${DIR}/lda*; do
    if test -d ${model}; then
        python post.py ${model}/final.gamma ${DIR}/docs.dat > ${model}.csv
        python by_year.py ${model}/final.gamma ${DIR}/docs.dat > ${model}_by_year.csv
    fi
done

echo "MOVING TO OUTPUT DIRECTORY"
mv ${DIR} ../out

echo "DONE"
echo All done. Started at ${START}, done at $(date "+%Y-%m-%d %H:%M").
# everything else (i.e., side-effectful) just needs to be converted to location independent commands

p="../c_stats/"
A=${1:-${p}posix.txt}
B=${2:-${p}coreutils.txt}

# Names (first space-separated field) of the commands that $A marks as
# 'Mandatory', sorted so they can be fed to comm.
mandatory_commands() {
  grep 'Mandatory' "$A" | cut -d ' ' -f 1 | sort
}

# Names (first space-separated field) of the commands listed in $B, sorted.
described_commands() {
  cut -d ' ' -f 1 "$B" | sort
}

# Reads command names on stdin, one per line, and prints the lines of file $1
# that start with that name as a whole word (pattern "^name" with grep -w).
# -I already runs one grep per input line; combining it with -n is rejected
# by xargs as mutually exclusive.
lookup_in() {
  sed 's/^/^/' | xargs -I {} grep -w {} "$1"
}

# Take commands that are shared and use existing distributability descriptions.
# The descriptions come from $B, so a second argument on the command line is
# honoured here as well.
comm -12 <(mandatory_commands) <(described_commands) |
  lookup_in "$B" |
  sort -b -k2,2 -k1,1 # > posix_mandatory1.txt # uncommenting this redirection will overwrite the file!

# Analyze mandatory commands not in the second, and not built-ins.
# tr -s squeezes runs of spaces so that cut sees single-space separated fields.
comm -23 <(mandatory_commands) <(described_commands) |
  comm -23 - <(tr -s ' ' < "${p}builtins.txt" | cut -d ' ' -f 1 | sort) |
  lookup_in "$A" |
  sed 's/Mandatory//' |
  sort -b -k2,2 -k1,1 # > posix_mandatory2.txt # uncommenting this redirection will overwrite the file!
28 | cp stopwords.dat ${DIR} 29 | 30 | echo "RUNNING LDA" 31 | 32 | ABS=${DIR}/abstracts.dat 33 | 34 | for k in ${KS}; do 35 | lda est 1/50 ${k} settings.txt ${ABS} seeded ${DIR}/lda${k} & 36 | echo lda${k} >>${DIR}/.gitignore 37 | done 38 | 39 | wait 40 | echo "PROCESSING TOPICS" 41 | 42 | for k in ${KS}; do 43 | python debug_topics.py ${DIR} ${k} > ${DIR}/lda${k}_topics.txt 44 | done 45 | 46 | echo "GENERATING CSV" 47 | 48 | for i in ${DIR}/lda*; do 49 | test -d ${i} && python post.py ${i}/final.gamma ${DIR}/docs.dat > ${i}.csv 50 | test -d ${i} && python by_year.py ${i}/final.gamma ${DIR}/docs.dat > ${i}_by_year.csv 51 | done 52 | 53 | echo "MOVING TO OUTPUT DIRECTORY" 54 | mv ${DIR} ../out 55 | 56 | echo "DONE" 57 | echo All done. Started at ${START}, done at `date "+%Y-%m-%d %H:%M"`. 58 | -------------------------------------------------------------------------------- /test/failing/bio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # https://www.biostars.org/p/43677/ 4 | # https://github.com/h3abionet/h3agatk 5 | # https://docs.google.com/document/d/1siCZrequI4plggz3ho351NnX57CoyCJl9GWp3azlxfU/edit# 6 | bwa mem -M -p -t [num_threads] \ 7 | -R "@RG\tID:1\tPL:ILLUMINA\tPU:pu\tLB:group1\tSM:SAMPLEID" \ 8 | [reference_fasta] \ 9 | [input_fastq] > [output] 10 | 11 | bwa mem genome.fa reads.fastq | samtools sort -o output.bam - 12 | 13 | # https://www.biostars.org/p/43677/ 14 | bwa aln -t 4 ./hg19.fasta ./s1_1.fastq > ./s1_1.sai 15 | bwa aln -t 4 ./hg19.fasta ./s1_2.fastq > ./s1_2.sai 16 | bwa sampe ./hg19.fasta ./s1_1.sai ./s1_2.sai ./s1_1.fastq ./s1_2.fastq | 17 | samtools view -Shu - | 18 | samtools sort - - | 19 | samtools rmdup -s - - | 20 | tee s1_sorted_nodup.bam | 21 | bamToBed > s1_sorted_nodup.bed 22 | 23 | # 4 cores, -M is for Picard compatibility 24 | bwa mem -M -t 4 ./hg19.fasta ./s1_1.fastq ./s1_2.fastq > s1.sam 25 | 26 | samtools merge - *.bam | 27 | # tee merged.bam | 28 | samtools rmdup - 
#!/bin/bash
# For every package name listed in $IN: fetch its PKGBUILD from the AUR and
# try to build it with makedeb-makepkg inside ${OUT}/<package>. The output of
# the N-th package goes to ${LOGS}/N.log.
#
# Environment:
#   IN   file with whitespace-separated package names
#   OUT  directory that receives one sub-directory per package
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/packages}
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/packages}
LOGS=${OUT}/logs
mkdir -p "${OUT}" "${LOGS}"

# Print a bold "--> message" progress line.
info() { echo -e "\e[1m--> $*\e[0m"; }
# Create directory $1 (including parents) and change into it.
mkcd() { mkdir -p "$1" && cd "$1"; }

# check if not running as root
# test "$UID" -gt 0 || { info "don't run this as root!"; exit; }

# set link to plaintext PKGBUILDs
pkgbuild="https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h"

#######################################
# Fetch and build one package.
# Globals:   OUT (read), pkgbuild (read)
# Arguments: $1 - package name
# Outputs:   progress lines, plus ' ' / 'failed' markers when the download or
#            the build fails
# Returns:   1 when the package directory cannot be entered, otherwise the
#            status of the final `cd -`
#######################################
run_tests() {
    # The name comes from the argument, not from whatever variable the
    # caller's loop happens to use; `local` keeps it out of the global scope.
    local pkg=$1

    info "create subdirectory for $pkg"
    # Without the package directory, curl would drop PKGBUILD into the
    # current directory and `cd -` would jump to a stale location.
    mkcd "${OUT}/$pkg" || { echo 'failed'; return 1; }

    info "fetch PKGBUILD for $pkg"
    curl --insecure -o PKGBUILD "$pkgbuild=$pkg" 2> /dev/null || echo ' '

    #info "fetch required pgp keys from PKGBUILD"
    #gpg --recv-keys $(sed -n "s:^validpgpkeys=('\([0-9A-Fa-fx]\+\)').*$:\1:p" PKGBUILD)
    info "make and install ..."
    timeout 100 makedeb-makepkg --format-makedeb -d 2>/dev/null || echo 'failed'
    cd -
}

# run_tests needs its helpers wherever it is exported to.
export -f info mkcd run_tests

# Read the whole list up front: names are split on blanks and newlines, are
# never glob-expanded, and the build commands cannot consume the list through
# an inherited stdin. read returns non-zero at end of file; the array is
# filled regardless.
read -r -d '' -a packages < "${IN}"

pkg_count=0
# loop over required packages
for pkg in "${packages[@]}"; do
    pkg_count=$((pkg_count + 1))
    run_tests "$pkg" > "${LOGS}/${pkg_count}.log"
done

echo 'done';
/test/pash_tests/head_deadlock_fixed3.sh: -------------------------------------------------------------------------------- 1 | mkfifo s1 s2 2 | 3 | ## This way of fixing the problem suffers from some issues. 4 | ## 5 | ## - First of all, gathering the children after the end of the graph 6 | ## seems to gather more than just the alive nodes. This could lead 7 | ## to killing some random pid in the system. This could potentially 8 | ## be solved by gathering all pids incrementally. 9 | ## 10 | ## - In addition, this way of getting the last pid does not work if 11 | ## there is more than one output. (This is never the case in our 12 | ## tests, but could be. 13 | ## 14 | ## - Finally, it is not local, since all of the monitoring happens 15 | ## globally. Ideally, it should be done by a wrapper in each - 16 | ## node. The wrapper should monitor if the node dies, and if so it - 17 | ## should send SIGPIPE to all its producers. 18 | 19 | cat ../evaluation/scripts/input/1M.txt > s1 & 20 | echo "Current node: $!" 21 | cat ../evaluation/scripts/input/1M.txt > s2 & 22 | echo "Current node: $!" 23 | cat s1 s2 | head -n 1 & 24 | 25 | last=$! 
26 | 27 | echo "Children pids" 28 | ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' 29 | 30 | echo "Alternative children pids" 31 | jobs -l | awk '{print $1}' 32 | 33 | wait $last 34 | 35 | echo "Last pid: $last" 36 | 37 | ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' | xargs -n 1 kill -SIGPIPE 38 | 39 | rm s1 s2 40 | -------------------------------------------------------------------------------- /test/pash_tests/nginx.sh: -------------------------------------------------------------------------------- 1 | ############################### 2 | ### awk not working on pash ### 3 | ############################### 4 | # sort by reponse codes 5 | #pash 36 sec, bash 7 sec 6 | INPUT=${PASH_TOP}/evaluation/scripts/input/access.log 7 | cat ${INPUT} | cut -d "\"" -f3 | cut -d ' ' -f2 | sort | uniq -c | sort -rn > /dev/null 8 | # awk alternative, too slow 9 | awk '{print $9}' ${INPUT} | sort | uniq -c | sort -rn > /dev/null 10 | # find broken links broken links 11 | awk '($9 ~ /404/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -rn > /dev/null 12 | # for 502 (bad-gateway) we can run following command: 13 | awk '($9 ~ /502/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -r > /dev/null 14 | # Who are requesting broken links (or URLs resulting in 502) 15 | awk -F\" '($2 ~ "/wp-admin/install.php"){print $1}' ${INPUT} | awk '{print $1}' | sort | uniq -c | sort -r > /dev/null 16 | # 404 for php files -mostly hacking attempts 17 | awk '($9 ~ /404/)' ${INPUT} | awk -F\" '($2 ~ "^GET .*\.php")' | awk '{print $7}' | sort | uniq -c | sort -r | head -n 20 > /dev/null 18 | ############################## 19 | # Most requested URLs ######## 20 | awk -F\" '{print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null 21 | # Most requested URLs containing XYZ 22 | awk -F\" '($2 ~ "ref"){print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null 23 | -------------------------------------------------------------------------------- 
#!/bin/sh
# Round-trip test: run `program` on `target`, then on its own output, and
# check that the second output equals the first (ignoring whitespace amounts).
# Unless the target's first line is '# TEST: single roundtrip', one more round
# is allowed to reach a fixpoint.
#
# Usage: round_trip.sh program target
# Exit status:
#   0      PASS
#   1      FAIL (diffs are printed)
#   2      usage error
#   3/4/5  program failed on the first/second/third run

if [ $# -ne 2 ]; then
    echo "Usage: ${0##*/} program target"
    exit 2
fi

p=$1
tgt=$2

# True unless the target opts out with a '# TEST: single roundtrip' first line.
two_roundtrips() {
    [ "$(head -n1 "$tgt")" != '# TEST: single roundtrip' ]
}

orig=
rt=
rtrt=
keep_tmp=0

# Remove the intermediate files on exit. They are kept only after an abort,
# because the abort message names them for inspection.
cleanup() {
    [ "$keep_tmp" -eq 0 ] || return 0
    for tmp in "$orig" "$rt" "$rtrt"; do
        if [ -n "$tmp" ]; then
            rm -f -- "$tmp"
        fi
    done
}
trap cleanup EXIT

# abort STATUS MESSAGE: report a failed program run and keep its files.
abort() {
    keep_tmp=1
    echo "$2"
    exit "$1"
}

orig=$(mktemp)
"$p" "$tgt" >"$orig" || abort 3 "RT_ABORT_1: '$tgt' -> '$orig'"

rt=$(mktemp)
"$p" "$orig" >"$rt" || abort 4 "RT_ABORT_2: '$tgt' -> '$orig' -> '$rt'"

if diff -b "$orig" "$rt" >/dev/null; then
    echo "PASS '$tgt'"
    exit 0
fi

if two_roundtrips; then
    # try one more time around the loop
    rtrt=$(mktemp)
    "$p" "$rt" >"$rtrt" || abort 5 "RT_ABORT_3: '$tgt' -> '$orig' -> '$rt' -> '$rtrt'"

    if diff -b "$rt" "$rtrt" >/dev/null; then
        echo "PASS '$tgt' (two runs to fixpoint)"
        exit 0
    fi
fi

echo "FAIL: '$tgt' first time"
diff -ub "$orig" "$rt"
if two_roundtrips; then
    echo ">>> '$tgt' second time"
    diff -ub "$rt" "$rtrt"
fi
exit 1
() 27 | | Dash.Parsed n -> 28 | (* translate to our AST *) 29 | let c = Ast.of_node n in 30 | (* deallocate *) 31 | Dash.pop_stack stackmark; 32 | (* keep calm and carry on *) 33 | c::parse_all () 34 | 35 | let print_ast c = print_endline (Ast_json.string_of_t c) 36 | 37 | let main () = 38 | Dash.initialize (); 39 | parse_args (); 40 | set_input_src (); 41 | let cs = parse_all () in 42 | List.map print_ast cs 43 | ;; 44 | 45 | main () 46 | -------------------------------------------------------------------------------- /test/tests/syntax: -------------------------------------------------------------------------------- 1 | ls * ${x:-$(foo)} ${#foo}seven $x "foo\"${x}"${y} $((x + ${x})) `ls 1` $(bq1)x$(bq2) >foo 2>&1 </dev/null 5 | mv /tmp/foo /tmp/bar & 6 | foo | bar | baz | quux 7 | foo | bar | baz | quux & 8 | if /bin/true; then always; else never; fi 9 | if [ -x some_file ]; then maybe; elif [ -d some_dir ]; then otherwise; fi 10 | if something; then we are looking for the one-armed bandit; fi 11 | while [ ! -x some_file ]; do try to make some_file; done 12 | until [ -x some_file ]; do seriously make that file; done 13 | while { ! a && ! 
b ; } ; do certainly not c; done 14 | for x in a b c d; do something to those letters; done 15 | for x; do something to those arguments implicitly; done 16 | case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh ;; esac 17 | case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh;; esac 18 | function foo { echo $*; export x=$((x + 1)) } 19 | function foo { echo $*; export x=$((x + 1)) } 20 | foo() { echo $*; export x=$((x + 1)) ; } 21 | this < $file3 & 42 | $PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file6 > $file4 & 43 | # ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & 44 | 45 | $PASH_TOP/runtime/r_merge $file3 $file4 46 | 47 | # cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 48 | # if cmp -s "$file6" "$file5"; then 49 | # printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" 50 | # else 51 | # printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" 52 | # fi 53 | 54 | rm -rf *out 55 | -------------------------------------------------------------------------------- /test/pash_tests/test1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file1=1.out 3 | file2=2.out 4 | file3=3.out 5 | file4=4.out 6 | file5=5.out 7 | file6=6.out 8 | file7=7.out 9 | file8=8.out 10 | file9=9.out 11 | rm -f *.out 12 | testFile=../../evaluation/scripts/input/10M.txt 13 | batchSize=100000 14 | mkfifo $file1 15 | mkfifo $file2 16 | mkfifo $file3 17 | mkfifo $file4 18 | # mkfifo $file5 19 | 20 | mkfifo $file7 21 | mkfifo $file8 22 | mkfifo $file9 23 | 24 | # mkfifo $file6 25 | # cat $testFile > $file9 & 26 | # ../auto-split.sh $file9 $file1 $file2 & 27 | # grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file4 & 28 | # grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file6 & 29 | # ../eager.sh $file1 $file3 temp & 30 | # ../eager.sh $file6 $file7 temp2 & 31 | # cat $file4 $file7 > $file5 32 | 33 | ../r_split 
$testFile $batchSize $file1 $file2 $file7 & 34 | 35 | ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file1 > $file3 & 36 | ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file4 & 37 | ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & 38 | 39 | ../r_merge $file3 $file4 $file8> $file5 40 | 41 | # cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 42 | # if cmp -s "$file6" "$file5"; then 43 | # printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" 44 | # else 45 | # printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" 46 | # fi 47 | 48 | # rm -rf *out -------------------------------------------------------------------------------- /test/pash_tests/wait_for_output_and_sigpipe_rest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ## TODO: Give it the output pid as an argument 4 | wait "$@" 5 | 6 | ## TODO: This only works if there is only a single output node 7 | ## (and a single node given as an argument to `wait`). 8 | export internal_exec_status=$? 9 | 10 | # It is assumed that $distro is set when this is called. 11 | 12 | # Note: We need the || true after the grep so that it doesn't exit with error if it finds nothing. 13 | 14 | 15 | # This value may contains multiple pids as a whitespace-separated string, and 16 | # we must split it as multiple pids by shell's field splitting. 17 | # shellcheck disable=SC2086 18 | (> /dev/null 2>&1 kill -SIGPIPE $pids_to_kill || true) 19 | 20 | ## 21 | ## Old way of waiting, very inefficient. 
#!/bin/sh
# Runs the OCaml and the Python round-trip tools on one test file and compares
# what they print.
#
# Usage: test_ocaml_python.sh testFile
# Environment (defaults apply only when the variable is unset):
#   RT_OCAML   OCaml round-trip tool, called with the file as its argument
#   RT_PYTHON  Python round-trip tool, fed the file on stdin
# Exit status: 0 when both tools agree (or both abort), 1 otherwise.

: "${RT_OCAML=../ocaml/rt.sh}"
: "${RT_PYTHON=../python/rt.py}"

if [ $# -ne 1 ]
then
    echo "Usage: $0 testFile"
    echo
    exit 1
fi

testFile="$1"

if [ ! -f "$testFile" ]
then
    echo "Error: cannot read '$testFile'!"
    echo
    exit 1
fi

ocaml_rt=
ocaml_err=
python_rt=
python_err=
keep_outputs=0

# Remove one temporary file, ignoring names that were never assigned.
remove_tmp() {
    if [ -n "$1" ]; then
        rm -f -- "$1"
    fi
}

# The stderr captures are always disposable. The two outputs survive only
# after a FAIL, whose message names them for inspection.
cleanup() {
    remove_tmp "$ocaml_err"
    remove_tmp "$python_err"
    if [ "$keep_outputs" -eq 0 ]; then
        remove_tmp "$ocaml_rt"
        remove_tmp "$python_rt"
    fi
}
trap cleanup EXIT

ocaml_rt=$(mktemp)
ocaml_err=$(mktemp)
python_rt=$(mktemp)
python_err=$(mktemp)

"$RT_OCAML" "$testFile" >"$ocaml_rt" 2>"$ocaml_err"
ocaml_ec=$?
"$RT_PYTHON" < "$testFile" >"$python_rt" 2>"$python_err"
python_ec=$?

if [ "$ocaml_ec" -ne 0 ] && [ "$python_ec" -ne 0 ]
then
    echo "PASS '$testFile' | both abort"
    exit 0
elif [ "$ocaml_ec" -ne 0 ]
then
    echo "OCAML_ABORT: '$testFile'"
    cat "$ocaml_err" >&2
    exit 1
elif [ "$python_ec" -ne 0 ]
then
    echo "PYTHON_ABORT: '$testFile'"
    cat "$python_err" >&2
    exit 1
fi

if diff "$ocaml_rt" "$python_rt" >/dev/null
then
    echo "PASS '$testFile'"
    exit 0
fi

# The outputs differ: keep them, and report whether the difference is more
# than whitespace.
keep_outputs=1
if diff -w "$ocaml_rt" "$python_rt" >/dev/null
then
    echo "FAIL_WHITESPACE: '$testFile' | $ocaml_rt $python_rt"
else
    echo "FAIL: '$testFile' | $ocaml_rt $python_rt"
fi
exit 1
# Collects the per-suite test verdicts into log_results/ and prints a
# "<suite>: passed/total" summary line for each suite.
export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)}
rm -rf log_results
mkdir log_results

#######################################
# Summarise one suite's result_status file.
# Globals:   PASH_TOP (read; stripped as a prefix from logged lines)
# Arguments: $1 - directory containing the suite's result_status file
#            $2 - suite name, used in the log file names and in the summary
# Outputs:   "<suite>: <passed>/<total> tests passed." on stdout;
#            log_results/<suite>_passed.log, log_results/<suite>_failed.log
#            (the latter only when something failed), both also appended to
#            log_results/passed.log and log_results/failed.log
# Runs in a subshell, so none of its variables reach the caller.
#######################################
stats() (
    test_results_dir=$1
    suite=$2
    status_file="$test_results_dir/result_status"
    passed_log="log_results/${suite}_passed.log"
    failed_log="log_results/${suite}_failed.log"

    grep "are identical" "$status_file" |
        sed "s,^$PASH_TOP/,," > "$passed_log"
    cat "$passed_log" >> log_results/passed.log

    grep "are not identical" "$status_file" |
        sed "s,^$PASH_TOP/,," > "$failed_log"
    # if the file has data, append it
    if [ -s "$failed_log" ]
    then
        cat "$failed_log" >> log_results/failed.log
    else
        # remove since it's empty
        rm "$failed_log"
    fi

    total_tests=$(wc -l < "$status_file")
    # The passed log holds exactly the matching lines, so there is no need to
    # scan the status file again.
    passed_tests=$(wc -l < "$passed_log")
    # $(( )) strips the padding some wc implementations add and turns a
    # missing status file into 0.
    echo "${suite}: $(( ${passed_tests:-0} ))/$(( ${total_tests:-0} )) tests passed."
)

echo "Below follow the identical outputs:" > log_results/passed.log
echo "Below follow the non-identical outputs:" > log_results/failed.log
#
## intro tests
stats "$PASH_TOP/evaluation/intro/output" intro
#
## Interface Tests
stats "$PASH_TOP/evaluation/tests/interface_tests/output" interface
#
## compiler Tests
stats "${PASH_TOP}/evaluation/tests/results" compiler
#
## aggregator tests
stats "${PASH_TOP}/evaluation/tests/agg/output" agg
instead of chr1,chr2 or chromosome1 etc 12 | echo 'Processing Sample '${IN}/input/$sample' '; 13 | # uniform the chromosomes in the file due to inconsistencies 14 | samtools view -H "${IN}/input/$sample".bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' \ 15 | | samtools reheader - "${IN}/input/$sample".bam > "${OUT}/$sample"_corrected.bam ; 16 | # create bai file 17 | samtools index -b "${OUT}/$sample"_corrected.bam ; 18 | ### Isolating each relevant chromosome based on Gen_locs 19 | cut -f 2 ./Gene_locs.txt |sort |uniq |while read chr; 20 | do 21 | echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', '; 22 | samtools view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam ; 23 | echo 'Indexing Sample '$pop'_'${OUT}/$sample' '; 24 | samtools index -b "${OUT}/$pop"_"$sample"_"$chr".bam; 25 | #sleep 2 26 | done; 27 | #rm "${OUT}/$sample"_corrected.bam; 28 | #rm "${OUT}/$sample"_corrected.bam.bai; 29 | #rm "${OUT}/$sample".bam 30 | done; 31 | -------------------------------------------------------------------------------- /ocaml/ast.mli: -------------------------------------------------------------------------------- 1 | type linno = int 2 | 3 | exception ParseException of string 4 | 5 | type t = 6 | Command of (linno * assign list * args * redirection list) 7 | | Pipe of (bool * t list) 8 | | Redir of (linno * t * redirection list) 9 | | Background of (linno * t * redirection list) 10 | | Subshell of (linno * t * redirection list) 11 | | And of (t * t) 12 | | Or of (t * t) 13 | | Not of (t) 14 | | Semi of (t * t) 15 | | If of (t * t * t) 16 | | While of (t * t) 17 | | For of (linno * arg list * t * string) 18 | | Case of (linno * arg * case list) 19 | | Defun of (linno * string * t) 20 | and assign = string * arg 21 | and redirection = 22 | File of (redir_type * int * arg) 23 | | Dup of (dup_type * int * arg) 24 | | Heredoc of (heredoc_type * int * arg) 25 | and redir_type = To | Clobber | From | FromTo | Append 
26 | and dup_type = ToFD | FromFD 27 | and heredoc_type = Here | XHere 28 | and args = arg list 29 | and arg = arg_char list 30 | and arg_char = 31 | C of char 32 | | E of char 33 | | T of string option 34 | | A of arg 35 | | V of (var_type * bool * string * arg) 36 | | Q of arg 37 | | B of t 38 | and var_type = 39 | Normal 40 | | Minus 41 | | Plus 42 | | Question 43 | | Assign 44 | | TrimR 45 | | TrimRMax 46 | | TrimL 47 | | TrimLMax 48 | | Length 49 | and case = { cpattern : arg list; cbody : t; } 50 | 51 | val of_node : Dash.node Ctypes.union Ctypes.ptr -> t 52 | 53 | (* command that does nothing *) 54 | val skip : t 55 | 56 | (* render to string *) 57 | val to_string : t -> string 58 | -------------------------------------------------------------------------------- /ocaml/function_description.ml: -------------------------------------------------------------------------------- 1 | open Ctypes 2 | 3 | module Types = Types_generated 4 | open Types 5 | 6 | module Functions (F : Ctypes.FOREIGN) = struct 7 | open F 8 | 9 | let setstackmark = foreign "setstackmark" (ptr stackmark @-> returning void) 10 | let popstackmark = foreign "popstackmark" (ptr stackmark @-> returning void) 11 | 12 | let alloc_stack_string = foreign "sstrdup" (string @-> returning (ptr char)) 13 | let free_stack_string = foreign "stunalloc" (ptr char @-> returning void) 14 | 15 | let dash_init = foreign "init" (void @-> returning void) 16 | let initialize_dash_errno = foreign "initialize_dash_errno" (void @-> returning void) 17 | 18 | let popfile = foreign "popfile" (void @-> returning void) 19 | let setinputstring = foreign "setinputstring" (ptr char @-> returning void) 20 | let setinputfd = foreign "setinputfd" (int @-> int @-> returning void) 21 | let raw_setinputfile = foreign "setinputfile" (string @-> int @-> returning int) 22 | 23 | let raw_setvar = foreign "setvar" (string @-> string @-> int @-> returning (ptr void)) 24 | 25 | let setalias = foreign "setalias" (string @-> string @-> 
returning void) 26 | let unalias = foreign "unalias" (string @-> returning void) 27 | 28 | (* Unix/ExtUnix don't let you renumber things the way you want *) 29 | let freshfd_ge10 = foreign "freshfd_ge10" (int @-> returning int) 30 | 31 | let parsecmd_safe = foreign "parsecmd_safe" (int @-> returning (ptr node)) 32 | let neof = foreign_value "tokpushback" node 33 | let nerr = foreign_value "lasttoken" node 34 | end 35 | 36 | 37 | -------------------------------------------------------------------------------- /test/failing/test_ast2shell_py.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | SHELL_TO_JSON_OCAML=/pash/compiler/parser/parse_to_json.native 5 | JSON_TO_SHELL_OCAML=/pash/compiler/parser/json_to_shell.native 6 | 7 | RT_PY="rt.py" 8 | 9 | 10 | if [ $# -ne 1 ] 11 | then 12 | echo "Usage: $0 testFile" 13 | echo 14 | exit 1 15 | fi 16 | 17 | 18 | testFile="$1" 19 | 20 | 21 | if [ ! -f "$testFile" ] 22 | then 23 | echo "Error: cannot read '$testFile'!" 24 | echo 25 | exit 1 26 | fi 27 | 28 | 29 | "$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ 30 | if [ $? -ne 0 ] 31 | then 32 | echo "REF_ABORT_1: '$testFile'" 33 | exit 1 34 | fi 35 | 36 | "$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ 37 | if [ $? -ne 0 ] 38 | then 39 | echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" 40 | exit 1 41 | fi 42 | 43 | # python3 "$RT_PY" < "$testFile" > /tmp/rt_py.$$ 44 | python3 "$RT_PY" "$testFile" > /tmp/rt_py.$$ 45 | if [ $? -ne 0 ] 46 | then 47 | echo "ABORT: '$testFile'" 48 | exit 1 49 | fi 50 | 51 | diff /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null 52 | if [ $? -ne 0 ] 53 | then 54 | diff -w /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null 55 | if [ $? 
#!/bin/sh
#
# Round-trip regression test: OCaml reference tools vs. the C tools.
#
# Usage: test_rt.sh testFile
#
# testFile is parsed to JSON and printed back to shell twice, once with the
# OCaml reference binaries and once with the C binaries, and the two
# round-tripped scripts are compared.
#
# Exactly one status line is written to stdout:
#   PASS             outputs are identical
#   FAIL_WHITESPACE  outputs differ only in whitespace (diff -w is clean)
#   FAIL             outputs differ beyond whitespace
#   REF_ABORT_1/2    an OCaml reference tool exited non-zero
#   ABORT_1/2        a C tool exited non-zero
# The exit status is 0 only for PASS.
#
# Intermediate files live in a private directory created by mktemp instead
# of predictable /tmp/<name>.$$ paths, so concurrent runs or pre-existing
# files cannot clobber each other.  They are deliberately NOT removed: the
# status lines name them so a failure can be inspected afterwards.
#
# The binaries below are resolved relative to the current directory.

SHELL_TO_JSON_OCAML=../parse_to_json.native
JSON_TO_SHELL_OCAML=../json_to_shell.native

SHELL_TO_JSON_C=./parse_to_json2
JSON_TO_SHELL_C=./json_to_shell2


if [ $# -ne 1 ]
then
    echo "Usage: $0 testFile"
    echo
    exit 1
fi


testFile="$1"


if [ ! -f "$testFile" ]
then
    echo "Error: cannot read '$testFile'!"
    echo
    exit 1
fi


workDir=$(mktemp -d "${TMPDIR:-/tmp}/test_rt.XXXXXX") || {
    echo "Error: cannot create temporary directory!" >&2
    exit 1
}

json_ocaml="$workDir/json_ocaml"
rt_ocaml="$workDir/rt_ocaml"
json_c="$workDir/json_c"
rt_c="$workDir/rt_c"


# Reference (OCaml) round trip.
if ! "$SHELL_TO_JSON_OCAML" < "$testFile" > "$json_ocaml"
then
    echo "REF_ABORT_1: '$testFile'"
    exit 1
fi

if ! "$JSON_TO_SHELL_OCAML" < "$json_ocaml" > "$rt_ocaml"
then
    echo "REF_ABORT_2: '$testFile' | $json_ocaml"
    exit 1
fi

# Round trip under test (C).
if ! "$SHELL_TO_JSON_C" < "$testFile" > "$json_c"
then
    echo "ABORT_1: '$testFile'"
    exit 1
fi

if ! "$JSON_TO_SHELL_C" < "$json_c" > "$rt_c"
then
    echo "ABORT_2: '$testFile' | $json_c"
    exit 1
fi

# Classify quietly first, then print only the diff that matches the verdict
# (same reporting scheme as test_ast2shell_py.sh), so a FAIL no longer shows
# the same differences twice.
if ! diff "$rt_ocaml" "$rt_c" > /dev/null
then
    if ! diff -w "$rt_ocaml" "$rt_c" > /dev/null
    then
        diff -w "$rt_ocaml" "$rt_c"
        echo "FAIL: '$testFile' | $rt_ocaml $rt_c"
    else
        diff "$rt_ocaml" "$rt_c"
        echo "FAIL_WHITESPACE: '$testFile' | $rt_ocaml $rt_c"
    fi
    exit 1
fi

echo "PASS: '$testFile' | $rt_ocaml $rt_c"
def try_exec(*cmds):
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        *cmds: The program followed by its arguments, passed to
            ``subprocess.run`` as one argument vector (no shell involved).
            Anything ``subprocess.run`` accepts is fine: ``str``, ``bytes``
            or path-like objects.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.  A
            one-line ``\`<command>\` failed`` notice is written to stderr
            before the exception is raised.
    """
    proc = subprocess.run(cmds)

    if proc.returncode != 0:
        # os.fsdecode accepts exactly the argument types subprocess.run does,
        # so building the message can no longer raise TypeError (and hide the
        # real failure) when an argument is a Path or bytes.
        printable = ' '.join(os.fsdecode(part) for part in cmds)
        print('`{}` failed'.format(printable), file=sys.stderr)
        proc.check_returncode()
#######################################
# Compare every sequential output in a directory with its parallel twin.
#
# For each file <dir>/<name>.seq.out, runs `diff -q` against
# <dir>/<name>.par.out, so one line is printed per differing pair (diff's
# own error is printed when the .par.out file is missing).
#
# Only files whose name ends in ".seq.out" are considered.  They are found
# with a glob rather than by parsing `ls`, so names containing whitespace
# work and files that merely contain "seq" somewhere are ignored.
#
# Arguments: $1 - directory holding the *.seq.out / *.par.out files
# Outputs:   diff -q messages on stdout/stderr
# Returns:   0 if every pair matched (or there were none), 1 otherwise
#######################################
compare_outputs(){
    local dir=$1
    local seq_output pash_output
    local status=0

    for seq_output in "$dir"/*.seq.out; do
        # With no match the pattern is left as a literal string; skip it.
        [ -e "$seq_output" ] || continue

        pash_output="${seq_output%.seq.out}.par.out"
        # Remember any mismatch instead of reporting only the last pair.
        diff -q -- "$seq_output" "$pash_output" || status=1
    done

    return "$status"
}
#######################################
# Merge the bigram results of two adjacent input chunks.
#
# Each chunk is described by three files: its bigram lines, its first word
# (head) and its last word (tail).  The merged result is
#   OUT          = IN1, then one seam line `paste AUX_TAIL1 AUX_HEAD2`
#                  (last word of chunk 1 joined to first word of chunk 2),
#                  then IN2
#   AUX_HEAD_OUT = copy of AUX_HEAD1
#   AUX_TAIL_OUT = copy of AUX_TAIL2
# The four copies run concurrently; the seam line travels through a FIFO.
#
# The FIFO lives in a private `mktemp -d` directory (no `mktemp -u` name
# race), every path is quoted, all variables are local so the caller's
# globals such as OUT are untouched, and only this function's own
# background jobs are waited for.
#
# Arguments:
#   $1 IN1         $2 AUX_HEAD1     $3 AUX_TAIL1
#   $4 IN2         $5 AUX_HEAD2     $6 AUX_TAIL2
#   $7 OUT         $8 AUX_HEAD_OUT  $9 AUX_TAIL_OUT
# Returns: 0 on success; 1 if the FIFO could not be set up or any of the
#          four workers failed.
#######################################
bigram_aux_reduce()
{
    local in1=$1 aux_head1=$2 aux_tail1=$3
    local in2=$4 aux_head2=$5 aux_tail2=$6
    local out=$7 aux_head_out=$8 aux_tail_out=$9
    local workdir seam pid
    local status=0
    local -a pids=()

    workdir=$(mktemp -d) || return 1
    seam="$workdir/seam"
    if ! mkfifo -- "$seam"; then
        rm -rf -- "$workdir"
        return 1
    fi

    cat -- "$aux_head1" > "$aux_head_out" &
    pids+=("$!")
    cat -- "$aux_tail2" > "$aux_tail_out" &
    pids+=("$!")
    paste -- "$aux_tail1" "$aux_head2" > "$seam" &
    pids+=("$!")
    cat -- "$in1" "$seam" "$in2" > "$out" &
    pids+=("$!")

    # Wait for our own workers only; a bare `wait` would also block on any
    # background job the caller happens to have running.
    for pid in "${pids[@]}"; do
        wait "$pid" || status=1
    done

    rm -rf -- "${workdir:?}"
    return "$status"
}