├── test ├── __init__.py ├── fixtures │ ├── semantics.txt │ ├── grammar.txt │ ├── val.txt │ ├── train.txt │ └── seq2seq.json ├── test_model.py ├── test_commands_reader.py ├── test_semantics_grammar.py └── test_paired_generator.py ├── data └── .gitignore ├── gpsr_command_understanding ├── data │ ├── __init__.py │ ├── generate_list_for_paraphrasing.py │ └── enumerate_grammar.py ├── demo │ ├── __init__.py │ ├── generate_utterance.py │ ├── parse_utterance.py │ ├── sem_parse_utterance_learned.py │ └── sem_parse_utterance.py ├── generator │ ├── __init__.py │ ├── tokens.py │ ├── knowledge.py │ └── loading_helpers.py ├── resources │ ├── __init__.py │ ├── generator2018 │ │ ├── __init__.py │ │ ├── whattosay.txt │ │ ├── categories.txt │ │ ├── gestures.xml │ │ ├── names.xml │ │ ├── gpsr_category_1_grammar.txt │ │ ├── questions.xml │ │ ├── gpsr_category_1_slot.txt │ │ ├── gpsr_category_2_grammar.txt │ │ ├── locations.xml │ │ ├── objects.xml │ │ ├── gpsr_category_2_slot.txt │ │ ├── gpsr_category_3_grammar.txt │ │ ├── common_rules.txt │ │ ├── gpsr_category_1_semantics.txt │ │ └── gpsr_category_3_semantics.txt │ ├── generator2019 │ │ ├── __init__.py │ │ ├── egpsr_semantics.txt │ │ ├── gpsr_semantics.txt │ │ ├── whattosay.txt │ │ ├── categories.txt │ │ ├── gestures.xml │ │ ├── names.xml │ │ ├── questions.xml │ │ ├── locations.xml │ │ ├── objects.xml │ │ └── common_rules.txt │ ├── generator2021 │ │ ├── __init__.py │ │ ├── egpsr_semantics.txt │ │ ├── gpsr_semantics.txt │ │ ├── questions.xml │ │ ├── whattosay.txt │ │ ├── categories.txt │ │ ├── gestures.xml │ │ ├── names.xml │ │ ├── locations.xml │ │ └── common_rules.txt │ ├── speech-crowdsourcing │ │ ├── saved_audio │ │ │ ├── .wav │ │ │ ├── newfile.txt │ │ │ ├── 0-1956ec.wav │ │ │ ├── 0-2482ee.wav │ │ │ ├── 0-45f8b6.wav │ │ │ ├── 0-85203d.wav │ │ │ ├── 0-b6fc9a.wav │ │ │ ├── 5-9a99be.wav │ │ │ ├── 5-9eff3b.wav │ │ │ ├── 5-b009b0.wav │ │ │ ├── 5-dff0ca.wav │ │ │ ├── 5-e755f5.wav │ │ │ ├── 6-031c37.wav │ │ │ ├── 6-45183b.wav │ │ │ ├── 
6-5881fb.wav │ │ │ ├── 6-ad395f.wav │ │ │ ├── 6-ff1fa4.wav │ │ │ ├── 7-382f13.wav │ │ │ ├── 7-645e8f.wav │ │ │ ├── 7-853297.wav │ │ │ ├── 7-d5bb75.wav │ │ │ ├── 7-fcedd7.wav │ │ │ ├── 8-9473da.wav │ │ │ ├── 8-a34ae2.wav │ │ │ ├── 8-b5ca87.wav │ │ │ ├── 8-cd01b7.wav │ │ │ ├── 8-f10ba8.wav │ │ │ ├── 12-328c1d.wav │ │ │ ├── 12-5a21eb.wav │ │ │ ├── 12-8ca714.wav │ │ │ ├── 12-d3966e.wav │ │ │ ├── 12-f65f12.wav │ │ │ ├── 15-03a0e9.wav │ │ │ ├── 15-088cb9.wav │ │ │ ├── 15-9149fa.wav │ │ │ ├── 15-9f98b7.wav │ │ │ ├── 15-b20b02.wav │ │ │ ├── 17-190bb0.wav │ │ │ ├── 17-1b0e22.wav │ │ │ ├── 17-45acb8.wav │ │ │ ├── 17-4ca19a.wav │ │ │ ├── 17-6ec4d1.wav │ │ │ ├── 0-newcommand0.wav │ │ │ ├── 1038-17514d.wav │ │ │ ├── 1111-17514d.wav │ │ │ ├── 1516-17514d.wav │ │ │ ├── 23853-17514d.ogg │ │ │ ├── 3442-17514d.wav │ │ │ ├── 5-newcommand0.wav │ │ │ ├── 5470-17514d.wav │ │ │ ├── 6-newcommand0.wav │ │ │ ├── 6145-17514d.wav │ │ │ ├── 7-newcommand0.wav │ │ │ ├── 8-newcommand0.wav │ │ │ ├── 8058-17514d.wav │ │ │ ├── 8330-17514d.wav │ │ │ ├── 8779-3c1cfb.wav │ │ │ ├── 8779-4ec5f8.wav │ │ │ ├── 8779-69983b.wav │ │ │ ├── 8779-90a6e4.wav │ │ │ ├── 8779-bee800.wav │ │ │ ├── 9406-3c1cfb.wav │ │ │ ├── 9406-4ec5f8.wav │ │ │ ├── 9406-69983b.wav │ │ │ ├── 9406-90a6e4.wav │ │ │ ├── 9406-bee800.wav │ │ │ ├── 98229-17514d.ogg │ │ │ ├── 12-newcommand0.wav │ │ │ ├── 15-newcommand0.wav │ │ │ ├── 17-newcommand0.wav │ │ │ ├── 1516-newcommand0.wav │ │ │ ├── 23853-newcommand0.ogg │ │ │ ├── 5470-newcommand0.wav │ │ │ ├── 6145-newcommand0.wav │ │ │ ├── 8058-newcommand0.wav │ │ │ ├── 8330-newcommand0.wav │ │ │ ├── 8779-newcommand0.wav │ │ │ ├── 9406-newcommand0.wav │ │ │ └── 98229-newcommand0.ogg │ │ ├── package.json │ │ ├── test.csv │ │ ├── tsconfig.json │ │ ├── paged-navigation.d.ts │ │ ├── record-element.d.ts │ │ ├── upload.php │ │ ├── paged-navigation.js │ │ ├── paged-navigation.js.map │ │ ├── paged-navigation.ts │ │ └── record-element.js.map │ ├── demo │ │ ├── script.js │ │ └── index.html │ ├── 
lambda_ebnf.lark │ └── generator.lark ├── __init__.py ├── models │ ├── __init__.py │ ├── noop_tokenizer.py │ ├── command_predictor.py │ ├── metrics.py │ └── commands_reader.py ├── util.py └── anonymizer.py ├── .gitignore ├── requirements.txt ├── MANIFEST.in ├── experiments ├── transformers ├── glove_embedding.libjsonnet ├── glove_seq2seq.jsonnet ├── seq2seq.jsonnet ├── elmo_seq2seq.jsonnet ├── common.libjsonnet ├── common_seq2seq.libjsonnet └── transformer_seq2seq.jsonnet ├── .flake8 ├── .github └── workflows │ └── ci.yaml ├── scripts ├── create_data_splits ├── compile_multiseed_results.py ├── test_all_models ├── train_all_models └── process_turk_data.py ├── LICENSE ├── setup.py ├── run_all_experiments └── README.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /gpsr_command_understanding/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/generator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.pyc 2 | experiments/results*/ 3 | *.swp 4 | venv/ -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/egpsr_semantics.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/gpsr_semantics.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/egpsr_semantics.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/gpsr_semantics.txt: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/.wav: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/newfile.txt: -------------------------------------------------------------------------------- 1 | John Doe 2 | Jane Doe 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | allennlp>=2.0.0 2 | editdistance 3 | importlib_resources 4 | lark-parser>=0.8.5 5 | lxml -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/questions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/whattosay.txt: -------------------------------------------------------------------------------- 1 | the time 2 | what day it is 3 | what day tomorrow is 4 | the day of the week 5 | a joke -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/whattosay.txt: -------------------------------------------------------------------------------- 1 | the time 2 | what day it is 3 | what day tomorrow is 4 | the day of the week 5 | a joke -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/package.json: -------------------------------------------------------------------------------- 
1 | { 2 | "name": "speech-crowdsourcing", 3 | "version": "1.0.0", 4 | "dependencies": { 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | recursive-include gpsr_command_understanding * 4 | global-exclude .DS_Store *.py[cod] 5 | prune **/__pycache__ 6 | prune **/.mypy_cache -------------------------------------------------------------------------------- /experiments/transformers: -------------------------------------------------------------------------------- 1 | albert-base-v2 2 | bert-base-uncased 3 | bert-large-uncased 4 | distilbert-base-uncased 5 | distilgpt2 6 | distilroberta-base 7 | gpt2 8 | roberta-base 9 | xlnet-base-cased -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-1956ec.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-1956ec.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-2482ee.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-2482ee.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-45f8b6.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-45f8b6.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-85203d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-85203d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-b6fc9a.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-b6fc9a.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-9a99be.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-9a99be.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-9eff3b.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-9eff3b.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-b009b0.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-b009b0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-dff0ca.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-dff0ca.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-e755f5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-e755f5.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-031c37.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-031c37.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-45183b.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-45183b.wav -------------------------------------------------------------------------------- 
/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-5881fb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-5881fb.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-ad395f.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-ad395f.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-ff1fa4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-ff1fa4.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-382f13.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-382f13.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-645e8f.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-645e8f.wav 
-------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-853297.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-853297.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-d5bb75.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-d5bb75.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-fcedd7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-fcedd7.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-9473da.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-9473da.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-a34ae2.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-a34ae2.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-b5ca87.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-b5ca87.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-cd01b7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-cd01b7.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-f10ba8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-f10ba8.wav -------------------------------------------------------------------------------- /experiments/glove_embedding.libjsonnet: -------------------------------------------------------------------------------- 1 | { 2 | type: 'embedding', 3 | vocab_namespace: 'source_tokens', 4 | embedding_dim: 100, 5 | pretrained_file: 'https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz', 6 | } -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-328c1d.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-328c1d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-5a21eb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-5a21eb.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-8ca714.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-8ca714.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-d3966e.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-d3966e.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-f65f12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-f65f12.wav -------------------------------------------------------------------------------- 
/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-03a0e9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-03a0e9.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-088cb9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-088cb9.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-9149fa.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-9149fa.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-9f98b7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-9f98b7.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-b20b02.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-b20b02.wav 
-------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-190bb0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-190bb0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-1b0e22.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-1b0e22.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-45acb8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-45acb8.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-4ca19a.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-4ca19a.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-6ec4d1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-6ec4d1.wav -------------------------------------------------------------------------------- /test/fixtures/semantics.txt: -------------------------------------------------------------------------------- 1 | $speak = (speak) 2 | $bring = (bring) 3 | Find {name} who is {gesture} by the {beacon} in the {room} and bring {pron} the {kobject} from the {placement} = (test {name} {gesture} {beacon} {room} {kobject} {placement}) -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/0-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1038-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1038-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1111-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1111-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1516-17514d.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1516-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/23853-17514d.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/23853-17514d.ogg -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/3442-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/3442-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5470-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5470-17514d.wav -------------------------------------------------------------------------------- 
/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6145-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6145-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/7-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8058-17514d.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8058-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8330-17514d.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8330-17514d.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-3c1cfb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-3c1cfb.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-4ec5f8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-4ec5f8.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-69983b.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-69983b.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-90a6e4.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-90a6e4.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-bee800.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-bee800.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-3c1cfb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-3c1cfb.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-4ec5f8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-4ec5f8.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-69983b.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-69983b.wav -------------------------------------------------------------------------------- 
/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-90a6e4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-90a6e4.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-bee800.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-bee800.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/98229-17514d.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/98229-17514d.ogg -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/12-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-newcommand0.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/15-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/17-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/categories.txt: -------------------------------------------------------------------------------- 1 | name plural, name singular 2 | cutlery, utensil 3 | containers, container 4 | drinks, drink 5 | food, food 6 | fruits, fruit 7 | snacks, snack 8 | tableware, tableware 9 | toiletries, toiletry -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/categories.txt: -------------------------------------------------------------------------------- 1 | name plural, name singular 2 | cutlery, utensil 3 | containers, container 4 | drinks, drink 5 | food, food 6 | fruits, fruit 7 | snacks, snack 8 | tableware, tableware 9 | toiletries, toiletry -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1516-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/1516-newcommand0.wav -------------------------------------------------------------------------------- 
/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/23853-newcommand0.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/23853-newcommand0.ogg -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5470-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/5470-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6145-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/6145-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8058-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8058-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8330-newcommand0.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8330-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/8779-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-newcommand0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/9406-newcommand0.wav -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/98229-newcommand0.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickswalker/gpsr-command-understanding/HEAD/gpsr_command_understanding/resources/speech-crowdsourcing/saved_audio/98229-newcommand0.ogg -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 160 3 | max-complexity = 10 4 | exclude = 5 | data 6 | experiments 7 | .git 8 | venv 9 | # We import modules here to make them available to Allennlp 10 | per-file-ignores = 11 | **/__init__.py:F401 -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/whattosay.txt: 
-------------------------------------------------------------------------------- 1 | the time 2 | what day it is 3 | what day tomorrow is 4 | the day of the week 5 | a joke 6 | it's dinner time 7 | they're late 8 | it's time to go 9 | to clean up their mess 10 | i'm hungry -------------------------------------------------------------------------------- /gpsr_command_understanding/models/__init__.py: -------------------------------------------------------------------------------- 1 | import gpsr_command_understanding.models.commands_reader 2 | import gpsr_command_understanding.models.seq2seq 3 | import gpsr_command_understanding.models.metrics 4 | import gpsr_command_understanding.models.command_predictor 5 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/test.csv: -------------------------------------------------------------------------------- 1 | holder,command0,command1,command2,command3,command4,command5,command6,command7,command8,command9,command10,command11 2 | "value","test","test","test","test","test","test","test","test","test","test","test","test" 3 | -------------------------------------------------------------------------------- /test/fixtures/grammar.txt: -------------------------------------------------------------------------------- 1 | $Main = ($speak | $bring | $wild) 2 | $speak = say hi to him right now please 3 | $bring = bring it to (me | {pron} ) $when 4 | $when = (now | later) 5 | $wild = Find {name} who is {gesture} by the {beacon} in the {room} and bring {pron} the {kobject} from the {placement} 6 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/categories.txt: -------------------------------------------------------------------------------- 1 | name plural, name singular 2 | cleaning supplies, cleaning supply 3 | cutlery, utensil 4 | containers, container 5 | drinks, 
drink 6 | food, food 7 | household items, item 8 | fruits, fruit 9 | kitchen items, kitchen item 10 | personal items, personal 11 | snacks, snack 12 | tableware, tableware 13 | toiletries, toiletry 14 | tools, tool 15 | toys, toy -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gestures.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/gestures.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/gestures.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "es6", 4 | "moduleResolution": "node", 5 | "target": "es2016", 6 | "removeComments": true, 7 | "preserveConstEnums": true, 8 | "sourceRoot": "./", 9 | "rootDir": "./", 10 | "outDir": "./", 11 | "declaration": true, 12 | "noImplicitAny": true, 13 | "sourceMap": true 14 | }, 15 | "exclude": [ 16 | "build" 17 | ] 18 | } -------------------------------------------------------------------------------- /experiments/glove_seq2seq.jsonnet: -------------------------------------------------------------------------------- 1 | local common_parameters = import 'common_seq2seq.libjsonnet'; 2 | local glove_embedding = import 'glove_embedding.libjsonnet'; 3 | std.mergePatch(common_parameters, { 4 | 
dataset_reader: { 5 | source_token_indexers: { 6 | source_tokens: { 7 | type: 'single_id', 8 | namespace: 'source_tokens', 9 | }, 10 | }, 11 | }, 12 | model: { 13 | type: 'seq2seq', 14 | source_embedder: { 15 | token_embedders: { 16 | source_tokens: glove_embedding, 17 | }, 18 | }, 19 | }, 20 | trainer: { 21 | num_epochs: 150, 22 | }, 23 | }) -------------------------------------------------------------------------------- /test/test_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from allennlp.common.testing import ModelTestCase 4 | # We need to register components with AllenNLP 5 | from gpsr_command_understanding.models import seq2seq 6 | 7 | FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures") 8 | 9 | 10 | class TestModel(ModelTestCase): 11 | def setup(self): 12 | super().setup_method() 13 | self.set_up_model(os.path.join(FIXTURE_DIR, 'seq2seq.json'), 14 | os.path.join(FIXTURE_DIR, 'train.txt')) 15 | 16 | def test_model_can_train_save_and_load(self): 17 | self.ensure_model_can_train_save_and_load(self.param_file) 18 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/paged-navigation.d.ts: -------------------------------------------------------------------------------- 1 | declare var current: number; 2 | declare var moved: number[]; 3 | declare var movedto: number[]; 4 | declare let numPages: number; 5 | declare let pagesContainer: HTMLElement; 6 | declare let pageNumber: HTMLElement; 7 | declare let preNextValidator: (arg0: HTMLElement) => boolean; 8 | declare function setUpPages(container: HTMLElement, nextValidator: (arg0: HTMLElement) => boolean): void; 9 | declare function effectivePage(pagenum: number): number; 10 | declare function swap(vanish: number, appear: number): void; 11 | declare function next(): void; 12 | declare function back(): void; 13 | 
-------------------------------------------------------------------------------- /test/test_commands_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | from allennlp.common.testing import AllenNlpTestCase 5 | from allennlp.common.util import ensure_list 6 | from gpsr_command_understanding.models.commands_reader import CommandsDatasetReader 7 | 8 | 9 | FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures") 10 | 11 | 12 | class TestCommandsReader(AllenNlpTestCase): 13 | 14 | def setup(self): 15 | super().setup_method() 16 | self.reader = CommandsDatasetReader() 17 | instances = self.reader.read(join(FIXTURE_DIR, "train.txt")) 18 | self.instances = ensure_list(instances) 19 | 20 | def test_tokens(self): 21 | assert len(self.instances) == 9 22 | -------------------------------------------------------------------------------- /experiments/seq2seq.jsonnet: -------------------------------------------------------------------------------- 1 | local common_parameters = import 'common_seq2seq.libjsonnet'; 2 | std.mergePatch(common_parameters, { 3 | dataset_reader: { 4 | source_token_indexers: { 5 | source_tokens: { 6 | type: 'single_id', 7 | namespace: 'source_tokens', 8 | }, 9 | }, 10 | }, 11 | model: { 12 | source_embedder: { 13 | token_embedders: { 14 | source_tokens: { 15 | type: 'embedding', 16 | vocab_namespace: 'source_tokens', 17 | embedding_dim: 100, 18 | trainable: true, 19 | }, 20 | }, 21 | }, 22 | }, 23 | data_loader: { 24 | batch_sampler: { 25 | batch_size: 16, 26 | }, 27 | }, 28 | trainer: { 29 | 30 | }, 31 | }) -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/names.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Jamie 4 | Morgan 5 | Michael 6 | Taylor 7 | Tracy 8 | Jordan 9 | Hayden 10 | Juan 11 | Peyton 12 | Robin 13 | Alex 14 
| Mohamed 15 | Peter 16 | Ali 17 | Mary 18 | Adam 19 | Oliver 20 | Liam 21 | Emma 22 | Jose 23 | Sofia 24 | Luis 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/record-element.d.ts: -------------------------------------------------------------------------------- 1 | declare global { 2 | interface Window { 3 | sharedAudioContext: any; 4 | sharedAudioStream: MediaStream; 5 | webkitAudioContext: any; 6 | } 7 | } 8 | export declare class AudioRecorderElement extends HTMLElement { 9 | private recorder; 10 | private audioPlayback; 11 | private recordedChunks; 12 | private shouldStop; 13 | private startStopResetButton; 14 | private textDisplay; 15 | recordedBlob: Blob; 16 | constructor(); 17 | get recording(): string; 18 | get isRecording(): boolean; 19 | startStopResetClicked(): void; 20 | record(): Promise; 21 | started(): void; 22 | stop(): void; 23 | stopped(): void; 24 | reset(): void; 25 | dataAvailable(e: any): void; 26 | } 27 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/names.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Jamie 4 | Morgan 5 | Michael 6 | Taylor 7 | Tracy 8 | Jordan 9 | Hayden 10 | Peyton 11 | Robin 12 | Alex 13 | Jamie 14 | Morgan 15 | Michael 16 | Taylor 17 | Tracy 18 | Jordan 19 | Hayden 20 | Peyton 21 | Robin 22 | Alex 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/names.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Jamie 4 | Morgan 5 | Michael 6 | Taylor 7 | Tracy 8 | Jordan 9 | Hayden 10 | Peyton 11 | Robin 12 | Alex 13 | Jamie 14 | Morgan 15 | Michael 16 | 
Taylor 17 | Tracy 18 | Jordan 19 | Hayden 20 | Peyton 21 | Robin 22 | Alex 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint-and-test: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: [3.6, 3.7, 3.8] 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r requirements.txt 23 | - name: Lint with flake8 24 | continue-on-error: true 25 | run: | 26 | pip install flake8 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . 29 | - name: Test with pytest 30 | run: | 31 | pytest test -------------------------------------------------------------------------------- /gpsr_command_understanding/models/noop_tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from overrides import overrides 4 | 5 | from allennlp.data.tokenizers.token_class import Token 6 | from allennlp.data.tokenizers.tokenizer import Tokenizer 7 | 8 | 9 | @Tokenizer.register("no_op") 10 | class NoOpTokenizer(Tokenizer): 11 | """ 12 | Just returns the unsplit line. Helpful for reusing a datasetreader for just dumping lines out of a file 13 | """ 14 | 15 | def __init__(self, ) -> None: 16 | pass 17 | 18 | @overrides 19 | def tokenize(self, text: str) -> List[Token]: 20 | """ 21 | Does whatever processing is required to convert a string of text into a sequence of tokens. 22 | At a minimum, this uses a ``WordSplitter`` to split words into text. 
It may also do 23 | stemming or stopword removal, depending on the parameters given to the constructor. 24 | """ 25 | return [Token(text)] 26 | -------------------------------------------------------------------------------- /scripts/create_data_splits: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # The commands used to produce the distributed dataset. The "all" set wasn't distributed because it can 3 | # be made just by concatenating the other two sets. 4 | 5 | seed=0 6 | 7 | gen_cmd="python -m gpsr_command_understanding.data.make_dataset -f" 8 | 9 | # Just gen (Column 1) 10 | ${gen_cmd} --name gen -a --seed $seed 11 | ${gen_cmd} --name gen_logical -a -s .68 .12 .20 --use-logical-split --seed $seed 12 | 13 | # Just paraphrase (Column 3) 14 | ${gen_cmd} --name para --paraphrasings data/paraphrasings.txt --seed $seed 15 | ${gen_cmd} --name para_logical --paraphrasings data/paraphrasings.txt --use-logical-split --match-logical-split data/gen_logical --seed $seed 16 | 17 | # Gen + para (Column 4) 18 | ${gen_cmd} --name all --paraphrasings data/paraphrasings.txt -a --seed $seed 19 | ${gen_cmd} --name all_logical --paraphrasings data/paraphrasings.txt -a --use-logical-split --match-logical-split data/gen_logical --seed $seed -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/demo/script.js: -------------------------------------------------------------------------------- 1 | function predict() { 2 | var quotedFieldList = ['command']; 3 | var data = {}; 4 | quotedFieldList.forEach(function (fieldName) { 5 | data[fieldName] = document.getElementById("input-" + fieldName).value; 6 | }) 7 | 8 | var xhr = new XMLHttpRequest(); 9 | xhr.open('POST', '/predict'); 10 | xhr.setRequestHeader('Content-Type', 'application/json'); 11 | xhr.onload = function () { 12 | if (xhr.status == 200) { 13 | // If you want a more impressive visualization than just 14 
| // outputting the raw JSON, change this part of the code. 15 | var asJson = JSON.parse(xhr.responseText); 16 | var htmlResults = "

" + asJson['instance']['predicted_tokens'].join(' ') + "

"; 17 | htmlResults += "
" + JSON.stringify(asJson, null, 2) + "
"; 18 | 19 | document.getElementById("output").innerHTML = htmlResults; 20 | } 21 | }; 22 | xhr.send(JSON.stringify(data)); 23 | } -------------------------------------------------------------------------------- /gpsr_command_understanding/demo/generate_utterance.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | from gpsr_command_understanding.generator.generator import Generator 5 | from gpsr_command_understanding.generator.grammar import tree_printer 6 | from gpsr_command_understanding.generator.loading_helpers import load, GRAMMAR_YEAR_TO_MODULE 7 | from gpsr_command_understanding.generator.tokens import ROOT_SYMBOL 8 | from lark.tree import Tree 9 | 10 | 11 | def main(): 12 | year = int(sys.argv[1]) 13 | gen = Generator(None, year) 14 | load(gen, "gpsr", GRAMMAR_YEAR_TO_MODULE[year]) 15 | 16 | utterance = gen.generate_random(ROOT_SYMBOL, random_generator=random.Random(11)) 17 | utterance, meta = gen.extract_metadata(utterance) 18 | grounded = gen.ground(utterance) 19 | print(tree_printer(grounded)) 20 | 21 | print(tree_printer(utterance)) 22 | for _, note in meta.items(): 23 | if note: 24 | print("\t" + tree_printer(Tree("expression", note))) 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Nick Walker 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission 
notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /test/fixtures/val.txt: -------------------------------------------------------------------------------- 1 | Tell me the name of the person at the 2 | ( say ( λ $1 e ( and ( name $1 ) ( at $1 ) ) ) ) 3 | 4 | Tell me the name of the person in the 5 | ( say ( λ $1 e ( and ( name $1 ) ( at $1 ) ) ) ) 6 | 7 | bring me the from the 8 | ( bring ( λ $1 e ( and ( $1 ) ( at $1 ) ) ) ) 9 | 10 | give me the from the 11 | ( bring ( λ $1 e ( and ( $1 ) ( at $1 ) ) ) ) 12 | 13 | find the in the 14 | ( find ( λ $1 e ( $1 ) ) ( λ $1 e ( and ( location $1 ) ( name ) ) ) ) 15 | 16 | locate the in the 17 | ( find ( λ $1 e ( $1 ) ) ( λ $1 e ( and ( location $1 ) ( name ) ) ) ) 18 | 19 | look for the in the 20 | ( find ( λ $1 e ( $1 ) ) ( λ $1 e ( and ( location $1 ) ( name ) ) ) ) -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/lambda_ebnf.lark: -------------------------------------------------------------------------------- 1 | ?start: expression | 2 | 3 | expression: lambda_abs 4 | | predicate 5 | | UNKNOWN 6 | lambda_abs: "(" ("lambda" | "λ") ARG ":"? TYPE "."? 
expression+ ")" 7 | predicate: "(" name [predicate_arg+] ")" 8 | !name: CNAME 9 | | placeholder 10 | 11 | ?predicate_arg: expression 12 | | placeholder 13 | | INT 14 | | ESCAPED_STRING 15 | | ARG 16 | | ANON 17 | 18 | ?placeholder: non_terminal 19 | | wildcard 20 | 21 | non_terminal: "$"CNAME 22 | 23 | UNKNOWN: "UNKNOWN" 24 | ANON: "<"WORD">" 25 | ARG: "$"INT 26 | TYPE: "e" 27 | COMMA: "," 28 | PERIOD: "." 29 | COMMENT: ("#"|";"|"//") /[^\n]/* 30 | 31 | %ignore COMMENT 32 | %import common.WORD 33 | %import common.ESCAPED_STRING 34 | %import common.INT 35 | %import common.LETTER 36 | %import common.WS 37 | %import common.CNAME 38 | %import .generator.wildcard 39 | %ignore WS -------------------------------------------------------------------------------- /test/fixtures/train.txt: -------------------------------------------------------------------------------- 1 | look for the in the 2 | (find (lambda $1 e (is_a $1 " ") (at $1 " "))) 3 | locate the in the 4 | (find (lambda $1 e (is_a $1 " ") (at $1 " "))) 5 | find the in the 6 | (find (lambda $1 e (is_a $1 " ") (at $1 " "))) 7 | Tell me how many there are on the 8 | (say (count (lambda $1 e (is_a $1 " ") (at $1 " ")))) 9 | look for a person in the and say 10 | (say " " (lambda $1 e (person $1) (at $1 " "))) 11 | look for a person in the and tell 12 | (say " " (lambda $1 e (person $1) (at $1 " "))) 13 | locate a person in the and say 14 | (say " " (lambda $1 e (person $1) (at $1 " "))) 15 | locate a person in the and tell 16 | (say " " (lambda $1 e (person $1) (at $1 " "))) 17 | find a person in the and say 18 | (say " " (lambda $1 e (person $1) (at $1 " "))) -------------------------------------------------------------------------------- /gpsr_command_understanding/demo/parse_utterance.py: -------------------------------------------------------------------------------- 1 | """ 2 | A REPL that demonstrates how to use the grammar-based parser to parse commands. 
3 | """ 4 | import sys 5 | 6 | from gpsr_command_understanding.anonymizer import NumberingAnonymizer 7 | from gpsr_command_understanding.generator.loading_helpers import load_paired, GRAMMAR_YEAR_TO_MODULE 8 | from gpsr_command_understanding.parser import GrammarBasedParser, AnonymizingParser 9 | 10 | 11 | def main(): 12 | year = 2018 13 | if len(sys.argv) == 2: 14 | year = int(sys.argv[1]) 15 | generator = load_paired("gpsr", GRAMMAR_YEAR_TO_MODULE[year]) 16 | 17 | parser = GrammarBasedParser(generator.rules) 18 | anonymizer = NumberingAnonymizer.from_knowledge_base(generator.knowledge_base) 19 | parser = AnonymizingParser(parser, anonymizer) 20 | while True: 21 | print("Type in a command") 22 | utterance = input() 23 | parsed = parser(utterance, verbose=True) 24 | 25 | if parsed: 26 | print(parsed.pretty()) 27 | else: 28 | print("Could not parse utterance based on the command grammar") 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /experiments/elmo_seq2seq.jsonnet: -------------------------------------------------------------------------------- 1 | local common_parameters = import 'common_seq2seq.libjsonnet'; 2 | local glove_embedding = import 'glove_embedding.libjsonnet'; 3 | std.mergePatch(common_parameters, { 4 | dataset_reader: { 5 | source_token_indexers: { 6 | source_tokens: { 7 | type: 'single_id', 8 | namespace: 'source_tokens', 9 | }, 10 | elmo: { 11 | type: 'elmo_characters', 12 | }, 13 | }, 14 | }, 15 | model: { 16 | source_embedder: { 17 | token_embedders: { 18 | source_tokens: glove_embedding, 19 | elmo: { 20 | type: 'elmo_token_embedder', 21 | options_file: 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json', 22 | weight_file: 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5', 23 | 
# coding: utf-8
import os
import unittest

from gpsr_command_understanding.generator.paired_generator import LambdaParserWrapper

GRAMMAR_DIR_2018 = "gpsr_command_understanding.resources.generator2018"
GRAMMAR_DIR_2019 = "gpsr_command_understanding.resources.generator2019"
FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures")


class TestSemanticsGrammar(unittest.TestCase):
    """Checks that the lambda-calculus annotation grammar parses representative expressions."""

    def setUp(self):
        # A fresh parser per test keeps the cases independent
        self.lambda_parser = LambdaParserWrapper()

    def test_parse_wildcard_expression(self):
        """A predicate whose arguments are wildcard placeholders parses into the expected tree."""
        tree = self.lambda_parser.parse("(test {object} {kobject?} {pron})")
        subtrees = list(tree.iter_subtrees())
        self.assertEqual(len(subtrees), 3)
        self.assertEqual(len(tree.children[0].children), 4)

    def test_parse_lambda_expression(self):
        """A typed lambda abstraction parses into the expected number of subtrees."""
        tree = self.lambda_parser.parse("(test (lambda $1 :e .(yo 1)))")
        rendered = [subtree.pretty() for subtree in tree.iter_subtrees()]
        self.assertEqual(8, len(rendered))

    def test_parse_escaped_string(self):
        """Quoted multi-token strings are each treated as a single argument."""
        tree = self.lambda_parser.parse("(test \"hello there\" \"second arg with many tokens\")")
        rendered = [subtree.pretty() for subtree in tree.iter_subtrees()]
        self.assertEqual(len(rendered), 3)
std.extVar('VALIDATION_PATH'); 4 | local TEST_PATH = std.extVar('TEST_PATH'); 5 | local EVALUATE_ON_TEST = std.parseInt(std.extVar('EVALUATE_ON_TEST')) == 1; 6 | // GPU to use. Setting this to -1 will mean that we'll use the CPU. 7 | local CUDA_DEVICE = std.parseInt(std.extVar('CUDA_DEVICE')); 8 | local HIDDEN_SIZE = 200; 9 | 10 | { 11 | numpy_seed: SEED, 12 | pytorch_seed: SEED, 13 | random_seed: SEED, 14 | train_data_path: TRAIN_PATH, 15 | validation_data_path: VALIDATION_PATH, 16 | test_data_path: if EVALUATE_ON_TEST then TEST_PATH else null, 17 | evaluate_on_test: EVALUATE_ON_TEST, 18 | data_loader: { 19 | batch_sampler: { 20 | type: 'bucket', 21 | padding_noise: 0.2, 22 | batch_size: 32, 23 | }, 24 | }, 25 | dataset_reader: { 26 | type: 'commands' 27 | }, 28 | trainer: { 29 | optimizer: { 30 | type: 'adamw', 31 | lr: 0.01, 32 | }, 33 | learning_rate_scheduler: { 34 | type: 'noam', 35 | warmup_steps: 1000, 36 | model_size: HIDDEN_SIZE, 37 | }, 38 | num_epochs: 150, 39 | patience: 30, 40 | cuda_device: CUDA_DEVICE, 41 | checkpointer: { 42 | num_serialized_models_to_keep: 0 43 | } 44 | }, 45 | 46 | 47 | } -------------------------------------------------------------------------------- /experiments/common_seq2seq.libjsonnet: -------------------------------------------------------------------------------- 1 | local common_parameters = import 'common.libjsonnet'; 2 | local HIDDEN_SIZE = 200; 3 | std.mergePatch(common_parameters, 4 | { 5 | dataset_reader: { 6 | target_tokenizer: { 7 | type: 'just_spaces', 8 | }, 9 | target_token_indexers: { 10 | tokens: { 11 | type: 'single_id', 12 | namespace: 'tokens', 13 | }, 14 | }, 15 | }, 16 | model: { 17 | type: 'seq2seq', 18 | encoder: { 19 | type: 'lstm', 20 | input_size: 100, 21 | hidden_size: HIDDEN_SIZE, 22 | num_layers: 1, 23 | bidirectional: true, 24 | }, 25 | attention: { 26 | type: 'bilinear', 27 | vector_dim: 400, 28 | matrix_dim: 400, 29 | }, 30 | target_embedding_dim: 100, 31 | beam_size: 5, 32 | 
from setuptools import setup, find_packages
import io
import os

import gpsr_command_understanding

here = os.path.abspath(os.path.dirname(__file__))


def read(*filenames, **kwargs):
    """Concatenate the contents of *filenames*, joined by ``sep`` (default: newline).

    Keyword args: ``encoding`` (default 'utf-8'), ``sep`` (default '\\n').
    """
    encoding = kwargs.get('encoding', 'utf-8')
    sep = kwargs.get('sep', '\n')
    buf = []
    for filename in filenames:
        with io.open(filename, encoding=encoding) as f:
            buf.append(f.read())
    return sep.join(buf)


long_description = read('README.md')


setup(
    name='gpsr-command-understanding',
    version=gpsr_command_understanding.__version__,
    url='http://github.com/nickswalker/gpsr-command-understanding',
    license='MIT',
    author='Nick Walker',
    install_requires=['allennlp>=v1.0.0rc5',
                      'lark-parser',
                      ],
    author_email='nswalker@cs.washington.edu',
    # BUG FIX: the previous description was copy-pasted from an unrelated project
    # ("Automated REST APIs for existing database-driven systems")
    description='Semantic parsing tools and models for RoboCup@Home GPSR commands',
    long_description=long_description,
    # README.md is Markdown; without this PyPI renders it as plain text
    long_description_content_type='text/markdown',
    # README requires "at least Python 3.6"
    python_requires='>=3.6',
    packages=find_packages(),
    include_package_data=True,
    platforms='any',
    classifiers=[
        'Programming Language :: Python :: 3',
        'Development Status :: 4 - Beta',
        'Natural Language :: English',
        'Intended Audience :: Developers',
        'Operating System :: OS Independent',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ]
)
10 |
11 |
12 |
13 |

GPSR Semantic Parser

14 |
15 | 16 |
17 | 18 | 19 |
20 | 21 |
22 | 23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |

Run model to view results

34 |
35 |
36 |
37 |
38 |
39 |
40 |
from typing import List

from allennlp.common.util import JsonDict, sanitize
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor


@Predictor.register('command_parser')
class CommandParser(Predictor):
    """Predictor wrapper for the CommandParser"""

    def predict_text(self, text: str) -> JsonDict:
        """Parse a raw command string and return the model's prediction dict."""
        instance = self._dataset_reader.text_to_instance(source_string=text)
        return self.predict_instance(instance)

    def predict_instance(self, instance: Instance) -> JsonDict:
        """Predict for one instance, first growing vocab/embedder to cover unseen tokens."""
        self._model.vocab.extend_from_instances([instance])
        # Pretrained transformer embedders don't have an extend method, so this won't do anything to them
        self._model.extend_embedder_vocab({
            '_source_embedder.token_embedder_source_tokens': 'https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz'})
        prediction = sanitize(self._model.forward_on_instance(instance))
        # Expose the predicted token sequence as one space-joined string for convenience
        prediction["digest"] = " ".join(prediction["predicted_tokens"])
        return prediction

    def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """Predict for a batch of instances, attaching a space-joined digest to each output."""
        predictions = sanitize(self._model.forward_on_instances(instances))
        for prediction in predictions:
            prediction["digest"] = " ".join(prediction["predicted_tokens"])
        return predictions

    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Build an instance from a JSON request shaped like {"command": "..."}."""
        return self._dataset_reader.text_to_instance(source_string=json_dict['command'])
#!/usr/bin/env bash

# Pass a seed as the sole argument
# This'll be passed through to the configs, where it's used
# in data iteration and in Pytorch internals
seed=0
if [[ $# == 1 ]]; then
    seed=$1
fi

# Catch misspecified out directory early
if [[ -n "${OUT_DIR}" && -d "${OUT_DIR}" ]]; then
    echo "${OUT_DIR} already exists"
    exit 1
fi
mkdir -p "${OUT_DIR}"

# Train and test every model config on the named dataset.
# $1: dataset folder name under data/; $2 (optional): test file override
function run_with_data {
    local data="$1"
    local test_data="$2"
    if [[ -z "${test_data}" ]]; then
        # Default to the folder's test data
        test_data="data/${data}/test.txt"
    fi
    local results_dir="experiments/results_${data}"
    bash -c "sleep 10; tensorboard serve --logdir ${results_dir} --bind_all --port 6008" &
    ./scripts/train_all_models "${data}" experiments -t "data/${data}/train.txt" -v "data/${data}/val.txt"
    ./scripts/test_all_models "${results_dir}" "${test_data}"
    pkill tensorboard
    # Model archives can take up a lot of space. Use OUT_DIR to move them elsewhere
    if [[ -n "${OUT_DIR}" ]]; then
        set -x;
        mv -b --suffix=.bak "${results_dir}" "${OUT_DIR}"
        # BUG FIX: this previously re-ran "set -x", leaving command tracing enabled
        # for the rest of the script; "set +x" turns tracing back off after the move
        set +x;
    fi
}

export SEED=$seed
# Column 1
run_with_data gen
run_with_data gen_logical

# Column 2 (train on gen data, test on the standard paraphrase test set)
./scripts/test_all_models "experiments/results_gen" "data/para/test.txt"
./scripts/test_all_models "experiments/results_gen_logical" "data/para_logical/test.txt"

# Column 3
run_with_data para
run_with_data para_logical

# Column 4 (both gen and real data, but test on para only!)
52 | run_with_data all "data/para/test.txt" 53 | run_with_data all_logical "data/para_logical/test.txt" 54 | 55 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/paged-navigation.js: -------------------------------------------------------------------------------- 1 | var current = 1; 2 | var moved = []; 3 | var movedto = []; 4 | let numPages = 0; 5 | let pagesContainer = null; 6 | let pageNumber = null; 7 | let preNextValidator = null; 8 | function setUpPages(container, nextValidator) { 9 | pagesContainer = container; 10 | preNextValidator = nextValidator; 11 | pageNumber = document.getElementById("page-number"); 12 | let pages = container.querySelectorAll(".page"); 13 | for (let i = 0; i < pages.length; i++) { 14 | pages[i].setAttribute("id", "p" + i.toString()); 15 | pages[i].style.display = "none"; 16 | } 17 | document.getElementById("p0").style.display = "block"; 18 | current = 0; 19 | numPages = pages.length; 20 | } 21 | function effectivePage(pagenum) { 22 | let index = moved.indexOf(pagenum); 23 | if (index === -1) { 24 | return pagenum; 25 | } 26 | else { 27 | return movedto[index]; 28 | } 29 | } 30 | function swap(vanish, appear) { 31 | pagesContainer.querySelector("#p" + vanish).style.display = "none"; 32 | document.querySelector("#p" + appear).style.display = ""; 33 | } 34 | function next() { 35 | var _a; 36 | let validationResult = (_a = preNextValidator(pagesContainer.querySelector("#p" + current))) !== null && _a !== void 0 ? 
_a : ""; 37 | if (validationResult !== "") { 38 | alert(validationResult); 39 | return; 40 | } 41 | if (current === numPages - 1) { 42 | return; 43 | } 44 | current++; 45 | pageNumber.innerText = current.toString(); 46 | swap(current - 1, current); 47 | } 48 | function back() { 49 | if (current === 0) { 50 | return; 51 | } 52 | current--; 53 | pageNumber.innerText = current.toString(); 54 | swap(current + 1, current); 55 | } 56 | //# sourceMappingURL=paged-navigation.js.map -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gpsr_category_1_grammar.txt: -------------------------------------------------------------------------------- 1 | ; grammar name Category I 2 | ; grammar tier Easy 3 | ; import common.txt 4 | 5 | 6 | $Main = $deliver 7 | $Main = $fndppl 8 | $Main = $fndobj 9 | 10 | ############################################################################## 11 | # 12 | # Production Rules required by common.txt 13 | # 14 | ############################################################################## 15 | ; Define an object type 16 | $object = {kobject} 17 | 18 | ; Rule for finding a specific (named) person 19 | $findp = $vbfind ( {name} | a person | someone ) 20 | 21 | ; A named or described person in the given place 22 | $whowhere = {name} at the {beacon 1} 23 | 24 | ############################################################################## 25 | # 26 | # Manipulation 27 | # 28 | ############################################################################## 29 | $deliver = $vbbring (me | to $whowhere) the {kobject} from the {placement} 30 | $deliver = $takefrom and ($delivme | $delivat) 31 | $deliver = $takefrom and $place 32 | 33 | 34 | ############################################################################## 35 | # 36 | # Find people 37 | # 38 | ############################################################################## 39 | $fndppl = Tell me the name of the person 
at the {beacon} 40 | $fndppl = Tell me the name of the person in the {room} 41 | 42 | ############################################################################## 43 | # 44 | # Find objects 45 | # 46 | ############################################################################## 47 | $fndobj = $vbfind the {kobject?} in the {room} 48 | 49 | 50 | 51 | ############################################################################## 52 | # 53 | # Rules 54 | # 55 | ############################################################################## 56 | 57 | 58 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/paged-navigation.js.map: -------------------------------------------------------------------------------- 1 | {"version":3,"file":"paged-navigation.js","sourceRoot":"./","sources":["paged-navigation.ts"],"names":[],"mappings":"AAAA,IAAI,OAAO,GAAG,CAAC,CAAC;AAChB,IAAI,KAAK,GAAa,EAAE,CAAC;AACzB,IAAI,OAAO,GAAa,EAAE,CAAC;AAC3B,IAAI,QAAQ,GAAG,CAAC,CAAC;AACjB,IAAI,cAAc,GAAgB,IAAI,CAAC;AACvC,IAAI,UAAU,GAAgB,IAAI,CAAC;AAEnC,IAAI,gBAAgB,GAAmC,IAAI,CAAA;AAE3D,SAAS,UAAU,CAAC,SAAsB,EAAE,aAA6C;IACrF,cAAc,GAAG,SAAS,CAAA;IAC1B,gBAAgB,GAAG,aAAa,CAAA;IAChC,UAAU,GAAG,QAAQ,CAAC,cAAc,CAAC,aAAa,CAAC,CAAA;IACnD,IAAI,KAAK,GAAQ,SAAS,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAA;IAGpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAEnC,KAAK,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,GAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAA;QAC7C,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,GAAG,MAAM,CAAA;KAClC;IAED,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC;IACtD,OAAO,GAAG,CAAC,CAAC;IACZ,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAA;AAC3B,CAAC;AAGD,SAAS,aAAa,CAAC,OAAe;IAClC,IAAI,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE;QACd,OAAO,OAAO,CAAC;KAClB;SAAM;QACH,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC;KACzB;AACL,CAAC;AAGD,SAAS,IAAI,CAAC,MAAc,EAAE,MAAc;IAC1B,cAAc,CAAC,aAAa,CAAC,IAAI,GAAC,
MAAM,CAAE,CAAC,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC;IAClE,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAC,MAAM,CAAE,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC;AAC1E,CAAC;AAGD,SAAS,IAAI;;IACT,IAAI,gBAAgB,SAAG,gBAAgB,CAAC,cAAc,CAAC,aAAa,CAAC,IAAI,GAAG,OAAO,CAAC,CAAC,mCAAI,EAAE,CAAA;IAC3F,IAAI,gBAAgB,KAAK,EAAE,EAAE;QACzB,KAAK,CAAC,gBAAgB,CAAC,CAAA;QACvB,OAAO;KACV;IACD,IAAI,OAAO,KAAK,QAAQ,GAAG,CAAC,EAAE;QAC1B,OAAO;KACV;IACD,OAAO,EAAE,CAAC;IACV,UAAU,CAAC,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC;AAC/B,CAAC;AAED,SAAS,IAAI;IACT,IAAI,OAAO,KAAK,CAAC,EAAE;QACf,OAAO;KACV;IACD,OAAO,EAAE,CAAC;IACV,UAAU,CAAC,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC;AAC/B,CAAC"} -------------------------------------------------------------------------------- /test/fixtures/seq2seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "commands", 4 | "source_token_indexers": { 5 | "source_tokens": { 6 | "type": "single_id", 7 | "namespace": "source_tokens" 8 | } 9 | }, 10 | "target_token_indexers": { 11 | "tokens": { 12 | "type": "single_id", 13 | "namespace": "tokens" 14 | } 15 | }, 16 | "target_tokenizer": { 17 | "type": "just_spaces" 18 | } 19 | }, 20 | "train_data_path": "test/fixtures/train.txt", 21 | "validation_data_path": "test/fixtures/val.txt", 22 | "model": { 23 | "type": "seq2seq", 24 | "source_embedder": { 25 | "token_embedders": { 26 | "source_tokens": { 27 | "type": "embedding", 28 | "vocab_namespace": "source_tokens", 29 | "embedding_dim": 100, 30 | "trainable": true 31 | } 32 | } 33 | }, 34 | "encoder": { 35 | "type": "lstm", 36 | "input_size": 100, 37 | "hidden_size": 200, 38 | "num_layers": 1, 39 | "bidirectional": true 40 | }, 41 | "attention": { 42 | "type": "bilinear", 43 | "vector_dim": 400, 44 | "matrix_dim": 400 45 | }, 46 | "target_embedding_dim": 100, 47 | "beam_size": 5, 48 | "max_decoding_steps": 100, 49 | "use_bleu": true 50 | }, 51 | 
"data_loader": { 52 | "batch_sampler": { 53 | "type": "bucket", 54 | // Padding noise will break test. Relies on batches coming in deterministic order 55 | "padding_noise": 0.0, 56 | "batch_size": 32 57 | } 58 | }, 59 | "trainer": { 60 | "optimizer": { 61 | "type": "adam", 62 | "lr": 0.01 63 | }, 64 | "learning_rate_scheduler": { 65 | "type": "noam", 66 | "warmup_steps": 1000, 67 | "model_size": 200 68 | }, 69 | "num_epochs": 15, 70 | "patience": 5, 71 | "validation_metric": "+seq_acc" 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/paged-navigation.ts: -------------------------------------------------------------------------------- 1 | var current = 1; //the current page 2 | var moved: number[] = []; //pages which have been moved by the randomization 3 | var movedto: number[] = []; 4 | let numPages = 0; 5 | let pagesContainer: HTMLElement = null; 6 | let pageNumber: HTMLElement = null; 7 | 8 | let preNextValidator: (arg0: HTMLElement) => boolean = null 9 | 10 | function setUpPages(container: HTMLElement, nextValidator: (arg0: HTMLElement) => boolean){ 11 | pagesContainer = container 12 | preNextValidator = nextValidator 13 | pageNumber = document.getElementById("page-number") 14 | let pages: any = container.querySelectorAll(".page") 15 | 16 | // Give pages index IDs 17 | for (let i = 0; i < pages.length; i++) { 18 | // IDs can't start with a number. 
Some things break if they do 19 | pages[i].setAttribute("id", "p"+i.toString()) 20 | pages[i].style.display = "none" 21 | } 22 | 23 | document.getElementById("p0").style.display = "block"; 24 | current = 0; 25 | numPages = pages.length 26 | } 27 | 28 | //takes randomization into account 29 | function effectivePage(pagenum: number) { 30 | let index = moved.indexOf(pagenum); 31 | if (index === -1) { 32 | return pagenum; 33 | } else { 34 | return movedto[index]; 35 | } 36 | } 37 | 38 | //make one vanish and the other appear 39 | function swap(vanish: number, appear: number) { 40 | (pagesContainer.querySelector("#p"+vanish)).style.display = "none"; 41 | (document.querySelector("#p"+appear)).style.display = ""; 42 | } 43 | 44 | //go to the next page 45 | function next() { 46 | let validationResult = preNextValidator(pagesContainer.querySelector("#p" + current)) ?? "" 47 | if (validationResult !== "") { 48 | alert(validationResult) 49 | return; 50 | } 51 | if (current === numPages - 1) { 52 | return; 53 | } 54 | current++; 55 | pageNumber.innerText = current.toString(); 56 | swap(current - 1, current); 57 | } 58 | 59 | function back() { 60 | if (current === 0) { 61 | return; 62 | } 63 | current--; 64 | pageNumber.innerText = current.toString(); 65 | swap(current + 1, current); 66 | } -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/questions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Who invented the C programming language? 5 | Ken Thompson and Dennis Ritchie. 6 | 7 | 8 | When was the C programming language invented? 9 | C was developed after B in 1972 at Bell Labs 10 | 11 | 12 | When was the B programming language invented? 13 | B was developed circa 1969 at Bell Labs 14 | 15 | 16 | Where does the term computer bug come from? 
17 | From a moth trapped in a relay 18 | 19 | 20 | Who invented the first compiler 21 | Grace Brewster Murray Hopper invented it 22 | 23 | 24 | Which robot is used in the Open Platform League 25 | There is no standard defined for OPL 26 | 27 | 28 | Which robot is used in the Domestic Standard Platform League 29 | The Toyota Human Support Robot 30 | 31 | 32 | Which robot is used in the Social Standard Platform League 33 | The SoftBank Robotics Pepper 34 | 35 | 36 | What's the name of your team? 37 | ... 38 | 39 | 40 | What time is it? 41 | ... 42 | 43 | 44 | What day is today? 45 | ... 46 | 47 | 48 | Do you have dreams? 49 | I dream of Electric Sheep. 50 | 51 | 52 | In which city will next year's RoboCup be hosted? 53 | It hasn't been announced yet. 54 | 55 | 56 | What is the origin of the name Canada? 57 | The name Canada comes from the Iroquois word Kanata, meaning village or settlement. 58 | 59 | 60 | What is the capital of Canada? 61 | The capital of Canada is Ottawa. 62 | 63 | 64 | What is the national anthem of Canada? 65 | O Canada. 66 | 67 | 68 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/questions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Who invented the C programming language? 5 | Ken Thompson and Dennis Ritchie. 6 | 7 | 8 | When was the C programming language invented? 9 | C was developed after B in 1972 at Bell Labs 10 | 11 | 12 | When was the B programming language invented? 13 | B was developed circa 1969 at Bell Labs 14 | 15 | 16 | Where does the term computer bug come from? 
17 | From a moth trapped in a relay 18 | 19 | 20 | Who invented the first compiler 21 | Grace Brewster Murray Hopper invented it 22 | 23 | 24 | Which robot is used in the Open Platform League 25 | There is no standard defined for OPL 26 | 27 | 28 | Which robot is used in the Domestic Standard Platform League 29 | The Toyota Human Support Robot 30 | 31 | 32 | Which robot is used in the Social Standard Platform League 33 | The SoftBank Robotics Pepper 34 | 35 | 36 | What's the name of your team? 37 | ... 38 | 39 | 40 | What time is it? 41 | ... 42 | 43 | 44 | What day is today? 45 | ... 46 | 47 | 48 | Do you have dreams? 49 | I dream of Electric Sheep. 50 | 51 | 52 | In which city will next year's RoboCup be hosted? 53 | It hasn't been announced yet. 54 | 55 | 56 | What is the origin of the name Canada? 57 | The name Canada comes from the Iroquois word Kanata, meaning village or settlement. 58 | 59 | 60 | What is the capital of Canada? 61 | The capital of Canada is Ottawa. 62 | 63 | 64 | What is the national anthem of Canada? 65 | O Canada. 
66 | 67 | 68 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gpsr_category_1_slot.txt: -------------------------------------------------------------------------------- 1 | #; grammar name Category I 2 | #; grammar tier Easy 3 | #; import common.txt 4 | 5 | 6 | #$Main = $deliver 7 | #$Main = $fndppl 8 | #$Main = $fndobj 9 | 10 | ############################################################################## 11 | # 12 | # Production Rules required by common.txt 13 | # 14 | ############################################################################## 15 | #; Define an object type 16 | #$object = {kobject} 17 | {kobject} = OBJECT 18 | 19 | #; Rule for finding a specific (named) person 20 | #$findp = $vbfind ( {name} | a person | someone ) 21 | $vbfind ( {name} | a person | someone ) = $vbfind ( {name} | O B-PERSON | B-PERSON ) 22 | 23 | #; A named or described person in the given place 24 | #$whowhere = {name} at the {beacon 1} 25 | {name} at the {beacon 1} = {name} O O {beacon 1} 26 | 27 | ############################################################################## 28 | # 29 | # Manipulation 30 | # 31 | ############################################################################## 32 | #$deliver = $vbbring (me | to $whowhere) the {kobject} from the {placement} 33 | $vbbring (me | to $whowhere) the {kobject} from the {placement} = $vbbring (TO[B-OPERATOR] | O TO[$whowhere]) O {kobject} O O FROM[{placement}] 34 | #$deliver = $takefrom and ($delivme | $delivat) 35 | #$deliver = $takefrom and $place 36 | 37 | 38 | ############################################################################## 39 | # 40 | # Find people 41 | # 42 | ############################################################################## 43 | #$fndppl = Tell me the name of the person at the {beacon} 44 | Tell me the name of the person at the {beacon} = {get_info} O TO[B-OPERATOR] O B-INFO O O FROM[B-PERSON] O O 
FROM[{beacon}] 45 | #$fndppl = Tell me the name of the person in the {room} 46 | Tell me the name of the person in the {room} = {get_info} O TO[B-OPERATOR] O B-INFO O O FROM[B-PERSON] O O FROM[{room}] 47 | 48 | ############################################################################## 49 | # 50 | # Find objects 51 | # 52 | ############################################################################## 53 | #$fndobj = $vbfind the {kobject?} in the {room} 54 | $vbfind the {kobject?} in the {room} = {find} $vbfind O {kobject?} O O FROM[{room}] 55 | {kobject?} = OBJECT 56 | 57 | 58 | ############################################################################## 59 | # 60 | # Rules 61 | # 62 | ############################################################################## 63 | 64 | 65 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gpsr_category_2_grammar.txt: -------------------------------------------------------------------------------- 1 | ; grammar name Category II 2 | ; grammar tier Moderate 3 | ; import common.txt 4 | 5 | $Main = $deliver 6 | $Main = $fndppl 7 | $Main = $fndobj 8 | $Main = $guide 9 | $Main = $follow 10 | 11 | ############################################################################## 12 | # 13 | # Production Rules required by common.txt 14 | # 15 | ############################################################################## 16 | ; Define an object type 17 | $object = {aobject} 18 | 19 | ; Rule for finding a specific (named) person 20 | $findp = $vbfind a $pgenders 21 | $findp = $vbfind a person {gesture} 22 | $findp = $vbfind a person $pose 23 | 24 | ; A named or described person in the given place 25 | $whowhere = the person {gesture} in the {room} 26 | 27 | 28 | 29 | ############################################################################## 30 | # 31 | # Manipulation 32 | # 33 | 
############################################################################## 34 | $deliver = $vbbtake the {aobject} from the {room} to the {placement 2} 35 | 36 | $deliver = $vbbring me the {aobject} from the {placement} 37 | $deliver = $takefrom and ($delivme | $delivat) 38 | $deliver = $takefrom and $place 39 | 40 | ############################################################################## 41 | # 42 | # Find people 43 | # 44 | ############################################################################## 45 | $fndppl = Tell me the (name | gender | pose) of the person at the {beacon} 46 | $fndppl = Tell me the (name | gender | pose) of the person in the {room} 47 | $fndppl = Tell me how many people in the {room} are ($pgenderp | $pose) 48 | 49 | ############################################################################## 50 | # 51 | # Find objects 52 | # 53 | ############################################################################## 54 | $fndobj = $vbfind the {aobject?} in the {room} 55 | $fndobj = Tell me how many {category} there are on the {placement} 56 | $fndobj = Tell me what's the $oprop object on the {placement} 57 | $fndobj = Tell me what's the $oprop {category} on the {placement} 58 | $fndobj = $vbfind the {category} in the {room} 59 | 60 | 61 | ############################################################################## 62 | # 63 | # Rules 64 | # 65 | ############################################################################## 66 | ; Find people 67 | $pgenders = man | woman | boy | girl | male person | female person 68 | $pgenderp = men | women | boys | girls | male | female 69 | $pose = sitting | standing | lying down 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPSR Command Understanding ![Build Status](https://github.com/nickswalker/gpsr-command-understanding/workflows/CI/badge.svg) 2 | 3 | A 
semantic parser for commands from the [RoboCup@Home](http://www.robocupathome.org/) _General Purpose Service Robot_ task. 4 | 5 | * [X] Utterance to λ-calculus representation parser 6 | * [X] Lexer/parser for loading the released command generation CFG 7 | * [X] Tools for generating commands along with a λ-calculus representation 8 | * [X] Crowd-sourcing interface for collecting paraphrases 9 | 10 | If you use this code or data, consider citing our paper [Neural Semantic Parsing for Command Understanding in General-Purpose Service Robots](https://arxiv.org/abs/1907.01115). The data collected for this paper is [available separately](https://github.com/nickswalker/gpsr-commands-dataset). 11 | 12 | ## Usage 13 | 14 | Set up a virtual environment using at least Python 3.6: 15 | 16 | python3.7 -m virtualenv venv 17 | source venv/bin/activate 18 | pip install -r requirements.txt 19 | 20 | ### Generation 21 | 22 | The latest grammar and knowledgebase files (pulled from [the generator](https://github.com/kyordhel/GPSRCmdGen)) are provided in the resources directory. The grammar [format specification](https://github.com/kyordhel/GPSRCmdGen/wiki/Grammar-Format-Specification) will clarify how to interpret the files. 23 | 24 | To produce the dataset, see `data/make_dataset.py`. 25 | 26 | ### Training 27 | 28 | We base our training on [previous work](https://github.com/jbkjr/allennlp_sempar) using [AllenNLP](https://allennlp.org) for seq2seq semantic parser training. All of our experiments are 29 | declaratively specified in the `experiments` directory. 30 | 31 | You can run them with 32 | 33 | allennlp train \ 34 | experiments/seq2seq.json \ 35 | -s results/seq2seq \ 36 | --include-package gpsr_command_understanding 37 | 38 | You can monitor training with Tensorboard, just point it at the log directory. 39 | 40 | The `train_all_models` script will train every config back to back. 
"""
A REPL that demonstrates how to use the generator to "parse" commands.
"""
from nltk.metrics.distance import edit_distance
import sys
import warnings
from gpsr_command_understanding.anonymizer import NumberingAnonymizer
from gpsr_command_understanding.generator.grammar import tree_printer
from gpsr_command_understanding.generator.knowledge import AnonymizedKnowledgebase
from gpsr_command_understanding.generator.loading_helpers import load_paired, GRAMMAR_YEAR_TO_MODULE
from gpsr_command_understanding.generator.paired_generator import pairs_without_placeholders
from gpsr_command_understanding.parser import AnonymizingParser, KNearestNeighborParser


def main():
    """Run a REPL that "parses" commands by nearest-neighbor search over enumerated grammar output.

    An optional single command-line argument selects the grammar year (default 2018).
    """
    year = 2018
    if len(sys.argv) == 2:
        year = int(sys.argv[1])
    generator = load_paired("gpsr", GRAMMAR_YEAR_TO_MODULE[year])
    old_kb = generator.knowledge_base
    # Make the generator produce sentences with "anonymous" objects/locations. It wouldn't be feasible to
    # enumerate the grammar like this if we used more than few entities anyways
    generator.knowledge_base = AnonymizedKnowledgebase()
    # Get all anonymous pairs from the grammar. This involves traversing all of the user provided
    # annotations to the grammars. There are some known edgecases which trip warnings, so we'll ignore them.
    with warnings.catch_warnings(record=True) as w:
        all_pairs = pairs_without_placeholders(generator)
        # BUG FIX: message previously read "will enumerating"
        print("Caught {} warnings while enumerating grammar".format(len(w)))
    ground_pairs = [generator.ground(pair, ignore_types=True) for pair in all_pairs.items()]
    baked_pairs = [(tree_printer(key), tree_printer(value)) for key, value in ground_pairs]
    # To parse, we'll look for close neighbors in the space of all generated sentences
    anon_edit_distance_parser = KNearestNeighborParser(baked_pairs, k=1, distance_threshold=5, metric=edit_distance)
    # Use the regular knowledgebase to strip entity names out of the input text
    anonymizer = NumberingAnonymizer.from_knowledge_base(old_kb)
    parser = AnonymizingParser(anon_edit_distance_parser, anonymizer)

    while True:
        print("Type in a command")
        try:
            utterance = input()
        except (EOFError, KeyboardInterrupt):
            # Exit the REPL cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback
            break
        # The AnonymyzingParser will print out the anonymized command, then the KNN parser will print out the
        # nearest neighbors
        parsed = parser(utterance, verbose=True)

        if parsed:
            print(parsed)
        else:
            print("Could not parse utterance based on the command grammar")


if __name__ == "__main__":
    main()
(non_terminal|wildcard |COMMA|PERIOD|choice|WORD)+ | "(" (non_terminal|wildcard|COMMA|PERIOD|choice|WORD)+ ")" 6 | choice: "(" expression+ ("|" expression+ )+ ")" 7 | bare_choice: expression+ ("|" expression+ )+ 8 | 9 | non_terminal: "$"CNAME 10 | wildcard: _simple_wildcard | _rich_wildcard 11 | _simple_wildcard: "{" _pronoun_wildcard "}" 12 | _rich_wildcard: "{" _card_type INT? condition? meta? "}" 13 | condition: "where" (simple_constraint ( "and"? simple_constraint)*) 14 | simple_constraint: WORD ("=" | "!=") (ESCAPED_STRING | BOOL) 15 | meta: "meta:" (WORD | wildcard | non_terminal | choice | COMMA | LPAREN | RPAREN | COLON | DASH | PERIOD | QUOTE | INT)+ 16 | 17 | _card_type: (location_wildcard_super | name_wildcard | object_wildcard_super | void_wildcard | question_wildcard | gesture_wildcard | category_wildcard | whattosay_wildcard) 18 | 19 | ?location_wildcard_super: loc_beacon_wildcard | loc_placement_wildcard | loc_room_wildcard | location_wildcard 20 | location_wildcard: "location"QUESTION_MARK? 21 | loc_placement_wildcard: "location"? "placement"QUESTION_MARK? 22 | loc_beacon_wildcard: "location"? "beacon"QUESTION_MARK? 23 | loc_room_wildcard: "location"? "room"QUESTION_MARK? 24 | 25 | ?object_wildcard_super: object_alike_wildcard | object_known_wildcard | object_special_wildcard | object_wildcard 26 | object_wildcard: "object"QUESTION_MARK? 27 | object_alike_wildcard: ("object alike" | "aobject")QUESTION_MARK? 28 | object_known_wildcard: ("object known" | "kobject")QUESTION_MARK? 29 | object_special_wildcard: ("object special" | "sobject")QUESTION_MARK? 30 | 31 | name_wildcard: "name" ( "female" | "male")? 32 | question_wildcard: "question" 33 | _pronoun_wildcard: pronoun_objective | pronoun_subjective | pronoun_possessive_absolute | pronoun_possessive_adjective 34 | pronoun_objective: "pron" "obj"? 
from typing import List, Dict

import lark
from overrides import overrides

from allennlp.training.metrics import Metric


@Metric.register("token_sequence_accuracy")
class TokenSequenceAccuracy(Metric):
    """
    Exact-match accuracy over token sequences (rather than tensors): a
    prediction is counted as correct only when it equals its gold target
    token-for-token.
    """

    def __init__(self) -> None:
        self._correct_counts = 0.
        self._total_counts = 0.

    @overrides
    def reset(self) -> None:
        self._correct_counts = 0.
        self._total_counts = 0.

    @overrides
    def __call__(self,
                 predictions: List[List[str]],
                 gold_targets: List[List[str]]) -> None:
        self._total_counts += len(predictions)
        # A sequence only scores when the whole thing matches exactly
        self._correct_counts += sum(
            1 for predicted, gold in zip(predictions, gold_targets) if predicted == gold)

    @overrides
    def get_metric(self, reset: bool = False) -> Dict[str, float]:
        accuracy = self._correct_counts / self._total_counts if self._total_counts else 0.
        if reset:
            self.reset()
        return {"seq_acc": accuracy}
#!/usr/bin/env python
"""Aggregate allennlp evaluation results across multiple random-seed runs.

Expects one results folder per seed (folder name ending in the seed number),
containing one directory per experiment, each with one directory per model
holding ``evaluation_<testname>.json`` files. Prints mean/std/count of
sequence accuracy grouped by experiment, model, and test set.
"""
import argparse
import os
import re

import pandas as pd


def get_immediate_subdirectories(a_dir):
    """Return the names (not full paths) of directories directly under a_dir."""
    return [name for name in os.listdir(a_dir)
            if os.path.isdir(os.path.join(a_dir, name))]


def get_files_with_prefix(dir, prefix):
    """Return the names of regular files in dir whose name starts with prefix."""
    return [name for name in os.listdir(dir)
            if os.path.isfile(os.path.join(dir, name)) and name.startswith(prefix)]


def load_results_data_for_experiment(path):
    """Load every evaluation_<testname>.json under path as a pandas Series.

    The test name parsed out of the filename is attached under "test_name".
    """
    results = []
    eval_files = get_files_with_prefix(path, "evaluation")
    for file in eval_files:
        eval_path = os.path.join(path, file)
        # Raw string: "\." in a plain literal is an invalid escape sequence
        name_match = re.match(r"evaluation_(.*)\.", file)
        if not name_match:
            # e.g. a stray "evaluation" file with no extension; skip instead of crashing
            continue
        eval_name = name_match[1]
        eval_data = pd.read_json(eval_path, typ="series")
        eval_data["test_name"] = eval_name
        results.append(eval_data)
    return results


def print_full(x):
    """Print a DataFrame without truncation, restoring display options after."""
    pd.set_option('display.max_rows', len(x))
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    pd.set_option('display.float_format', '{:20,.3f}'.format)
    # None (not -1) disables the column-width limit; -1 is rejected by pandas >= 1.0
    pd.set_option('display.max_colwidth', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("results_folders", nargs="+")
    args = parser.parse_args()

    results_data = []
    for seed_path in args.results_folders:
        # Seed is the trailing run of digits in the folder name. The old
        # seed_path[-1] broke on trailing separators and multi-digit seeds.
        seed_match = re.search(r"(\d+)[/\\]?$", seed_path)
        seed_num = int(seed_match[1]) if seed_match else -1
        experiment_names = get_immediate_subdirectories(seed_path)
        for exp in experiment_names:
            exp_path = os.path.join(seed_path, exp)
            model_dirs = get_immediate_subdirectories(exp_path)
            for model in model_dirs:
                model_res_path = os.path.join(exp_path, model)
                results_data.append((seed_num, exp, model, model_res_path))

    results_data_by_test_set = []
    for exp in results_data:
        test_results = load_results_data_for_experiment(exp[3])
        for result in test_results:
            results_data_by_test_set.append(exp + tuple(result))

    frame = pd.DataFrame(results_data_by_test_set,
                         columns=["seed", "exp", "model", "results_path", "seq_acc", "parse_validity", "loss",
                                  "test_name"])
    settings = frame.groupby(["exp", "model", "test_name"])
    summary_stats = settings.agg({"seq_acc": ["mean", "std", "count"]})
    print_full(summary_stats)


if __name__ == "__main__":
    main()
/gpsr_command_understanding/resources/generator2021/locations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /scripts/test_all_models: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function usage { 4 | cat <<- EOF 5 | usage: test_all_models.sh results_path test_file 6 | 7 | Runs allennlp evaluate and predict on all results directories in a path. 8 | 9 | Options: 10 | -h --help Print this message 11 | -c --use-cpu Use CPU instead of GPU 12 | EOF 13 | exit 1 14 | } 15 | 16 | 17 | if [[ $# < 2 ]]; then 18 | usage 19 | exit 1 20 | fi 21 | 22 | results_path=$1 23 | shift 24 | test_path=$1 25 | shift 26 | 27 | use_cpu=false 28 | 29 | while [[ $# -gt 0 ]]; do 30 | key="$1" 31 | case $key in 32 | -h|--help) 33 | usage 34 | ;; 35 | -c|--use-cpu) 36 | use_cpu=true 37 | ;; 38 | --) 39 | # Get rid of -- 40 | shift 41 | # The remainder are grabbag args to pass to the script 42 | args="${args}$@" 43 | break 44 | ;; 45 | *) 46 | >&2 echo "Unknown argument: $1" 47 | exit 1 48 | ;; 49 | esac 50 | shift # move past argument 51 | done 52 | 53 | # This is a list of results directories (one per config) 54 | results_dirs=${results_path}/* 55 | # We assume the test data file lives in a directory that is named to describe what the dataset is 56 | dataset_dir_path=$(dirname "${test_path}") 57 | dataset_name=$(basename "${dataset_dir_path}") 58 | summary_path="${results_path}/${dataset_name}_results_summary.txt" 59 | 60 | # Clear this file 61 | echo "" > "$summary_path" 62 | 63 | gpu_arg="--cuda-device 0" 64 | if [[ ${use_cpu} = 
# coding: utf-8


class NonTerminal(object):
    """A named non-terminal symbol in a command grammar (written "$name")."""

    def __init__(self, name):
        self.name = name

    def to_human_readable(self):
        return "$" + self.name

    def __str__(self):
        return "NonTerminal({})".format(self.name)

    def __hash__(self):
        # Hash via the string form so equal symbols hash equally
        return hash(self.__str__())

    def __eq__(self, other):
        return isinstance(other, NonTerminal) and self.name == other.name


class WildCard(NonTerminal):
    """A simple wildcard placeholder (written "{name}")."""

    def __init__(self, name):
        super(WildCard, self).__init__(name)

    def __str__(self):
        return "Wildcard({})".format(self.name)

    def to_human_readable(self):
        return "{" + self.name + "}"

    def to_snake_case(self):
        return "_".join(self.name.split(" "))


class ComplexWildCard(WildCard):
    """
    A nonterminal type representing some object, location, gesture, category, or name.

    Carries optional qualifiers: a type, a numeric id, an obfuscation flag,
    grounding conditions, and free-form metadata.
    """

    def __init__(self, name, type=None, wildcard_id=None, obfuscated=False, meta=None, conditions=None):
        self.obfuscated = obfuscated
        self.type = type.strip() if type else None
        self.id = wildcard_id
        self.metadata = meta
        # Conditions are key -> value constraints; default to an empty container
        self.conditions = conditions if conditions else []
        super(ComplexWildCard, self).__init__(name)

    def __str__(self):
        return "Wildcard(" + self.to_human_readable()[1:-1] + ')'

    def to_human_readable(self):
        """Render in the "{name type id ? where ... meta: ...}" grammar syntax."""
        obfuscated_str = '?' if self.obfuscated else None
        if self.metadata:
            meta_members_as_str = list(map(str, self.metadata))
            meta_str = "meta: " + " ".join(meta_members_as_str)
        else:
            meta_str = None
        if self.conditions:
            conditions_str = "where "
            for key, value in self.conditions.items():
                if isinstance(value, str):
                    conditions_str += "{}=\"{}\" ".format(key, value)
                else:
                    conditions_str += "{}={} ".format(key, value)
            conditions_str = conditions_str[:-1]  # drop the trailing space
        else:
            conditions_str = None
        # Get the args in the right order as strings, skipping absent ones
        args_to_map = filter(lambda x: x is not None,
                             [self.name, self.type, self.id, obfuscated_str, conditions_str, meta_str])
        args_str = map(str, args_to_map)
        return '{' + " ".join(args_str) + '}'

    def to_snake_case(self):
        obfuscated_str = '?' if self.obfuscated else None
        items = [self.name, self.type, self.id, obfuscated_str]
        items = filter(lambda x: x is not None, items)
        return "_".join(map(str, items))

    def __hash__(self):
        return hash(self.__str__())

    def __eq__(self, other):
        # BUGFIX: this previously accepted any WildCard and then read
        # .type/.id/.obfuscated/.conditions off it, raising AttributeError
        # when compared against a plain WildCard. Require a ComplexWildCard
        # so such comparisons are simply unequal.
        return isinstance(other,
                          ComplexWildCard) and self.name == other.name and \
            self.type == other.type and self.id == other.id and \
            self.obfuscated == other.obfuscated and self.conditions == other.conditions


# The GPSR grammars all have this as their root
ROOT_SYMBOL = NonTerminal("Main")
def replace_child_in_tree(tree, child_target, replacement, only_once=False):
    """Replace child_target with replacement throughout every subtree of tree.

    Returns the number of replacements made; stops after the first when
    only_once is set.
    """
    total_replaced = 0
    for subtree in tree.iter_subtrees():
        total_replaced += replace_child(subtree, child_target, replacement, only_once=only_once)
        if only_once and total_replaced >= 1:
            return total_replaced
    return total_replaced


def get_wildcards(tree):
    """Return a peekable iterator over the WildCard tokens in tree."""
    return peekable(tree.scan_values(lambda x: isinstance(x, WildCard)))


def get_wildcards_forest(trees):
    """
    Get all wildcards that occur in a grammar
    :param production_rules:
    :return:
    """
    return {token
            for tree in trees
            for token in tree.scan_values(lambda x: isinstance(x, WildCard))}


def determine_unique_data(pairs):
    """Split (utterance -> parse) pairs into a per-utterance map and a per-parse grouping."""
    by_utterance = {}
    by_parse = defaultdict(list)

    for utterance, parse in pairs.items():
        by_utterance[utterance] = parse
        by_parse[parse].append(utterance)

    return by_utterance, by_parse


def chunker(seq, size):
    """Yield successive size-length slices of seq (the last may be shorter)."""
    return (seq[start:start + size] for start in range(0, len(seq), size))


def save_data(data, out_path):
    """Write (sentence, parse) pairs to out_path, shortest sentences first."""
    if len(data) == 0:
        print("Set is empty, not saving file")
        return
    ordered = sorted(data, key=lambda pair: len(pair[0]))
    with open(out_path, "w") as out_file:
        for sentence, parse in ordered:
            out_file.write(sentence + '\n' + str(parse) + '\n')


def flatten(original):
    """Expand (parse, utterances) groups into a flat list of (utterance, parse) pairs."""
    return [(utterance, parse)
            for parse, utterances in original
            for utterance in utterances]


def to_num(s):
    """Return s parsed as an int, or None when it does not parse."""
    try:
        return int(s)
    except ValueError:
        return None


class ParseForward:
    """Wraps a parser so that parse() always uses a fixed start symbol."""

    def __init__(self, parser, start):
        self.__parser = parser
        self.__start = start

    def parse(self, string):
        return self.__parser.parse(string, self.__start)
import os
import random
import csv

from gpsr_command_understanding.generator.generator import Generator
from gpsr_command_understanding.generator.grammar import tree_printer
from gpsr_command_understanding.generator.loading_helpers import load, GRAMMAR_YEAR_TO_MODULE
from gpsr_command_understanding.generator.tokens import ROOT_SYMBOL
from gpsr_command_understanding.util import chunker

seed = 0
# NOTE: name kept (typo and all) for compatibility with existing importers
rehprasings_per_hit = 6
groundings_per_parse = 1
hits_per_file = 100


def main():
    """Enumerate the 2021 GPSR grammar and emit CSV batches for paraphrasing HITs.

    Each output row holds `rehprasings_per_hit` (command, ungrounded, hint)
    triples plus a HIT id; files are chunked `hits_per_file` rows apiece.
    """
    random_source = random.Random(seed)

    generator = Generator(None)
    load(generator, "gpsr", GRAMMAR_YEAR_TO_MODULE[2021])

    all_ungrounded = list(generator.generate(ROOT_SYMBOL, random_generator=random_source))
    # Throw away metadata (mutates each tree in place)
    for sentence in all_ungrounded:
        generator.extract_metadata(sentence)
    grounded_examples = []
    for ungrounded in all_ungrounded:
        grounder = generator.generate_grounding_assignments(ungrounded, random_generator=random_source)
        for _ in range(groundings_per_parse):
            assignment = next(grounder)
            grounded = generator.apply_grounding_assignment(ungrounded, assignment)
            # Remove unhelpful wildcards like pronouns
            for key in list(assignment.keys()):
                if "pron" in key.name:
                    del assignment[key]
            hint = ",".join(assignment.values())
            grounded_examples.append((grounded, ungrounded, hint))
    random_source.shuffle(grounded_examples)
    all_examples = grounded_examples

    command_columns = [("command" + str(x), "ungrounded" + str(x), "groundinghint" + str(x)) for x in
                       range(0, rehprasings_per_hit)]
    # Flatten the per-slot column-name triples into one header row
    command_columns = [name for triple in command_columns for name in triple]
    command_columns.append("hitid")
    chunks = list(chunker(all_examples, rehprasings_per_hit))
    print("Writing {} HITS".format(len(chunks)))
    i = 0
    chunk_count = 0
    while i < len(chunks):
        out_file_path = os.path.abspath(
            os.path.dirname(__file__) + "/../../data/paraphrasing_input_{}.csv".format(chunk_count))
        # newline='' is required by the csv module to avoid blank rows on Windows
        with open(out_file_path, 'w', newline='') as csvfile:
            output = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
            output.writerow(command_columns)

            for j in range(i, min(i + hits_per_file, len(chunks))):
                chunk = chunks[j]
                if len(chunk) < rehprasings_per_hit:
                    needed = rehprasings_per_hit - len(chunk)
                    # Sample from earlier hits to fill out this last short one.
                    # BUGFIX: previously sampled from chunks[:i], which is empty
                    # for the first file and made random.sample raise ValueError.
                    pool = [pair for earlier in chunks[:j] for pair in earlier]
                    chunk += random_source.sample(pool, k=needed)
                line = []
                for command, ungrounded, hint in chunk:
                    line += [tree_printer(command), tree_printer(ungrounded), hint]
                # HIT ID
                line.append(str(j))
                output.writerow(line)

        # Let's verify that we can load the output back in...
        with open(out_file_path, 'r', newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            for _row in reader:
                pass
                # print(_row)
        i += hits_per_file
        chunk_count += 1


if __name__ == "__main__":
    main()
8 | 9 | Options: 10 | -t --train-path Absolute path to the training data 11 | -v --val-path Absolute path to the validation data 12 | -c --cpu Use the CPU (instead of GPU) 13 | -h --help Print this message 14 | EOF 15 | exit 1 16 | } 17 | 18 | 19 | if [[ $# -lt 2 ]]; then 20 | echo "Pass a name and a path to a directory of experiments" 21 | exit 1 22 | fi 23 | 24 | force=0 25 | use_cpu=0 26 | experiment_name="$1" 27 | shift 28 | experiment_path="$1" 29 | shift 30 | 31 | if [[ ! -d "$experiment_path" ]]; then 32 | echo "Second argument should be a path to a directory of experiment configurations" 33 | exit 1 34 | fi 35 | 36 | 37 | results_path="${experiment_path}/results_${experiment_name}" 38 | mkdir -p "$results_path" 39 | args=() 40 | while [[ $# -gt 0 ]]; do 41 | key="$1" 42 | case $key in 43 | -t|--train-path) 44 | shift 45 | train_path=$1 46 | ;; 47 | -v|--val-path) 48 | shift 49 | validation_path=$1 50 | ;; 51 | -c|--cpu) 52 | use_cpu=1 53 | ;; 54 | -h|--help) 55 | usage 56 | ;; 57 | -f|--force) 58 | force=1 59 | ;; 60 | --) 61 | # Get rid of -- 62 | shift 63 | # The remainder are grabbag args to pass to the script 64 | args+=($@) 65 | break 66 | ;; 67 | *) 68 | >&2 echo "Unknown argument: $1" 69 | exit 1 70 | ;; 71 | esac 72 | shift # move past argument 73 | done 74 | 75 | if [[ $use_cpu == 0 ]]; then 76 | export CUDA_DEVICE=0 77 | else 78 | export CUDA_DEVICE=-1 79 | fi 80 | if [[ "$train_path" ]]; then 81 | export TRAIN_PATH="${train_path}" 82 | fi 83 | if [[ "$validation_path" ]]; then 84 | export VALIDATION_PATH="${validation_path}" 85 | fi 86 | 87 | export EVALUATE_ON_TEST=0 88 | experiment_files=${experiment_path}/*.jsonnet 89 | summary_path="${results_path}/training_summary.txt" 90 | 91 | if [[ $force == 0 && -f "${summary_path}" ]]; then 92 | echo "${summary_path} already exists. 
Remove it if you want to train again" 93 | exit 1 94 | fi 95 | # Clear the file 96 | echo "" > "$summary_path" 97 | 98 | # Runs a particular experiment file 99 | run_file() { 100 | local file=$1 101 | model_name="${TRANSFORMER_NAME}${name}" 102 | echo "${model_name}" 103 | set -x; 104 | # TODO: Figure out how to quiet some but not all logging 105 | allennlp train "$file" -s "${results_path}/${model_name}" --include-package gpsr_command_understanding "${args[@]}" > /dev/null 2>&1 106 | set +x; 107 | 108 | # Paste the final metrics into a summary file 109 | metrics_path="${results_path}/${model_name}/metrics.json" 110 | echo -e "\n ${model_name}" >> "$summary_path" 111 | cat "$metrics_path" >> "$summary_path" 112 | } 113 | 114 | set -x; 115 | for file in ${experiment_files}; do 116 | name=$(basename "$file") 117 | # remove .json extension 118 | name="${name%.*}" 119 | if [[ "${name}" == "transformer_seq2seq" ]]; then 120 | # Get the list of different transformers to try 121 | readarray -t embeddings < "${experiment_path}/transformers" 122 | for embedding in "${embeddings[@]}"; do 123 | # If the line starts with a hash, skip this parameterization 124 | if [[ ${embedding:0:1} == "#" ]]; then 125 | continue 126 | fi 127 | export TRANSFORMER_NAME="${embedding}" 128 | run_file "$file" 129 | done 130 | else 131 | export TRANSFORMER_NAME="" 132 | run_file "$file" 133 | fi 134 | done 135 | set +x; 136 | -------------------------------------------------------------------------------- /gpsr_command_understanding/data/enumerate_grammar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from random import Random 4 | 5 | import numpy as np 6 | from os.path import join 7 | import re 8 | 9 | from gpsr_command_understanding.generator.grammar import tree_printer 10 | from gpsr_command_understanding.generator.loading_helpers import load, GRAMMAR_YEAR_TO_MODULE, load_paired 11 | from 
def get_annotated_sentences(sentences_and_pairs):
    """Partition grammar sentences by semantic-annotation coverage.

    Returns (annotated, unannotated, out_of_grammar): grammar sentences that
    have a parse, grammar sentences lacking one, and annotated sentences the
    grammar cannot actually produce.
    """
    sentences, pairs = sentences_and_pairs
    expanded_pairs = {tree_printer(key): tree_printer(value) for key, value in pairs.items()}
    # These came straight from the grammar
    grammar_sentences = set(tree_printer(x) for x in sentences)
    # These came from expanding the semantics, so they may not be in the grammar
    annotated_sentences = set(expanded_pairs.keys())
    # Only keep annotations that cover sentences actually in the grammar
    out_of_grammar = annotated_sentences.difference(grammar_sentences)
    annotated_sentences.intersection_update(grammar_sentences)
    unannotated_sentences = grammar_sentences.difference(annotated_sentences)
    return annotated_sentences, unannotated_sentences, out_of_grammar


def main():
    """Enumerate a year's grammar and write sentence, pair, and coverage files.

    Usage: enumerate_grammar.py <year> <task>
    """
    if len(sys.argv) < 3:
        # Fail with a usage message instead of an IndexError
        sys.exit("usage: enumerate_grammar.py <year> <task>")
    year = int(sys.argv[1])
    task = sys.argv[2]
    out_root = os.path.abspath(os.path.dirname(__file__) + "/../../data/")

    generator = load_paired(task, GRAMMAR_YEAR_TO_MODULE[year])

    sentences = [pair[0] for pair in
                 generator.generate(ROOT_SYMBOL, yield_requires_semantics=False)]
    # Strip metadata in place before deduplicating
    for sentence in sentences:
        generator.extract_metadata(sentence)
    sentences = set(sentences)

    out_path = join(out_root, "{}_{}_sentences.txt".format(year, task))
    with open(out_path, "w") as f:
        for sentence in sentences:
            f.write(tree_printer(sentence) + '\n')

    baked_sentences = [tree_printer(x) for x in sentences]
    all_pairs = pairs_without_placeholders(generator)
    baked_pairs = {tree_printer(key): tree_printer(value) for key, value in all_pairs.items()}

    annotated, unannotated, out_of_grammar = get_annotated_sentences((sentences, all_pairs))

    unique_sentence_parses = set(baked_pairs[ann_sen] for ann_sen in annotated)

    out_path = join(out_root, "{}_{}_pairs.txt".format(year, task))

    with open(out_path, "w") as f:
        for sentence, parse in baked_pairs.items():
            f.write(sentence + '\n' + parse + '\n')

    meta_out_path = join(out_root, "{}_{}_annotations_meta.txt".format(year, task))
    with open(meta_out_path, "w") as f:
        f.write("Coverage:\n")
        # Guard the percentage against an empty grammar (no sentences at all)
        total_sentences = len(baked_sentences)
        coverage = 100.0 * len(annotated) / total_sentences if total_sentences else 0.0
        f.write("{0}/{1} {2:.1f}%\n".format(len(annotated), total_sentences, coverage))
        f.write("\t unique parses: {}\n".format(len(unique_sentence_parses)))
        sen_lengths = [len(sentence.split()) for sentence in baked_pairs.keys()]
        avg_sentence_length = np.mean(sen_lengths)
        parse_lengths = []
        filtered_parse_lengths = []
        for parse in unique_sentence_parses:
            parse_lengths.append(len(parse.split()))
            stop_tokens_removed = re.sub(r"(\ e\ |\"|\)|\()", "", parse)
            filtered_parse_lengths.append(len(stop_tokens_removed.split()))
        avg_parse_length = np.mean(parse_lengths)
        avg_filtered_parse_length = np.mean(filtered_parse_lengths)
        f.write(
            "\t avg sentence length (tokens): {:.1f} avg parse length (tokens): {:.1f} avg filtered parse length (tokens): {:.1f}\n".format(
                avg_sentence_length, avg_parse_length, avg_filtered_parse_length))

    """print("No parses for:")
    for sentence in sorted(unannotated):
        print(sentence)
    print("-----------------")"""


if __name__ == "__main__":
    main()
$guide 9 | #$Main = $follow 10 | 11 | ############################################################################## 12 | # 13 | # Production Rules required by common.txt 14 | # 15 | ############################################################################## 16 | #; Define an object type 17 | #$object = {aobject} 18 | {aobject} = OBJECT 19 | 20 | #; Rule for finding a specific (named) person 21 | #$findp = $vbfind a $pgenders 22 | #$findp = $vbfind a person {gesture} 23 | #$findp = $vbfind a person $pose 24 | 25 | #; A named or described person in the given place 26 | #$whowhere = the person {gesture} in the {room} 27 | the person {gesture} in the {room} = O B-PERSON {gesture} O O {room} 28 | 29 | {gesture} = GESTURE 30 | 31 | ############################################################################## 32 | # 33 | # Manipulation 34 | # 35 | ############################################################################## 36 | #$deliver = $vbbtake the {aobject} from the {room} to the {placement 2} 37 | $vbbtake the {aobject} from the {room} to the {placement 2} = $vbbtake O {aobject} O O FROM[{room}] O O TO[{placement 2}] 38 | 39 | #$deliver = $vbbring me the {aobject} from the {placement} 40 | $vbbring me the {aobject} from the {placement} = $vbbring TO[B-OPERATOR] O {aobject} O O FROM[{placement}] 41 | 42 | #$deliver = $takefrom and ($delivme | $delivat) 43 | #$deliver = $takefrom and $place 44 | 45 | ############################################################################## 46 | # 47 | # Find people 48 | # 49 | ############################################################################## 50 | #$fndppl = Tell me the (name | gender | pose) of the person at the {beacon} 51 | Tell me the (name | gender | pose) of the person at the {beacon} = {get_info} O TO[B-OPERATOR] O (B-INFO | B-INFO | B-INFO) O O FROM[B-PERSON] O O FROM[{beacon}] 52 | 53 | #$fndppl = Tell me the (name | gender | pose) of the person in the {room} 54 | Tell me the (name | gender | pose) of 
the person in the {room} = {get_info} O TO[B-OPERATOR] O (B-INFO | B-INFO | B-INFO) O O FROM[B-PERSON] O O FROM[{room}] 55 | 56 | #$fndppl = Tell me how many people in the {room} are ($pgenderp | $pose) 57 | Tell me how many people in the {room} are ($pgenderp | $pose) = {count} O TO[B-OPERATOR] O O FROM[B-PERSON] O O FROM[{room}] O ($pgenderp | $pose) 58 | 59 | ############################################################################## 60 | # 61 | # Find objects 62 | # 63 | ############################################################################## 64 | #$fndobj = $vbfind the {aobject?} in the {room} 65 | $vbfind the {aobject?} in the {room} = {find} $vbfind O {aobject?} O O {room} 66 | 67 | #$fndobj = Tell me how many {category} there are on the {placement} 68 | Tell me how many {category} there are on the {placement} = {count} O TO[B-OPERATOR] O O {category} O O O O FROM[{placement}] 69 | 70 | #$fndobj = Tell me what's the $oprop object on the {placement} 71 | Tell me what's the $oprop object on the {placement} = {find} O TO[B-OPERATOR] O O $oprop B-OBJECT O O FROM[{placement}] 72 | 73 | #$fndobj = Tell me what's the $oprop {category} on the {placement} 74 | Tell me what's the $oprop {category} on the {placement} = {find} O TO[B-OPERATOR] O O $oprop {category} O O FROM[{placement}] 75 | 76 | #$fndobj = $vbfind the {category} in the {room} 77 | $vbfind the {category} in the {room} = {find} $vbfind O {category} O O {room} 78 | 79 | {aobject?} = OBJECT 80 | {category} = OBJECT 81 | 82 | ############################################################################## 83 | # 84 | # Rules 85 | # 86 | ############################################################################## 87 | #; Find people 88 | 89 | #$pgenders = man | woman | boy | girl | male person | female person 90 | man | woman | boy | girl | male person | female person = B-GENDER | B-GENDER | B-GENDER | B-GENDER | B-GENDER B-PERSON | B-GENDER B-PERSON 91 | 92 | #$pgenderp = men | women | boys | girls 
| male | female 93 | men | women | boys | girls | male | female = B-GENDER | B-GENDER | B-GENDER | B-GENDER | B-GENDER | B-GENDER 94 | 95 | #$pose = sitting | standing | lying down 96 | sitting | standing | lying down = B-POSE | B-POSE | B-POSE I-POSE -------------------------------------------------------------------------------- /gpsr_command_understanding/generator/knowledge.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from collections import defaultdict 3 | 4 | import importlib_resources 5 | 6 | from gpsr_command_understanding.generator.xml_parsers import ObjectParser, LocationParser, NameParser, GesturesParser, \ 7 | QuestionParser 8 | 9 | 10 | class KnowledgeBase: 11 | def __init__(self, items, attributes): 12 | self.by_name = items 13 | self.attributes = attributes 14 | 15 | @staticmethod 16 | def from_dir(xml_path): 17 | raw_ontology_xml = list(map(lambda x: importlib_resources.open_text(xml_path, x), 18 | ["objects.xml", "locations.xml", "names.xml", "gestures.xml", "questions.xml", "whattosay.txt", "categories.txt"])) 19 | kb = KnowledgeBase.from_component_paths(*raw_ontology_xml) 20 | # Clean up IO to avoid warnings 21 | for stream in raw_ontology_xml: 22 | stream.close() 23 | return kb 24 | 25 | @staticmethod 26 | def from_component_paths(objects_xml_file, locations_xml_file, names_xml_file, gestures_xml_file, 27 | questions_xml_file, sayings_file, categories_file): 28 | object_parser = ObjectParser(objects_xml_file) 29 | locations_parser = LocationParser(locations_xml_file) 30 | names_parser = NameParser(names_xml_file) 31 | gestures_parser = GesturesParser(gestures_xml_file) 32 | question_parser = QuestionParser(questions_xml_file) 33 | 34 | sayings = sayings_file.readlines() 35 | sayings = list(map(str.strip, sayings)) 36 | 37 | # Drop the header 38 | categories_data = categories_file.readlines()[1:] 39 | categories_data = list(map(tuple, csv.reader(categories_data))) 40 | categories_plural = 
list(map(lambda x: x[0], categories_data)) 41 | 42 | objects = object_parser.all_objects() 43 | names = names_parser.all_names() 44 | locations = locations_parser.get_all_locations() 45 | gestures = list(gestures_parser.get_gestures()) 46 | questions = list(question_parser.get_question_answer_dict().keys()) 47 | attributes = {"object": object_parser.get_attributes(), "location": locations_parser.get_attributes(), "category": {}} 48 | 49 | attributes["object"]["category"] = object_parser.get_objects_to_categories() 50 | attributes["category"]["singular"] = {x[0]: x[1] for x in categories_data} 51 | attributes["location"]["in"] = locations_parser.get_room_locations_are_in() 52 | 53 | by_name = { 54 | "object": objects, 55 | "category": categories_plural, 56 | "name": names, 57 | "location": locations, 58 | "gesture": gestures, 59 | "question": questions, 60 | "whattosay": sayings 61 | } 62 | return KnowledgeBase(by_name, attributes) 63 | 64 | 65 | class AnonymizedKnowledgebase: 66 | def __init__(self): 67 | names = [ 68 | "object", 69 | "category", 70 | "name", 71 | "location", 72 | "gesture", 73 | "question", 74 | "whattosay" 75 | ] 76 | rooms = ["room" + str(i) for i in range(3)] 77 | self.by_name = {name: [name + str(i) for i in range(3)] for name in names} 78 | self.by_name["location"] += rooms 79 | self.attributes = {"object": {"type": defaultdict(lambda: "known"), 80 | "category": defaultdict(lambda: "category1")}, 81 | "location": {"isplacement": defaultdict(lambda: True), 82 | "isbeacon": defaultdict(lambda: True), 83 | "isroom": defaultdict(lambda: False), 84 | "in": defaultdict(lambda: "room1")}, 85 | "category": {"singular": {x: x for x in self.by_name["category"]}}} 86 | for room in rooms: 87 | self.attributes["location"]["isroom"][room] = True 88 | 89 | # Make sure the defaultdicts have concrete values set for all the known keys 90 | for object in self.by_name["object"]: 91 | self.attributes["object"]["type"][object] 92 | 
self.attributes["object"]["category"][object] 93 | for location in self.by_name["location"]: 94 | self.attributes["location"]["isplacement"][location] 95 | self.attributes["location"]["isbeacon"][location] 96 | self.attributes["location"]["in"][location] 97 | -------------------------------------------------------------------------------- /gpsr_command_understanding/generator/loading_helpers.py: -------------------------------------------------------------------------------- 1 | import importlib_resources 2 | 3 | from gpsr_command_understanding.generator.generator import Generator 4 | from gpsr_command_understanding.generator.knowledge import KnowledgeBase 5 | from gpsr_command_understanding.generator.paired_generator import PairedGenerator 6 | 7 | GRAMMAR_DIR_2018 = "gpsr_command_understanding.resources.generator2018" 8 | GRAMMAR_DIR_2019 = "gpsr_command_understanding.resources.generator2019" 9 | GRAMMAR_DIR_2021 = "gpsr_command_understanding.resources.generator2021" 10 | 11 | GRAMMAR_YEAR_TO_MODULE = {2018: GRAMMAR_DIR_2018, 2019: GRAMMAR_DIR_2019, 2021: GRAMMAR_DIR_2021} 12 | 13 | def load_2018_by_cat(grammar_dir): 14 | kb = KnowledgeBase.from_dir(grammar_dir) 15 | with importlib_resources.open_text(grammar_dir, "common_rules.txt") as common_file: 16 | common = common_file.readlines() 17 | 18 | cat1_gen = Generator(kb, grammar_format_version=2018) 19 | cat2_gen = Generator(kb, grammar_format_version=2018) 20 | cat3_gen = Generator(kb, grammar_format_version=2018) 21 | with importlib_resources.open_text(grammar_dir, "gpsr_category_1_grammar.txt") as cat1: 22 | cat1_gen.load_rules([common, cat1]) 23 | with importlib_resources.open_text(grammar_dir, "gpsr_category_2_grammar.txt") as cat2: 24 | cat2_gen.load_rules([common, cat2]) 25 | with importlib_resources.open_text(grammar_dir, "gpsr_category_3_grammar.txt") as cat3: 26 | cat3_gen.load_rules([common, cat3]) 27 | 28 | return [cat1_gen, cat2_gen, cat3_gen] 29 | 30 | 31 | def load_paired_2018_by_cat(grammar_dir): 
    # Lift each plain per-category generator into a paired
    # (utterance, semantics) generator, preserving category order.
    cat1_gen, cat2_gen, cat3_gen = map(PairedGenerator.from_generator, load_2018_by_cat(grammar_dir))

    # Category 1 semantics stand alone; category 2 layers on top of the
    # category 1 annotations, so cat2_gen loads both files.
    with importlib_resources.open_text(grammar_dir, "gpsr_category_1_semantics.txt") as cat1:
        # NOTE(review): passed as a bare stream here, while the cat2/cat3 calls
        # below wrap their streams in a list -- confirm load_semantics_rules
        # accepts both forms.
        cat1_gen.load_semantics_rules(cat1)
    with importlib_resources.open_text(grammar_dir,
                                       "gpsr_category_1_semantics.txt") as cat1, importlib_resources.open_text(
        grammar_dir, "gpsr_category_2_semantics.txt") as cat2:
        cat2_gen.load_semantics_rules([cat1, cat2])
    with importlib_resources.open_text(grammar_dir, "gpsr_category_3_semantics.txt") as cat3:
        cat3_gen.load_semantics_rules([cat3])

    return [cat1_gen, cat2_gen, cat3_gen]


def load_2018(grammar_dir):
    """Build one Generator holding all three 2018 GPSR category grammars.

    The knowledge base is read from the same resource package, and the shared
    common_rules.txt is loaded alongside every category grammar.
    """
    kb = KnowledgeBase.from_dir(grammar_dir)
    generator = Generator(kb, grammar_format_version=2018)

    common_path = importlib_resources.open_text(grammar_dir, "common_rules.txt")
    grammar_files = [common_path, importlib_resources.open_text(grammar_dir, "gpsr_category_1_grammar.txt"),
                     importlib_resources.open_text(grammar_dir, "gpsr_category_2_grammar.txt"),
                     importlib_resources.open_text(grammar_dir, "gpsr_category_3_grammar.txt")]
    generator.load_rules(grammar_files)

    # Close the package-resource streams once the rules are parsed.
    for file in grammar_files:
        file.close()
    return generator


def load_paired_2018(grammar_dir):
    """Like load_2018, but also attach the semantics of all three categories."""
    generator = load_2018(grammar_dir)
    generator = PairedGenerator.from_generator(generator)
    semantics = [importlib_resources.open_text(grammar_dir, "gpsr_category_1_semantics.txt"),
                 importlib_resources.open_text(grammar_dir, "gpsr_category_2_semantics.txt"),
                 importlib_resources.open_text(grammar_dir, "gpsr_category_3_semantics.txt")]
    generator.load_semantics_rules(semantics)
    for file in semantics:
        file.close()
    return generator


def load(generator, task, grammar_dir, expand_shorthand=True):
    """Load one task's grammar rules (plus common rules if imported) into generator."""
    # Replace whatever knowledge base the generator had with this package's.
    generator.knowledge_base = KnowledgeBase.from_dir(grammar_dir)
    with 
importlib_resources.open_text(grammar_dir, task + ".txt") as task_rules_file: 76 | task_rules = task_rules_file.readlines() 77 | # Only load common rules if they're imported in this task's grammar 78 | if any(map(lambda rule: "common.txt" in rule, task_rules)): 79 | with importlib_resources.open_text(grammar_dir, "common_rules.txt") as common_rules_file: 80 | task_rules += common_rules_file.readlines() 81 | generator.load_rules([task_rules], 82 | expand_shorthand=expand_shorthand) 83 | 84 | 85 | def load_paired(task, grammar_dir, expand_shorthand=True): 86 | generator = Generator(None) 87 | load(generator, task, grammar_dir, expand_shorthand=expand_shorthand) 88 | generator = PairedGenerator.from_generator(generator) 89 | with importlib_resources.open_text(grammar_dir, task + "_semantics.txt") as semantics: 90 | generator.load_semantics_rules(semantics) 91 | return generator 92 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/speech-crowdsourcing/record-element.js.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"file":"record-element.js","sourceRoot":"./","sources":["record-element.ts"],"names":[],"mappings":";;;;;;;;;;AAQA,MAAM,CAAC,YAAY,SAAG,MAAM,CAAC,YAAY,mCAAI,MAAM,CAAC,kBAAkB,CAAC;AAIvE,SAAe,SAAS;;;QACpB,IAAI,OAAA,MAAM,CAAC,iBAAiB,0CAAE,MAAM,MAAK,KAAK,EAAE;YAC5C,MAAM,CAAC,iBAAiB,GAAG,IAAI,CAAA;YAC/B,MAAM,CAAC,kBAAkB,GAAG,IAAI,CAAA;SACnC;QACD,aAAO,MAAM,CAAC,iBAAiB,mCAAI,MAAM;;gBAErC,MAAM,CAAC,kBAAkB,GAAG,IAAI,YAAY,EAAE,CAAC;gBAG/C,IAAI;oBAEA,MAAM,CAAC,iBAAiB,GAAG,MAAa,SAAS,CAAC,YAAa,CAAC,YAAY,CACxE;wBACI,OAAO,EAAE;4BACL,eAAe,EAAE,KAAK;4BACtB,YAAY,EAAE,CAAC;4BACf,gBAAgB,EAAE,KAAK;4BACvB,OAAO,EAAE,CAAC;4BACV,gBAAgB,EAAE,KAAK;4BACvB,UAAU,EAAE,KAAK;4BACjB,UAAU,EAAE,EAAE;4BACd,aAAa,EAAE,MAAM;4BACrB,MAAM,EAAE,GAAG;yBACd;qBACJ,CAAC,CAAC;oBACP,OAAQ,MAAM,CAAC,iBAAiB,CAAC;iBACpC;gBAAC,OAAO,CAAC,EAAE;oBACR,KAAK,CAAC,gCAAgC,GAAG,CAAC,CAAC,CAAC;oBAC5C,OAAO,IAAI,CAAA;iBACd;YACL,CAAC;SAAA,EAAE,CAAC;;CAEP;AAED,MAAM,OAAO,oBAAqB,SAAQ,WAAW;IAUjD;QACI,KAAK,EAAE,CAAC;QARJ,mBAAc,GAAW,EAAE,CAAA;QAC3B,eAAU,GAAY,KAAK,CAAA;QAS/B,IAAI,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,EAAC,IAAI,EAAE,MAAM,EAAC,CAAC,CAAC;QAC/C,IAAI,eAAe,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;QACtD,eAAe,CAAC,SAAS,GAAG,QAAQ,CAAA;QACpC,eAAe,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;QACvC,eAAe,CAAC,KAAK,CAAC,aAAa,GAAG,KAAK,CAAA;QAC3C,eAAe,CAAC,KAAK,CAAC,WAAW,GAAG,KAAK,CAAA;QACzC,MAAM,CAAC,WAAW,CAAC,eAAe,CAAC,CAAA;QACnC,eAAe,CAAC,OAAO,GAAG,GAAG,EAAE,GAAE,IAAI,CAAC,qBAAqB,EAAE,CAAA,CAAA,CAAC,CAAA;QAC9D,IAAI,CAAC,oBAAoB,GAAG,eAAe,CAAA;QAG3C,IAAI,CAAC,aAAa,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAA;QACpD,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAChD,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;QAEtC,IAAI,CAAC,aAAa,CAAC,gBAAgB,CAAC,gBAAgB,EAAE,GAAG,EAAE,GAAE,IAAI,CAAC,aAAa,CAAC,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAA,CAAA,CAAC,CAAC,CAAA;QAEhH,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;QAC9C,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QACxC,IAAI,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAA;IAEjD,CAAC;
IAED,IAAI,SAAS;QAGT,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,KAAK,EAAE,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;YAC3E,OAAO,IAAI,CAAA;SACd;aAAM;YACH,OAAO,IAAI,CAAC,aAAa,CAAC,GAAG,CAAA;SAChC;IACL,CAAC;IAED,IAAI,WAAW;;QACX,OAAO,OAAA,IAAI,CAAC,QAAQ,0CAAE,KAAK,MAAK,WAAW,CAAA;IAC/C,CAAC;IAEA,qBAAqB;QAClB,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,IAAI,CAAC,IAAI,EAAE,CAAA;SACd;aAAM,IAAI,IAAI,CAAC,SAAS,EAAE;YACtB,IAAI,CAAC,KAAK,EAAE,CAAA;SAChB;aAAM;YACH,IAAI,CAAC,MAAM,EAAE,CAAA;SAChB;IACL,CAAC;IAEK,MAAM;;;YAER,IAAI,OAAA,IAAI,CAAC,QAAQ,0CAAE,KAAK,MAAK,UAAU,EAAE;gBACrC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAA;aACvB;YACD,IAAI,CAAC,QAAQ,SAAG,IAAI,CAAC,QAAQ,mCAAM,MAAM,CAAE,GAAQ,EAAE;gBACjD,IAAI,MAAM,GAAG,MAAM,SAAS,EAAE,CAAA;gBAC9B,IAAI,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;oBACnC,IAAI,CAAC,WAAW,CAAC,SAAS,GAAG,uGAAuG,CAAA;oBACpI,OAAO,IAAI,CAAC;iBACf;gBACD,IAAI,OAAO,aAAa,KAAK,WAAW,EAAE;oBACtC,IAAI,CAAC,WAAW,CAAC,SAAS,GAAG,6EAA6E,CAAA;oBAC1G,OAAO,IAAI,CAAC;iBACf;gBACD,IAAI,GAAG,GAAG,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;gBACpC,GAAG,CAAC,gBAAgB,CAAC,eAAe,EAAG,CAAC,CAAM,EAAE,EAAE,GAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAA,CAAA,CAAC,CAAC,CAAA;gBAC3E,GAAG,CAAC,gBAAgB,CAAC,MAAM,EAAE,GAAG,EAAE,GAAE,IAAI,CAAC,OAAO,EAAE,CAAA,CAAA,CAAC,CAAC,CAAA;gBACpD,GAAG,CAAC,gBAAgB,CAAC,OAAO,EAAE,GAAG,EAAE,GAAE,IAAI,CAAC,OAAO,EAAE,CAAA,CAAA,CAAC,CAAC,CAAA;gBACrD,OAAO,GAAG,CAAC;YACf,CAAC,CAAA,CAAC,EAAE,CAAA;YACJ,IAAI,IAAI,CAAC,QAAQ,KAAK,IAAI,EAAE;gBACxB,OAAO;aACV;YACD,IAAI,CAAC,UAAU,GAAG,KAAK,CAAA;YACvB,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAA;YACrB,IAAI,CAAC,WAAW,CAAC,SAAS,GAAG,EAAE,CAAA;;KAClC;IAED,OAAO;QACH,IAAI,CAAC,oBAAoB,CAAC,SAAS,GAAG,MAAM,CAAC;QAC7C,IAAI,CAAC,aAAa,CAAC,IAAI,WAAW,CAAC,kBAAkB,EAAE,EAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAC,CAAC,CAAC,CAAA;IAC5F,CAAC;IAED,IAAI;QACA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;QACpB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAA;IAC1B,CAAC;IAED,OAAO;QACH,IAAI,CAAC,oBAAoB,CAAC,SAAS,GAAG,QAAQ,CAAA;QAC9C,IAAI,CAAC,YAAY,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,IAAI,CAAC,aAAa,CAAC
,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;QACtE,IAAI,CAAC,oBAAoB,CAAC,SAAS,GAAG,OAAO,CAAA;QAC7C,IAAI,CAAC,aAAa,CAAC,IAAI,WAAW,CAAC,kBAAkB,EAAE,EAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAC,CAAC,CAAC,CAAA;IAC5F,CAAC;IAED,KAAK;QAED,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,GAAG,CAAA;QAC5B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAA;QACxB,IAAI,CAAC,cAAc,GAAG,EAAE,CAAA;QACxB,IAAI,CAAC,oBAAoB,CAAC,SAAS,GAAG,OAAO,CAAA;IACjD,CAAC;IAEA,aAAa,CAAC,CAAM;QACjB,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE;YACjB,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;SACpC;QAED,IAAG,IAAI,CAAC,UAAU,KAAK,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,KAAK,UAAU,EAAE;YAC/D,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;SACxB;IACL,CAAC;CAGJ;AAED,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,gBAAgB,EAAE,oBAAoB,CAAC,CAAA"} -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2021/common_rules.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This grammar file has no main, therefore, cannot be loaded 3 | # standalone. 
Import using: 4 | # ; import common.txt 5 | # 6 | # The following rules must be defined in the importer 7 | # ; An object type 8 | # $object 9 | # ; Rule for finding a specific (named) person 10 | # $findp 11 | # ; A named or described person in the given place 12 | # $whowhere 13 | # 14 | ; grammar name Common 15 | 16 | ############################################################################## 17 | # 18 | # Manipulation 19 | # 20 | ############################################################################## 21 | $deliver = $take and $vbplace it on the {placement 2} 22 | $deliver = $vbplace the $object on the {placement 2} 23 | $deliver = $vbbring me the $object 24 | $deliver = $vbdeliver the $object to $someone 25 | $deliver = $takefrom and $vbplace it on the {placement 2} 26 | $deliver = $goplace, $vbfind the $object, and ($delivme | $delivat) 27 | $deliver = $goplace, $vbfind the $object, and $place 28 | 29 | 30 | 31 | ############################################################################## 32 | # 33 | # Tell people things 34 | # 35 | ############################################################################## 36 | $fndppl = $findp in the {room} and $speak 37 | $fndppl = $goroom, $findp, and $speak 38 | 39 | 40 | 41 | ############################################################################## 42 | # 43 | # Find objects 44 | # 45 | ############################################################################## 46 | $fndobj = tell me how many $object there are on the {placement} 47 | $fndobj = $vbfind the $object in the {room} 48 | 49 | 50 | ############################################################################## 51 | # 52 | # Follow people 53 | # 54 | ############################################################################## 55 | $follow = $vbfollow {name 1} from the {beacon 1} to the {room 2} 56 | $follow = meet {name 1} at the {beacon 1} and $vbfollow {pron} $fllwdest 57 | $follow = $gobeacon, meet {name 1}, and $vbfollow {pron} 
$fllwhdst 58 | 59 | $fllmeet = meet {name 1} | find a person 60 | $fllwdest = (to the {room 2}) | $fllwhdst 61 | $fllwhdst = {void meta: Follow {name 1} to the {room 2}} 62 | 63 | 64 | ############################################################################## 65 | # 66 | # Guide people 67 | # 68 | ############################################################################## 69 | $guide = $gdcmd $gdmeta 70 | 71 | $gdcmd = $vbguide {name 1} from the {beacon 1} to the {beacon 2} 72 | $gdcmd = meet {name 1} at the {beacon 1} and $guideto 73 | $gdcmd = $gobeacon, meet {name 1}, and $guideto 74 | $gdcmd = $vbguide {name 1} to the {beacon 2}, $gdwhere 75 | 76 | $guideto = $vbguide {pron} to the {beacon 2} 77 | $gdwhere = (you (may | can | will) find {pron} at | they're (by | beside) | they might be near) the {beacon 1} 78 | $gdmeta = {void meta: The person being guided must deviate when indicated by the referee before reaching the {beacon 2}} 79 | 80 | ############################################################################## 81 | # 82 | # Rules 83 | # 84 | ############################################################################## 85 | ; People 86 | $someone = me | $whowhere 87 | 88 | ; Place an object 89 | $place = $vbplace it on the {placement 2} 90 | 91 | ; Object properties 92 | $oprop = biggest | largest | smallest | heaviest | lightest | thinnest 93 | 94 | ; Navigating 95 | $goplace = $vbgopl to the {placement 1} 96 | $gobeacon = $vbgopl to the {beacon 1} 97 | $goroom = $vbgopl to the {room 1} 98 | 99 | ; Take an object 100 | $take = $vbtake the $object 101 | $takefrom = $take from the {placement 1} 102 | 103 | ; Deliver an object 104 | $delivme = $vbdeliver it to me 105 | $delivto = $vbdeliver it to {name} 106 | $delivat = $vbdeliver it to {name} at the {beacon} 107 | 108 | ; Speak 109 | $speak = $vbtell {whattosay} 110 | 111 | 112 | ############################################################################## 113 | # 114 | # Verbs 115 | # 116 | 
############################################################################## 117 | 118 | $vbbring = bring | give 119 | $vbclean = clean 120 | $vbtidy = (tidy up) | neaten | order | organize | (clean out) | (clean up) 121 | $vbdeliver = $vbbring | deliver 122 | $vbfind = find | locate | look for 123 | $vbfollow = follow | follow (behind | after ) | trail | accompany 124 | $vbgopl = go | navigate 125 | $vbgor = $vbgopl | enter 126 | $vbguide = guide | escort | take | lead | accompany 127 | $vbpickup = get | grasp | take | retrieve | pick up 128 | $vbplace = put | place 129 | $vbpour = pour | serve 130 | $vbserve = serve | deliver | distribute | give | provide 131 | $vbtake = bring | carry | take 132 | $vbtell = tell them 133 | $vbtakeout = (take out) | dump 134 | $vbmeet = meet | find | greet | (get to know) | (get acquainted with) 135 | 136 | 137 | ############################################################################## 138 | # 139 | # Polite 140 | # 141 | ############################################################################## 142 | $polite = {void} | please 143 | $polite = could you 144 | $polite = robot please 145 | $polite = could you please -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gpsr_category_3_grammar.txt: -------------------------------------------------------------------------------- 1 | ; grammar name Category III 2 | ; grammar tier High 3 | ; import common.txt 4 | 5 | $Main = $pcat3 | $cat3 6 | $pcat3 = $polite $cat3 7 | 8 | $cat3 = $complexman 9 | $cat3 = $fndobj 10 | $cat3 = $incomplete 11 | $cat3 = $erroneous 12 | $cat3 = $followout 13 | 14 | 15 | ############################################################################## 16 | # 17 | # Production Rules required by common.txt 18 | # 19 | ############################################################################## 20 | ; Define an object type 21 | ;$object = bowl | cloth | tray 22 | ; MODIFIED 
FOR GENERALITY 23 | $object = {object} 24 | 25 | ; Rule for finding a specific (named) person 26 | $findp = $vbfind a $pgenders 27 | $findp = $vbfind a person {gesture} 28 | $findp = $vbfind a person $pose 29 | 30 | ; A named or described person at the given place 31 | $whowhere = {name 1 meta: {name 1} is at the {beacon 1}} 32 | $whowhere = the {gesture} person in the {room} 33 | 34 | 35 | 36 | ############################################################################## 37 | # 38 | # Complex manipulation 39 | # 40 | ############################################################################## 41 | $complexman = $cmanwarn $cmancmd 42 | 43 | $cmancmd = Pour some cereals into the bowl 44 | $cmancmd = $goroom {void meta: All access doors to {room 1} must be closed.} 45 | $cmancmd = $vbbtake the {kobject} to the {placement 2 meta: Block access to {placement 2} with a movable object or furniture} 46 | $cmancmd = $vbbring me the $abspos object $cmanobjsrc 47 | $cmancmd = $vbbring me the object $relpos the {object} $cmanobjsrc 48 | $cmancmd = $vbbring me the $oprop (object | {category}) $cmanobjsrc 49 | $cmanwarn = {void meta: This command is for DSPL and OPL only} 50 | $cmanobjsrc = from the {placement 2 meta: Place at least 5 objects in the {placement 2}} 51 | 52 | 53 | 54 | ############################################################################## 55 | # 56 | # Incomplete commands 57 | # 58 | ############################################################################## 59 | $incomplete = $vbfollow {name 1 meta: {name 1} is at the {beacon 1}} 60 | $incomplete = $cmanwarn $vbbring me the {object?} 61 | $incomplete = $vbdeliver {object?} to $someone 62 | $incomplete = $vbguide {name 1 meta: {name 1} is at the {beacon 1}} to the {beacon 2} 63 | $incomplete = meet $inguidewho and $vbguide {pron} 64 | $incomplete = $gobeacon, meet $inguidewho, and $vbguide {pron} 65 | 66 | $inguidewho = {name 1 meta: {name 1} must be taken from {beacon 1} to {beacon 2}, but he will 
get lost before reaching it} 67 | 68 | 69 | 70 | ############################################################################## 71 | # 72 | # Wrong commands 73 | # 74 | ############################################################################## 75 | $erroneous = $cmanwarn $vbbtake the $errnoobj to the {placement 2} 76 | $erroneous = $cmanwarn $vbplace the $errnoobj on the {placement 2} 77 | $erroneous = $cmanwarn $vbbring me the $errnoobj 78 | 79 | $erroneous = $vbguide $errnoper from the {beacon 1} to the {beacon 2} 80 | $erroneous = $vbguide {name 1} from the {beacon 1} to the {beacon 1 meta: Destination is {beacon 2}} 81 | $erroneous = $vbguide the $ernamperobj from the {beacon 1} to the {beacon 2} 82 | 83 | $erroneous = $gobeacon, meet {name 1}, and $vbfollow {pron} to the {beacon 1 meta: Destination is {room 2}} 84 | $erroneous = $gobeacon, meet $errnoper, and $vbfollow {pron} to the {room 2} 85 | $erroneous = $gobeacon, meet the $ernamperobj, and $vbfollow it {void meta: Destination is {room 2}} 86 | 87 | $errnoobj = {object 1 meta: is not at the {placement 1} but somewhere else in the same room} 88 | $errnoper = {name 1 meta: is not at the {beacon 1} but somewhere else in the same room} 89 | $ernamperobj = {object 1 meta: Replace {object 1} with {name 1}} 90 | $ernamperobj = {category 1 meta: Replace {category 1} with {name 1}} 91 | 92 | 93 | ############################################################################## 94 | # 95 | # Follow [& guide] 96 | # 97 | ############################################################################## 98 | $followout = $fllwoutdest meet {name 1} at the {beacon 1}, $vbfollow {pron}, and $goroom 99 | $followout = $fllwoutdest meet {name 1} at the {beacon 1}, $vbfollow {pron}, and $vbguide {pron} back 100 | 101 | $fllwoutdest = {void meta: The robot must follow {name 1} outside the arena} 102 | 103 | 104 | 105 | ############################################################################## 106 | # 107 | # Find objects 
108 | # 109 | ############################################################################## 110 | $fndobj = tell me which are the three $oprop (objects | {category}) on the {placement 1 meta: Place at least 5 objects on the {placement 1}} 111 | $fndobj = $vbfind three {category} in the {room meta: Place only 3 objects in the room} 112 | 113 | 114 | 115 | ############################################################################## 116 | # 117 | # Rules 118 | # 119 | ############################################################################## 120 | ; Position 121 | $abspos = (left | right) most 122 | $relpos = at the (left | right) of 123 | $relpos = on top of 124 | $relpos = above | behind | under 125 | -------------------------------------------------------------------------------- /scripts/process_turk_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | 4 | from nltk.metrics.distance import edit_distance, jaccard_distance 5 | import pandas as pd 6 | 7 | paraphrasings = [] 8 | new = [] 9 | 10 | 11 | def process_turk_files(paths, filter_rejected=True): 12 | print("Processing paths: {}".format(str(paths))) 13 | 14 | def drop_trailing_num(name): 15 | try: 16 | return next(re.finditer(r'[\D\.]*', name)).group(0) 17 | except StopIteration: 18 | return name 19 | 20 | frame = pd.concat([pd.read_csv(path, na_filter=False) for path in paths], ignore_index=True) 21 | if filter_rejected: 22 | frame.drop(frame[frame["AssignmentStatus"] == "Rejected"].index, inplace=True) 23 | 24 | data_views = [] 25 | num_commands = frame.filter(regex='^Input.command', axis=1).shape[1] 26 | for n in range(1, num_commands + 1): 27 | columns = ["Input.command" + str(n), "Answer.utterance" + str(n), "Input.parse" + str(n), 28 | "Input.parse_ground" + str(n), "WorkerId"] 29 | data_views.append(frame[columns].rename(columns=drop_trailing_num)) 30 | paraphrasings = pd.concat(data_views) 31 | 
    # Group identical paraphrase texts together for later eyeballing/diffing.
    paraphrasings.sort_values(by="Answer.utterance", inplace=True)
    # Collect the free-form "custom" commands the same way, one view per
    # Answer.custom<n> column in the HIT results.
    new_views = []
    num_new_commands = frame.filter(regex='^Answer.custom', axis=1).shape[1]
    for n in range(1, num_new_commands + 1):
        new_views.append(frame[["Answer.custom" + str(n), "WorkerId"]].rename(columns=drop_trailing_num))
    new = pd.concat(new_views)
    new.sort_values(by="Answer.custom", ignore_index=True, inplace=True)

    # Rename the raw MTurk column names to friendlier ones.
    nice_names = {"Answer.utterance": "paraphrase", "Input.command": "command", "Answer.custom": "command"}
    paraphrasings.rename(columns=nice_names, inplace=True)
    new.rename(columns=nice_names, inplace=True)
    # Everything that's not an Input./Answer. column (assignment metadata etc.),
    # keeping only the free-text Answer.comment field.
    other_data = frame.drop(
        columns=[c for c in frame.columns if ("Input" in c or "Answer" in c) and not (c == "Answer.comment")])
    return paraphrasings, new, other_data


paraphrasings, new, other_data = process_turk_files(glob.glob("batch_*.csv"))

# Distance metrics between each original command and the worker's paraphrase.
paraphrasings["EditDistanceNormalized"] = paraphrasings.apply(
    lambda row: edit_distance(row["command"], row["paraphrase"]) / len(row["command"]), axis=1)
paraphrasings["EditDistance"] = paraphrasings.apply(
    lambda row: edit_distance(row["command"], row["paraphrase"]), axis=1)
paraphrasings["JaccardDistance"] = paraphrasings.apply(
    lambda row: jaccard_distance(set(row["command"].split()), set(row["paraphrase"].split())), axis=1)

print(
    "{:.2f} {:.2f} {:.2f}".format(paraphrasings["EditDistanceNormalized"].mean(), paraphrasings["EditDistance"].mean(),
                                  paraphrasings["JaccardDistance"].mean()))
# Print each worker's (command, paraphrase) pairs for manual review.
by_worker = paraphrasings.groupby(paraphrasings["WorkerId"])
for name, group in by_worker:
    print(name)
    # iterrows yields (index, row); the two selected columns unpack from the row.
    for i, (original, paraphrase) in group[["command", "paraphrase"]].iterrows():
        print(original)
        print(paraphrase)
    print("")

# Per-worker summary statistics.
turker_performance = pd.DataFrame()
turker_performance["HITTime"] = other_data.groupby("WorkerId")["WorkTimeInSeconds"].mean()
turker_performance["MeanNormalizedEditDistance"] = paraphrasings.groupby("WorkerId")["EditDistanceNormalized"].mean() 70 | turker_performance["MeanJaccardDistance"] = paraphrasings.groupby("WorkerId")["JaccardDistance"].mean() 71 | turker_performance["Comment"] = other_data.groupby("WorkerId")["Answer.comment"] 72 | for _, (original, parse, paraphrase, edit, jaccard) in paraphrasings[ 73 | ["command", "Input.parse", "paraphrase", "EditDistance", "JaccardDistance"]].iterrows(): # noqa 74 | print(original) 75 | print(parse) 76 | print(paraphrase) 77 | print("dist: ed{:.2f} ja{:.2f}".format(edit, jaccard)) 78 | print("") 79 | 80 | print("--------------") 81 | new_by_worker = new.groupby(new["WorkerId"]) 82 | for name, group in new_by_worker: 83 | print(name) 84 | for custom_utt in group["command"]: 85 | print(custom_utt) 86 | print("") 87 | 88 | print("{} workers provided {} paraphrases and {} new commands".format(len(by_worker), len(paraphrasings), len(new))) 89 | 90 | with open("paraphrasings.txt", 'w') as outfile: 91 | for _, (paraphrase, command) in paraphrasings[["paraphrase", "command"]].sort_values( 92 | by="paraphrase").iterrows(): 93 | outfile.write(command + "\n") 94 | outfile.write(paraphrase + "\n") 95 | 96 | with open("paraphrasings_grounded.txt", 'w') as outfile: 97 | for _, (paraphrase, logical_ground) in paraphrasings[["paraphrase", "Input.parse_ground"]].sort_values( 98 | by="paraphrase").iterrows(): 99 | outfile.write(paraphrase + "\n") 100 | outfile.write(logical_ground + "\n") 101 | 102 | with open("orig_para_logical.txt", 'w') as outfile: 103 | for _, (paraphrase, command, logical_ground) in paraphrasings[ 104 | ["paraphrase", "command", "Input.parse_ground"]].sort_values( 105 | by="paraphrase").iterrows(): 106 | outfile.write(command + "\n") 107 | outfile.write(paraphrase + "\n") 108 | outfile.write(logical_ground + "\n") 109 | 110 | with open("custom.txt", 'w') as outfile: 111 | for command in new["command"].sort_values(): 112 | 
outfile.write(command + "\n") 113 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2019/common_rules.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This grammar file has no main, therefore, cannot be loaded 3 | # standalone. Import using: 4 | # ; import common.txt 5 | # 6 | # The following rules must be defined in the importer 7 | # ; An object type 8 | # $object 9 | # ; Rule for finding a specific (named) person 10 | # $findp 11 | # ; A named or described person in the given place 12 | # $whowhere 13 | # 14 | ; grammar name Common 15 | 16 | ############################################################################## 17 | # 18 | # Manipulation 19 | # 20 | ############################################################################## 21 | $deliver = $take and $vbplace it on the {placement 2} 22 | $deliver = $vbplace the $object on the {placement 2} 23 | $deliver = $vbbring me the $object 24 | $deliver = $vbdeliver the $object to $someone 25 | $deliver = $takefrom and $vbplace it on the {placement 2} 26 | $deliver = $goplace, $vbfind the $object, and ($delivme | $delivat) 27 | $deliver = $goplace, $vbfind the $object, and $place 28 | 29 | 30 | 31 | ############################################################################## 32 | # 33 | # Find people 34 | # 35 | ############################################################################## 36 | $fndppl = $talk to $whowhere 37 | $fndppl = $findp in the {room} and $talk 38 | $fndppl = $goroom, $findp, and $talk 39 | 40 | 41 | 42 | ############################################################################## 43 | # 44 | # Find objects 45 | # 46 | ############################################################################## 47 | $fndobj = Tell me how many $object there are on the {placement} 48 | $fndobj = $vbfind the $object in the {room} 49 | 50 | 51 | 
############################################################################## 52 | # 53 | # Follow people 54 | # 55 | ############################################################################## 56 | $follow = $vbfollow {name 1} from the {beacon 1} to the {room 2} 57 | $follow = meet {name 1} at the {beacon 1} and $vbfollow {pron} $fllwdest 58 | $follow = $gobeacon, meet {name 1}, and $vbfollow {pron} $fllwhdst 59 | 60 | $fllmeet = meet {name 1} | find a person 61 | $fllwdest = (to the {room 2}) | $fllwhdst 62 | $fllwhdst = {void meta: Follow {name 1} to the {room 2}} 63 | 64 | 65 | ############################################################################## 66 | # 67 | # Guide people 68 | # 69 | ############################################################################## 70 | $guide = $gdcmd $gdmeta 71 | 72 | $gdcmd = $vbguide {name 1} from the {beacon 1} to the {beacon 2} 73 | $gdcmd = meet {name 1} at the {beacon 1} and $guideto 74 | $gdcmd = $gobeacon, meet {name 1}, and $guideto 75 | $gdcmd = $vbguide {name 1} to the {beacon 2}, $gdwhere 76 | 77 | $guideto = $vbguide {pron} to the {beacon 2} 78 | $gdwhere = you (may | can | will) find {pron} at the {beacon 1} 79 | $gdmeta = {void meta: The person being guided must deviate when indicated by the referee before reaching the {beacon 2}} 80 | 81 | ############################################################################## 82 | # 83 | # Rules 84 | # 85 | ############################################################################## 86 | ; People 87 | $someone = me | $whowhere 88 | 89 | ; Place an object 90 | $place = $vbplace it on the {placement 2} 91 | 92 | ; Object properties 93 | $oprop = biggest | largest | smallest | heaviest | lightest | thinnest 94 | 95 | ; Navigating 96 | $goplace = $vbgopl to the {placement 1} 97 | $gobeacon = $vbgopl to the {beacon 1} 98 | $goroom = $vbgopl to the {room 1} 99 | 100 | ; Take an object 101 | $take = $vbtake the $object 102 | $takefrom = $take from the {placement 
1} 103 | 104 | ; Deliver an object 105 | $delivme = $vbdeliver it to me 106 | $delivto = $vbdeliver it to {name} 107 | $delivat = $vbdeliver it to {name} at the {beacon} 108 | 109 | ; Speak 110 | $talk = $answer | $speak 111 | $answer = answer a {question} 112 | $speak = $vbspeak $whattosay 113 | 114 | 115 | ############################################################################## 116 | # 117 | # What to say 118 | # 119 | ############################################################################## 120 | $whattosay = something about yourself 121 | $whattosay = the time 122 | $whattosay = what day is (today | tomorrow) 123 | $whattosay = your team's (name | country | affiliation) 124 | $whattosay = the day of the (week | month) 125 | $whattosay = a joke 126 | 127 | ############################################################################## 128 | # 129 | # Verbs 130 | # 131 | ############################################################################## 132 | $vbbtake = bring | take 133 | $vbplace = put | place 134 | $vbbring = bring | give 135 | $vbclean = clean 136 | $vbdeliver = $vbbring | deliver 137 | $vbtake = get | grasp | take | pick up 138 | $vbspeak = tell | say 139 | $vbgopl = go | navigate 140 | $vbgor = $vbgopl | enter 141 | $vbfind = find | locate | look for 142 | $vbguide = guide | escort | take | lead | accompany 143 | $vbfollow = follow 144 | 145 | 146 | ############################################################################## 147 | # 148 | # Polite 149 | # 150 | ############################################################################## 151 | $polite = {void} | Please 152 | $polite = Could you 153 | $polite = Robot please 154 | $polite = Could you please -------------------------------------------------------------------------------- /gpsr_command_understanding/models/commands_reader.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import logging 3 | 4 | from 
overrides import overrides 5 | 6 | from allennlp.common.checks import ConfigurationError 7 | from allennlp.common.file_utils import cached_path 8 | from allennlp.common.util import START_SYMBOL, END_SYMBOL 9 | from allennlp.data.dataset_readers.dataset_reader import DatasetReader 10 | from allennlp.data.fields import TextField, MetadataField 11 | from allennlp.data.instance import Instance 12 | from allennlp.data.tokenizers import Token, Tokenizer, SpacyTokenizer 13 | from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer 14 | 15 | import more_itertools 16 | 17 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 18 | 19 | 20 | @DatasetReader.register("commands") 21 | class CommandsDatasetReader(DatasetReader): 22 | """ 23 | Read a tsv file containing paired sequences, and create a dataset suitable for a 24 | ``SimpleSeq2Seq`` model, or any model with a matching API. 25 | Expected format for each input line: \t 26 | The output of ``read`` is a list of ``Instance`` s with the fields: 27 | source_tokens: ``TextField`` and 28 | target_tokens: ``TextField`` 29 | `START_SYMBOL` and `END_SYMBOL` tokens are added to the source and target sequences. 30 | Parameters 31 | ---------- 32 | source_tokenizer : ``Tokenizer``, optional 33 | Tokenizer to use to split the input sequences into words or other kinds of tokens. Defaults 34 | to ``WordTokenizer()``. 35 | target_tokenizer : ``Tokenizer``, optional 36 | Tokenizer to use to split the output sequences (during training) into words or other kinds 37 | of tokens. Defaults to ``source_tokenizer``. 38 | source_token_indexers : ``Dict[str, TokenIndexer]``, optional 39 | Indexers used to define input (source side) token representations. Defaults to 40 | ``{"tokens": SingleIdTokenIndexer()}``. 41 | target_token_indexers : ``Dict[str, TokenIndexer]``, optional 42 | Indexers used to define output (target side) token representations. Defaults to 43 | ``source_token_indexers``. 
44 | source_add_start_token : bool, (optional, default=True) 45 | Whether or not to add `START_SYMBOL` to the beginning of the source sequence. 46 | """ 47 | 48 | def __init__(self, 49 | source_tokenizer: Tokenizer = None, 50 | target_tokenizer: Tokenizer = None, 51 | source_token_indexers: Dict[str, TokenIndexer] = None, 52 | target_token_indexers: Dict[str, TokenIndexer] = None, 53 | source_add_start_token: bool = True, 54 | source_add_end_token: bool = True) -> None: 55 | super().__init__() 56 | self._source_tokenizer = source_tokenizer or SpacyTokenizer() 57 | self._target_tokenizer = target_tokenizer or SpacyTokenizer() 58 | self._source_token_indexers = source_token_indexers or {"tokens": SingleIdTokenIndexer()} 59 | self._target_token_indexers = target_token_indexers or self._source_token_indexers 60 | self._source_add_start_token = source_add_start_token 61 | self._source_add_end_token = source_add_end_token 62 | 63 | @overrides 64 | def _read(self, file_path): 65 | with open(cached_path(file_path), "r") as data_file: 66 | logger.info("Reading instances from lines in file at: %s", file_path) 67 | line_generator = more_itertools.peekable(enumerate(data_file)) 68 | while line_generator: 69 | line_num, line = next(line_generator) 70 | line = line.strip("\n") 71 | if len(line) == 0: 72 | continue 73 | 74 | next_pair = line_generator.peek(None) 75 | if not next_pair: 76 | raise ConfigurationError("Expected another line") 77 | next_line_num, next_line = next(line_generator) 78 | 79 | source_sequence, target_sequence = line, next_line 80 | yield self.text_to_instance(source_sequence, target_sequence) 81 | 82 | @overrides 83 | def text_to_instance(self, source_string: str, target_string: str = None) -> Instance: # type: ignore 84 | # pylint: disable=arguments-differ 85 | tokenized_source = self._source_tokenizer.tokenize(source_string) 86 | if self._source_add_start_token: 87 | tokenized_source.insert(0, Token(START_SYMBOL)) 88 | if self._source_add_end_token: 89 | 
tokenized_source.append(Token(END_SYMBOL)) 90 | 91 | source_field = TextField(tokenized_source, self._source_token_indexers) 92 | meta_fields = {"source_tokens": tokenized_source.copy()} 93 | fields_dict = {"source_tokens": source_field} 94 | if target_string is not None: 95 | tokenized_target = self._target_tokenizer.tokenize(target_string) 96 | tokenized_target.insert(0, Token(START_SYMBOL)) 97 | tokenized_target.append(Token(END_SYMBOL)) 98 | target_field = TextField(tokenized_target, self._target_token_indexers) 99 | fields_dict["target_tokens"] = target_field 100 | meta_fields["target_tokens"] = [y.text for y in tokenized_target[1:-1]] 101 | fields_dict["metadata"] = MetadataField(meta_fields) 102 | 103 | return Instance(fields_dict) 104 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/common_rules.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This grammar file has no main, therefore, cannot be loaded 3 | # standalone. 
Import using: 4 | # ; import common.txt 5 | # 6 | # The following rules must be defined in the importer 7 | # ; An object type 8 | # $object 9 | # ; Rule for finding a specific (named) person 10 | # $findp 11 | # ; A named or described person in the given place 12 | # $whowhere 13 | # 14 | ; grammar name Common 15 | 16 | ############################################################################## 17 | # 18 | # Manipulation 19 | # 20 | ############################################################################## 21 | $deliver = $take and $vbplace it on the {placement 2} 22 | $deliver = $vbplace the $object on the {placement 2} 23 | $deliver = $vbbring me the $object 24 | $deliver = $vbdeliver the $object to $someone 25 | $deliver = $takefrom and $vbplace it on the {placement 2} 26 | $deliver = $goplace, $vbfind the $object, and ($delivme | $delivat) 27 | $deliver = $goplace, $vbfind the $object, and $place 28 | 29 | 30 | 31 | ############################################################################## 32 | # 33 | # Find people 34 | # 35 | ############################################################################## 36 | $fndppl = $talk to $whowhere 37 | $fndppl = $findp in the {room} and $talk 38 | $fndppl = $goroom, $findp, and $talk 39 | 40 | 41 | 42 | ############################################################################## 43 | # 44 | # Find objects 45 | # 46 | ############################################################################## 47 | $fndobj = Tell me how many $object there are on the {placement} 48 | $fndobj = $vbfind the $object in the {room} 49 | 50 | 51 | ############################################################################## 52 | # 53 | # Follow people 54 | # 55 | ############################################################################## 56 | $follow = $vbfollow {name 1} from the {beacon 1} to the {room 2} 57 | $follow = meet {name 1} at the {beacon 1} and $vbfollow {pron} $fllwdest 58 | $follow = $gobeacon, meet {name 1}, 
and $vbfollow {pron} $fllwhdst 59 | 60 | $fllmeet = meet {name 1} | find a person 61 | $fllwdest = (to the {room 2}) | $fllwhdst 62 | $fllwhdst = {void meta: Follow {name 1} to the {room 2}} 63 | 64 | 65 | ############################################################################## 66 | # 67 | # Guide people 68 | # 69 | ############################################################################## 70 | $guide = $gdcmd $gdmeta 71 | 72 | $gdcmd = $vbguide {name 1} from the {beacon 1} to the {beacon 2} 73 | $gdcmd = meet {name 1} at the {beacon 1} and $guideto 74 | $gdcmd = $gobeacon, meet {name 1}, and $guideto 75 | $gdcmd = $vbguide {name 1} to the {beacon 2}, $gdwhere 76 | 77 | $guideto = $vbguide {pron} to the {beacon 2} 78 | $gdwhere = you (may | can | will) find {pron} at the {beacon 1} 79 | $gdmeta = {void meta: The person being guided must deviate when indicated by the referee before reaching the {beacon 2}} 80 | 81 | ############################################################################## 82 | # 83 | # Rules 84 | # 85 | ############################################################################## 86 | ; People 87 | $someone = me | $whowhere 88 | 89 | ; Place an object 90 | $place = $vbplace it on the {placement 2} 91 | 92 | ; Object properties 93 | $oprop = biggest | largest | smallest | heaviest | lightest | thinnest 94 | 95 | ; Navigating 96 | $goplace = $vbgopl to the {placement 1} 97 | $gobeacon = $vbgopl to the {beacon 1} 98 | $goroom = $vbgopl to the {room 1} 99 | 100 | ; Take an object 101 | $take = $vbtake the $object 102 | $takefrom = $take from the {placement 1} 103 | 104 | ; Deliver an object 105 | $delivme = $vbdeliver it to me 106 | $delivto = $vbdeliver it to {name} 107 | $delivat = $vbdeliver it to {name} at the {beacon} 108 | 109 | ; Speak 110 | $talk = $answer | $speak 111 | $answer = answer a {question} 112 | 113 | ; MODIFICATION: Make whattosay a wildcard instead of a nonterminal. 
114 | ; This makes it easier to swap in more variations here, and better 115 | ; fits our anonymization scheme 116 | $speak = $vbspeak {whattosay} 117 | 118 | 119 | ############################################################################## 120 | # 121 | # What to say 122 | # 123 | ############################################################################## 124 | $whattosay = something about yourself 125 | $whattosay = the time 126 | $whattosay = what day is (today | tomorrow) 127 | $whattosay = your team's (name | country | affiliation) 128 | $whattosay = the day of the (week | month) 129 | $whattosay = a joke 130 | 131 | ############################################################################## 132 | # 133 | # Verbs 134 | # 135 | ############################################################################## 136 | $vbbtake = bring | take 137 | $vbplace = put | place 138 | $vbbring = bring | give 139 | $vbdeliver = $vbbring | deliver 140 | $vbtake = get | grasp | take | pick up 141 | $vbspeak = tell | say 142 | $vbgopl = go | navigate 143 | $vbgor = $vbgopl | enter 144 | $vbfind = find | locate | look for 145 | $vbguide = guide | escort | take | lead | accompany 146 | $vbfollow = follow 147 | 148 | 149 | ############################################################################## 150 | # 151 | # Polite 152 | # 153 | ############################################################################## 154 | $polite = {void} | Please 155 | $polite = Could you 156 | $polite = Robot please 157 | $polite = Could you please 158 | -------------------------------------------------------------------------------- /gpsr_command_understanding/resources/generator2018/gpsr_category_1_semantics.txt: -------------------------------------------------------------------------------- 1 | # Predicates available: at(entity, location_name) 2 | # answer(question_entity) 3 | # count(entities) 4 | # Note that count is treated specially by the grounder; it'll take the expression 5 | # 
for `entities` and count the number of satisfying groundings. 6 | # female(entity) 7 | # is_a(entity, string) 8 | # location(entity) 9 | # name(entity, name_str) 10 | # male(entity) 11 | # person(entity) 12 | # sequence(command_predicate, command_predicate, [...]) 13 | # A special predicate that the system will evaluate by running the constituents 14 | # in order. 15 | 16 | # Preposition predicates: 17 | # above(entity, reference) 18 | # below(entity, reference) 19 | # left_of(entity, reference) 20 | # on_top_of(entity, reference) 21 | # right_of(entity, reference) 22 | # Superlatives: 23 | # biggest(entity, [int: in_top_k]) 24 | # smallest(entity, [int: in_top_k]) 25 | # heaviest(entity, [int: in_top_k]) 26 | # lightest(entity, [int: in_top_k]) 27 | # thinnest(entity, [int: in_top_k]) 28 | # 29 | # Commands: bring(object_entity[, person_entity) 30 | # find(entity) 31 | # follow(person_entity) 32 | # go(location_entity) 33 | # guide(person_entity, location_entity) 34 | # put(object_entity, location_entity) 35 | # say(string[, person_entity]) 36 | 37 | ################## 38 | # Find objects ($fndobj) 39 | ################## 40 | 41 | $vbfind the $object in the {room} = (find (λ$1:e (is_a $1 $object) (at $1 {room}))) 42 | $vbfind the {kobject?} in the {room} = (find (λ$1:e (is_a $1 {kobject?}) (at $1 {room}))) 43 | Tell me how many $object there are on the {placement} = (say (count (λ$1:e (is_a $1 $object) (at $1 {placement})))) 44 | 45 | $vbfind (someone | a person) in the {room} and $vbspeak {whattosay} = (say {whattosay} (λ$1:e (person $1) (at $1 {room}))) 46 | $vbfind (someone | a person) in the {room} and answer a {question} = (say (answer {question}) (λ$1:e (person $1) (at $1 {room}))) 47 | $vbfind {name} in the {room} and $vbspeak {whattosay} = (say {whattosay} (λ$1:e (person $1) (name $1 {name}) (at $1 {room}))) 48 | $vbfind {name} in the {room} and answer a {question} = (say (answer {question}) (λ$1:e (person $1) (name $1 {name}) (at $1 {room}))) 49 | 
$vbspeak {whattosay} to {name} at the {beacon 1} = (say {whattosay} (λ$1:e (person $1) (name $1 {name}) (at $1 {beacon 1}))) 50 | $vbgopl to the {room 1}, $vbfind (someone | a person), and answer a {question} = (say (answer {question}) (λ$1:e (person $1) (at $1 {room 1}))) 51 | $vbgopl to the {room 1}, $vbfind (someone | a person), and $vbspeak {whattosay} = (say {whattosay} (λ$1:e (person $1) (at $1 {room 1}))) 52 | $vbgopl to the {room 1}, $vbfind {name}, and $vbspeak {whattosay} = (say {whattosay} (λ$1:e (person $1) (name $1 {name}) (at $1 {room 1}))) 53 | $vbgopl to the {room 1}, $vbfind {name}, and answer a {question} = (say (answer {question}) (λ$1:e (person $1) (name $1 {name}) (at $1 {room 1}))) 54 | 55 | ############################## 56 | # Find people ($fndppl) 57 | ############################## 58 | 59 | Tell me the name of the person at the {beacon} = (say (λ$1:e (λ$2:e (person $2) (name $2 $1) (at $2 {beacon})))) 60 | Tell me the name of the person in the {room} = (say (λ$1:e (λ$2:e (person $2) (name $2 $1) (at $2 {room})))) 61 | answer a {question} to {name} at the {beacon 1} = (say (answer {question}) (λ$1:e (person $1) (name $1 {name}) (at $1 {beacon 1}))) 62 | 63 | ############################## 64 | # Manipulation ($deliver) 65 | ############################## 66 | $vbbring me the $object = (bring (λ$1:e (is_a $1 $object))) 67 | $vbbring me the {kobject} from the {placement} = (bring (λ$1:e (is_a $1 {kobject}) (at $1 {placement}))) 68 | $vbbring to {name} at the {beacon 1} the $object from the {placement} = (bring (λ$1:e (is_a $1 $object) (at $1 {placement})) (λ$1:e (person $1) (name $1 {name}) (at $1 {beacon 1}))) 69 | $vbdeliver the $object to me = (bring (λ$1:e (is_a $1 $object))) 70 | $vbdeliver the $object to {name} at the {beacon 1} = (bring (λ$1:e (is_a $1 $object)) (λ$1:e (person $1) (name $1 {name}) (at $1 {beacon 1}))) 71 | $vbgopl to the {placement 1}, $vbfind the $object, and $delivme = (bring (λ$1:e (is_a $1 $object) (at $1 
{placement 1}))) 72 | $vbgopl to the {placement 1}, $vbfind the $object, and $vbdeliver it to {name} at the {beacon} = (bring (λ$1:e (is_a $1 $object) (at $1 {placement 1})) (λ$1:e (person $1) (name $1 {name}) (at $1 {beacon}))) 73 | $vbgopl to the {placement 1}, $vbfind the $object, and $vbplace it on the {placement 2} = (put (λ$1:e (is_a $1 $object) (at $1 {placement 1})) {placement 2}) 74 | $vbplace the $object on the {placement 2} = (put (λ$1:e (is_a $1 $object)) {placement 2}) 75 | $vbtake the $object and $vbplace it on the {placement 2} = (put (λ$1:e (is_a $1 $object)) {placement 2}) 76 | $vbtake the $object from the {placement 1} and $delivme = (bring (λ$1:e (is_a $1 $object) (at $1 {placement 1}))) 77 | $vbtake the $object from the {placement 1} and $vbdeliver it to {name} at the {beacon} = (bring (λ$1:e (is_a $1 $object) (at $1 {placement 1})) (λ$1:e (person $1) (at $1 {beacon})(name $1 {name}))) 78 | $vbtake the $object from the {placement 1} and $vbplace it on the {placement 2} = (put (λ$1:e (is_a $1 $object) (at $1 {placement 1})) {placement 2}) -------------------------------------------------------------------------------- /gpsr_command_understanding/anonymizer.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | 4 | 5 | class CaseInsensitiveDict(dict): 6 | def __setitem__(self, key, value): 7 | super(CaseInsensitiveDict, self).__setitem__(key.lower(), value) 8 | 9 | def __getitem__(self, key): 10 | return super(CaseInsensitiveDict, self).__getitem__(key.lower()) 11 | 12 | 13 | class Anonymizer(object): 14 | def __init__(self, objects, categories, names, locations, rooms, gestures, whattosay): 15 | """ 16 | Give the entities that will be anonymized. Matching and replacement is 17 | case insensitive. 
18 | 19 | :param objects: 20 | :param categories: 21 | :param names: 22 | :param locations: 23 | :param rooms: 24 | :param gestures: 25 | :param whattosay: 26 | """ 27 | self.names = names 28 | self.categories = categories 29 | self.locations = locations 30 | self.rooms = rooms 31 | self.objects = objects 32 | self.gestures = gestures 33 | self.whattosay = whattosay 34 | replacements = CaseInsensitiveDict() 35 | for name in self.names: 36 | replacements[name] = "name" 37 | 38 | for location in self.locations: 39 | replacements[location] = "location" 40 | 41 | for room in self.rooms: 42 | replacements[room] = "room" 43 | 44 | # Note they're we're explicitly clumping beacons and placements (which may overlap) 45 | # together to make anonymizing/parsing easier. 46 | """ 47 | for beacon in self.beacons: 48 | replacements[beacon] = "location beacon" 49 | 50 | for placement in self.placements: 51 | replacements[placement] = "location placement" 52 | """ 53 | for object in self.objects: 54 | replacements[object] = "object" 55 | 56 | for gesture in self.gestures: 57 | replacements[gesture] = "gesture" 58 | 59 | for category in self.categories: 60 | replacements[category] = "category" 61 | 62 | for whattosay in self.whattosay: 63 | replacements[whattosay] = "whattosay" 64 | 65 | replacements["objects"] = "category" 66 | 67 | self.replacements = replacements 68 | escaped = {re.escape(k): v for k, v in replacements.items()} 69 | self.pattern = re.compile("\\b(" + "|".join(escaped.keys()) + ")\\b", re.IGNORECASE) 70 | 71 | def __call__(self, utterance, return_replacements=False): 72 | """ 73 | Replaces entity occurrences with their specified replacement string (usually a type token). 
74 | "apple apple banana" -> "object object object" 75 | :param utterance: 76 | :param return_replacements: 77 | :return: 78 | """ 79 | anonymized = utterance 80 | replacements = defaultdict(lambda: set()) 81 | for match in self.pattern.finditer(utterance): 82 | match_str = match.group() 83 | replacements[match_str].add(self.replacements[match_str]) 84 | anonymized = anonymized.replace(match_str, self.replacements[match_str]) 85 | if return_replacements: 86 | return anonymized, replacements 87 | else: 88 | return anonymized 89 | 90 | @staticmethod 91 | def from_knowledge_base(kb): 92 | # Room is a subtype of location, but we make an exception and anonymize it as "roomN" 93 | isroom = kb.attributes["location"]["isroom"] 94 | rooms = [] 95 | for key, isroom in isroom.items(): 96 | if isroom: 97 | rooms.append(key) 98 | return Anonymizer(kb.by_name["object"], kb.by_name["category"], kb.by_name["name"], kb.by_name["location"], 99 | rooms, kb.by_name["gesture"], kb.by_name["whattosay"]) 100 | 101 | 102 | class NumberingAnonymizer(Anonymizer): 103 | @staticmethod 104 | def from_knowledge_base(kb): 105 | plain = Anonymizer.from_knowledge_base(kb) 106 | return NumberingAnonymizer(plain.objects, plain.categories, plain.names, plain.locations, plain.rooms, 107 | plain.gestures, plain.whattosay) 108 | 109 | def __call__(self, utterance, return_replacements=False): 110 | """ 111 | Replaces entities with some other token (usually a type token) with a number appended. 112 | "apple apple" -> "object0 object1" 113 | "apple banana" -> "object0 object1" 114 | We expect objects to be referred to once in commands, thus the decision to allow even the same 115 | word to be mapped to multiple numbers. We assume that a repeated occurrence is actually a 116 | separate entity with the same name. 
class TestPairedGenerator(unittest.TestCase):
    """Exercises PairedGenerator rule loading, pair generation, and grounding."""

    def setUp(self):
        # Small hand-built knowledge base so groundings are predictable.
        kb = KnowledgeBase({"name": ["n1", "n2", "n3", "n4"], "location": ["l1", "l2"]}, {})
        self.generator = PairedGenerator(kb, grammar_format_version=2018)
        grammar_path = os.path.join(FIXTURE_DIR, "grammar.txt")
        semantics_path = os.path.join(FIXTURE_DIR, "semantics.txt")
        with open(grammar_path) as fixture_grammar_file, open(semantics_path) as fixture_semantics_file:
            loaded_rules = self.generator.load_rules(fixture_grammar_file)
            loaded_semantics = self.generator.load_semantics_rules(fixture_semantics_file)
        self.assertEqual(loaded_rules, 5)
        self.assertEqual(loaded_semantics, 3)

    def test_load_2018(self):
        # Per-category rule/semantics counts for the three 2018 GPSR categories.
        per_category = load_paired_2018_by_cat(GRAMMAR_DIR_2018)
        expected_rule_counts = [43, 46, 59]
        expected_semantics_counts = [32, 107, 42]
        for index, category_generator in enumerate(per_category):
            self.assertEqual(expected_rule_counts[index], len(category_generator.rules))
            self.assertEqual(expected_semantics_counts[index], len(category_generator.semantics))
        # The merged generator combines all three categories into one rule set.
        merged = load_paired_2018(GRAMMAR_DIR_2018)
        self.assertEqual(62, len(merged.rules))
        self.assertEqual(149, len(merged.semantics))

    def test_load_2019_gpsr(self):
        # 2019 GPSR grammar loads, but no semantics are paired with it.
        generator = load_paired("gpsr", GRAMMAR_DIR_2019)
        self.assertEqual(71, len(generator.rules))
        self.assertEqual(0, len(generator.semantics))

    def test_load_2019_egpsr(self):
        # 2019 EGPSR grammar loads, but no semantics are paired with it.
        generator = load_paired("egpsr", GRAMMAR_DIR_2019)
        self.assertEqual(100, len(generator.rules))
        self.assertEqual(0, len(generator.semantics))

    def test_generate_pairs(self):
        # The fixture grammar expands to exactly six (utterance, parse) pairs.
        pairs = list(self.generator.generate(NonTerminal("Main")))
        self.assertEqual(6, len(pairs))

    def test_generate_pairs_2018(self):
        plain_generator = load_2018(GRAMMAR_DIR_2018)
        paired_generator = load_paired_2018(GRAMMAR_DIR_2018)
        # When semantics aren't required, pairing must not change the number of
        # expansions the underlying grammar produces.
        pairs = list(paired_generator.generate(ROOT_SYMBOL, yield_requires_semantics=False))
        sentences = list(plain_generator.generate(ROOT_SYMBOL))
        self.assertEqual(len(pairs), len(sentences))

    def test_ground(self):
        def phrase(text):
            # Convenience: a space-separated string as an expression tree.
            return Tree("expression", text.split(" "))

        annotated_utterance = Tree("expression",
                                   ["Say", "hi", "to", ComplexWildCard("name", wildcard_id=1), "and",
                                    ComplexWildCard("name", wildcard_id=2)])
        annotated_semantics = Tree("expression", [
            Tree("greet", [ComplexWildCard("name", wildcard_id=1), ComplexWildCard("name", wildcard_id=2)])])
        expected_utterance = phrase("Say hi to n1 and n2")
        expected_logical = Tree("expression",
                                [Tree("greet", [Token("ESCAPED_STRING", "\"n1\""), Token("ESCAPED_STRING", "\"n2\"")])])
        # FIXME: Setup a mocked knowledgebase
        self.assertEqual((expected_utterance, expected_logical),
                         self.generator.ground((annotated_utterance, annotated_semantics)))

        # Two wildcards of the same type must never ground to the same entity.
        repeated = Tree("expression",
                        [ComplexWildCard("name", wildcard_id=1), ComplexWildCard("name", wildcard_id=2)])
        for grounded_utterance, grounded_logical in self.generator.generate_groundings((repeated, repeated)):
            first, second = grounded_utterance.children
            self.assertNotEqual(first, second)

        # Wildcards of different types ground independently, even with matching ids.
        mixed = Tree("expression",
                     [ComplexWildCard("location", wildcard_id=2), "and", ComplexWildCard("name", wildcard_id=2)])
        self.assertEqual((phrase("l1 and n1"), Tree(None, [])),
                         self.generator.ground((mixed, Tree(None, []))))
{kobject} to the {placement 2} = (put (λ$1:e (is_a $1 {kobject})){placement 2}) 36 | $polite $vbbring me the left most object from the {placement 2} = (bring (λ$1:e(leftmost $1{placement 2}))) 37 | $polite $vbbring me the right most object from the {placement 2} = (bring (λ$1:e(rightmost $1{placement 2}))) 38 | $polite $vbbring me the object at the left of the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(left_of $1 $2))){placement 2}) 39 | $polite $vbbring me the object at the right of the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(right_of $1 $2))){placement 2}) 40 | $polite $vbbring me the object on top of the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(on_top_of $1 $2))){placement 2}) 41 | $polite $vbbring me the object above the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(above $1 $2))){placement 2}) 42 | $polite $vbbring me the object behind the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(behind $1 $2))){placement 2}) 43 | $polite $vbbring me the object under the {object} from the {placement 2} = (bring (λ$1:e(λ$2:e (is_a $2 {object})(below $1 $2))){placement 2}) 44 | $polite $vbbring me the $oprop {category} from the {placement 2} = (bring (λ$1:e (is_a $1 {category})($oprop $1)){placement 2}) 45 | $polite $vbbring me the $oprop object from the {placement 2} = (bring (λ$1:e ($oprop $1)){placement 2}) 46 | 47 | ##### 48 | # Info gathering 49 | ##### 50 | $polite Tell me how many $object there are on the {placement} = (say (count (λ$1:e (is_a $1 $object) (at $1 {placement})))) 51 | $polite tell me which are the three $oprop objects on the {placement 1} = (say (λ$1:e ($oprop $1 3) (at $1 {placement 1}))) 52 | $polite tell me which are the three $oprop objects on the {placement 1} = (say (λ$1:e ($oprop $1 3) (at $1 {placement 1}))) 53 | $polite tell me which are the three $oprop {category} on the {placement 1} = (say 
(λ$1:e ($oprop $1 3) (is_a $1 {category}) (at $1 {placement 1}))) 54 | $polite tell me which are the three $oprop {category} on the {placement 1} = (say (λ$1:e ($oprop $1 3) (is_a $1 {category}) (at $1 {placement 1}))) 55 | 56 | ##### 57 | # Guide 58 | ##### 59 | $polite meet {name 1} and $vbguide {pron} = (guide (λ$1:e (person $1) (name $1 {name 1})) UNKNOWN) 60 | $polite $vbguide {name 1} from the {beacon 1} to the {beacon 1} = (guide (λ$1:e (person $1) (name $1 {name 1})(at $1 {beacon 1})) {beacon 1}) 61 | $polite $vbguide {name 1} from the {beacon 1} to the {beacon 2} = (guide (λ$1:e (person $1) (name $1 {name 1})(at $1 {beacon 1})){beacon 2}) 62 | $polite $vbguide {name 1} to the {beacon 2} = (guide (λ$1:e (person $1) (name $1 {name 1})){beacon 2}) 63 | $polite $vbgopl to the {beacon 1}, meet {name 1}, and $vbguide {pron} = (guide (λ$1:e (person $1) (name $1 {name 1}) (at $1 {beacon 1})) UNKNOWN) 64 | 65 | $polite $vbguide the {object 1} from the {beacon 1} to the {beacon 2} = UNKNOWN 66 | $polite $vbguide the {category 1} from the {beacon 1} to the {beacon 2} = UNKNOWN 67 | 68 | ##### 69 | # Sequence 70 | ##### 71 | $polite $fllwoutdest meet {name 1} at the {beacon 1}, $vbfollow {pron}, and $vbgopl to the {room 1} = (sequence (follow (λ$1:e (person $1) (name $1 {name 1})(at $1 {beacon 1}))) (go {room 1})) 72 | $polite $fllwoutdest meet {name 1} at the {beacon 1}, $vbfollow {pron}, and $vbguide {pron} back = (λ$1:e(sequence (follow (person $1) (name $1 {name 1})(at $1 {beacon 1})) (guide $1 "current"))) 73 | 74 | ##### 75 | # Follow 76 | ##### 77 | 78 | $polite $vbgopl to the {beacon 1}, meet {name 1}, and $vbfollow {pron} to the {beacon 1} = (follow (λ$1:e (person $1) (name $1 {name 1})(at $1 {beacon 1})) {beacon 1}) 79 | $polite $vbgopl to the {beacon 1}, meet {name 1}, and $vbfollow {pron} to the {room 2} = (follow (λ$1:e (person $1) (name $1 {name 1})(at $1 {beacon 1}))) 80 | 81 | $polite follow {name 1} = (follow (λ$1:e (person $1) (name $1 {name 1}))) 82 
| 83 | $polite $vbgopl to the {beacon 1}, meet the {category 1}, and follow it = UNKNOWN 84 | $polite $vbgopl to the {beacon 1}, meet the {object 1}, and follow it = UNKNOWN 85 | 86 | 87 | ###### 88 | # Find 89 | ###### 90 | 91 | $polite $vbfind the $object in the {room} = (find (λ$1:e (is_a $1 $object) (at $1 {room}))) 92 | $polite $vbfind three {category} in the {room} = (find (λ$1:e (is_a $1 {category}) (at $1 {room})) 3) 93 | 94 | ###### 95 | # Go 96 | ###### 97 | 98 | $polite $vbgopl to the {room 1} = (go {room 1}) --------------------------------------------------------------------------------