├── dialog_simulator
│   ├── memories
│   │   └── mini_set
│   │       ├── memories_metadata_all.json
│   │       └── mini_set_0_memory_graph.json
│   ├── SimulatorBase.py
│   ├── DummyMemoryDialogModel.py
│   ├── get_user_utterances.py
│   ├── constants.py
│   ├── InteractiveDialogHandler.py
│   ├── merge_data_json.py
│   ├── merge_synth_and_appen.py
│   ├── MemoryDialogSimulator.py
│   ├── GoalGenerator.py
│   ├── MemoryDialogModel.py
│   ├── main.py
│   ├── utils.py
│   └── MemoryServiceAPI.py
├── models
│   ├── gpt2_mm
│   │   ├── requirements.txt
│   │   ├── LICENSE
│   │   ├── run_me.sh
│   │   ├── README.md
│   │   ├── utils
│   │   │   ├── create_result_jsons.py
│   │   │   ├── preprocess_memory_dataset.py
│   │   │   └── extract_memory_features.py
│   │   ├── dataset_memory.py
│   │   └── dataset.py
│   └── gpt2_text
│       ├── run_evaluate_gpt2.sh
│       ├── run_generate_gpt2.sh
│       ├── run_train_gpt2.sh
│       ├── gpt2_dst
│       │   └── scripts
│       │       ├── evaluate.py
│       │       ├── get_best_model.py
│       │       ├── preprocess_input.py
│       │       ├── evaluate_response.py
│       │       ├── reformat_dst_response_outputs.py
│       │       └── run_generation.py
│       ├── run_preprocess_gpt2.sh
│       └── utils
│           └── response_evaluation.py
├── teaser_memory_dialog.png
├── data
│   ├── mem_dials_test.json
│   ├── mem_dials_val.json
│   ├── mem_dials_merged.json
│   ├── mem_dials_train.json
│   ├── memory_may21_v1_100graphs.json
│   └── mscoco_memory_graphs_1k.json
├── .gitattributes
├── CONTRIBUTING.md
├── .gitignore
├── CODE_OF_CONDUCT.md
└── README.md
/dialog_simulator/memories/mini_set/memories_metadata_all.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /models/gpt2_mm/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.0.1 2 | pytorch-ignite==0.2.1 3 | transformers==2.1.1 4 | tqdm==4.36.1 5 | 6 | -------------------------------------------------------------------------------- /teaser_memory_dialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/comet_memory_dialog/HEAD/teaser_memory_dialog.png -------------------------------------------------------------------------------- /data/mem_dials_test.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ecbe949172b8878737e2806bd4ba8369c5d3b473f336e4b0ddec86ec2fdf1b7a 3 | size 8252698 4 | -------------------------------------------------------------------------------- /data/mem_dials_val.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dbffd2c3a225a0bb566670e3221ecf737903048a9b42b932433cb7c76f024e29 3 | size 8364828 4 | -------------------------------------------------------------------------------- /data/mem_dials_merged.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8c42a83c9d13f358895f62332713d6c9fd19cc31c5efbb21c5e1ec9fe49a6634 3 | size 156707220 4 | -------------------------------------------------------------------------------- /data/mem_dials_train.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:79adbb17be1565988c0afec8e72ee74b2ad014c2cbc86a5f4f81d91579be7ff9 3 | size 38741569 4 | -------------------------------------------------------------------------------- /data/memory_may21_v1_100graphs.json: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dcd5307759d0b4f80842773f50af1096f8e8df04f105f012b8aa02810158fb63 3 | size 22364689 4 | -------------------------------------------------------------------------------- /data/mscoco_memory_graphs_1k.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7f45f24f7497bb29abaab75c04fe91959f0ef20b7cfa02df13ebc7ed0beeb242 3 | size 225170054 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | data/memory_may21_v1_100graphs.json filter=lfs diff=lfs merge=lfs -text 2 | data/mscoco_memory_graphs_1k.json filter=lfs diff=lfs merge=lfs -text 3 | data/mem_dials_merged.json filter=lfs diff=lfs merge=lfs -text 4 | data/mem_dials_test.json filter=lfs diff=lfs merge=lfs -text 5 | data/mem_dials_train.json filter=lfs diff=lfs merge=lfs -text 6 | data/mem_dials_val.json filter=lfs diff=lfs merge=lfs -text 7 | -------------------------------------------------------------------------------- /models/gpt2_text/run_evaluate_gpt2.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/bin/bash 3 | if [[ $# -lt 1 ]] 4 | then 5 | PATH_DIR=$(realpath .) 6 | else 7 | PATH_DIR=$(realpath "$1") 8 | fi 9 | 10 | 11 | python -m gpt2_dst.scripts.evaluate_dst_response \ 12 | --input_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_target.txt \ 13 | --input_path_predicted="${PATH_DIR}"/gpt2_dst/results/mem_dials_test_predicted.txt \ 14 | --compute_bert_score 15 | -------------------------------------------------------------------------------- /models/gpt2_text/run_generate_gpt2.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/bin/bash 3 | if [[ $# -lt 1 ]] 4 | then 5 | PATH_DIR=$(realpath .) 6 | else 7 | PATH_DIR=$(realpath "$1") 8 | fi 9 | 10 | # Generate sentences (memory dialogs, multi-modal) 11 | python3 -m gpt2_dst.scripts.run_generation \ 12 | --model_type=gpt2 \ 13 | --model_name_or_path="${PATH_DIR}"/gpt2_dst/save/model_run0/checkpoint-23000 \ 14 | --num_return_sequences=1 \ 15 | --length=100 \ 16 | --stop_token='<EOS>' \ 17 | --prompts_from_file="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_predict.txt \ 18 | --path_output="${PATH_DIR}"/gpt2_dst/results/mem_dials_test_predicted_run0.txt 19 | -------------------------------------------------------------------------------- /dialog_simulator/SimulatorBase.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | from Data import MemoryDialog, Goal, Frame 6 | from typing import List 7 | 8 | 9 | class SimulatorBase: 10 | def register_memory_service_api(self, memory_service_api): 11 | self.memory_service_api = memory_service_api 12 | 13 | def fit_goal_to_intent(self, args): 14 | # Define the goal to intent mapping behavior 15 | pass 16 | 17 | def is_servable(self, goal: Goal) -> bool: 18 | # Check whether this simulator can serve the input goal. 
19 | pass 20 | 21 | def generate_nlu_label(self, goal: Goal, context: MemoryDialog) -> Frame: 22 | # Need to define this behavior first e.g. as a config, a model, etc. 23 | pass 24 | 25 | def generate_uttr(self, nlu_label: Frame) -> str: 26 | pass 27 | -------------------------------------------------------------------------------- /models/gpt2_text/run_train_gpt2.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/bin/bash 3 | if [[ $# -lt 1 ]] 4 | then 5 | PATH_DIR=$(realpath .) 6 | else 7 | PATH_DIR=$(realpath "$1") 8 | fi 9 | 10 | # Train (multi-modal) 11 | python3 -m gpt2_dst.scripts.run_language_modeling \ 12 | --output_dir="${PATH_DIR}"/gpt2_dst/save/model_run2 \ 13 | --model_type=gpt2 \ 14 | --model_name_or_path=gpt2 \ 15 | --line_by_line \ 16 | --add_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 17 | --do_train \ 18 | --train_data_file="${PATH_DIR}"/gpt2_dst/data/mem_dials_train_target.txt \ 19 | --do_eval --eval_all_checkpoints \ 20 | --eval_data_file="${PATH_DIR}"/gpt2_dst/data/mem_dials_val_target.txt \ 21 | --num_train_epochs=10 \ 22 | --overwrite_output_dir \ 23 | --per_gpu_train_batch_size=4 \ 24 | --per_gpu_eval_batch_size=4 25 | -------------------------------------------------------------------------------- /models/gpt2_mm/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ICTNLP 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dialog_simulator/DummyMemoryDialogModel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
2 | 3 | #!/usr/bin/env python3 4 | from constants import API_CALL_TYPE, TurnSpeaker, DialogAct 5 | from Data import Turn, Frame, ActAttributes, MemoryDialog, APIResponse, APIRequest 6 | from typing import Dict, Tuple 7 | # NOTE: assumed import; the base class is expected to live in MemoryDialogModel.py. 8 | from MemoryDialogModel import MemoryDialogModelBase 9 | class DummyMemoryDialogModel(MemoryDialogModelBase): 10 | def __init__(self, *args, **kwargs): 11 | super(DummyMemoryDialogModel, self).__init__(*args, **kwargs) 12 | 13 | def predict_api_call(self, query: str) -> Dict: 14 | return { 15 | "call_type": API_CALL_TYPE.SEARCH, 16 | "dialog_act": DialogAct.UNKNOWN, 17 | "slot_values": {}, 18 | "request_slots": [], 19 | "memories": [], 20 | } 21 | 22 | def predict_assistant_response( 23 | self, query: str, api_response: APIResponse, memory_dialog: MemoryDialog 24 | ): 25 | 26 | response_str = ( 27 | "User asked:" 28 | + query 29 | + ". Dialog history: " 30 | + str(memory_dialog) 31 | + ". API response:" 32 | + str(api_response) 33 | ) 34 | 35 | return { 36 | "uttr": response_str, 37 | "dialog_act": DialogAct.UNKNOWN, 38 | "slot_values": {}, 39 | "request_slots": [], 40 | "memories": api_response.results.get("retrieved_memories"), 41 | } 42 | -------------------------------------------------------------------------------- /dialog_simulator/get_user_utterances.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | """ 6 | Extracts user utterances from the merged memory dialog data 7 | and writes them to a TSV file. 8 | """ 9 | import os 10 | import json 11 | import csv 12 | import random 13 | import pickle 14 | import numpy as np 15 | from utils import load_data_pickle 16 | 17 | 18 | if __name__ == "__main__": 19 | random.seed(0) 20 | np.random.seed(0) 21 | 22 | # Input / output paths 23 | path_in_pickle = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_merged.p" 24 | path_out_tsv = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/user_utterances.tsv" 25 | 26 | mm_dialogs = [] 27 | mm_dialogs.extend(load_data_pickle(path_in_pickle)) 28 | 29 | # Output 30 | print("Total: %d dialogs" % len(mm_dialogs)) 31 | 32 | with open(path_out_tsv, "w", newline="") as csvfile: 33 | writer = csv.writer(csvfile, delimiter="\t", quotechar="'") 34 | writer.writerow(["dialog_id", "turn_id", "user_utterance"]) 35 | 36 | for i, mm_dialog in enumerate(mm_dialogs): 37 | user_turns = mm_dialog.dialog.user_turns 38 | dialog_id = mm_dialog.dialog.idx 39 | 40 | for j, user_turn in enumerate(user_turns): 41 | user_uttr = user_turn.frames[-1].uttr 42 | 43 | if user_uttr not in set(["N/A", "NA"]): 44 | row = [dialog_id, j, user_uttr] 45 | writer.writerow(row) 46 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to comet\_memory\_dialog 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | ... (in particular how this is synced with internal changes to the project) 7 | 8 | ## Pull Requests 9 | We actively welcome your pull requests. 10 | 11 | 1. Fork the repo and create your branch from `main`. 12 | 2. If you've added code that should be tested, add tests. 13 | 3. If you've changed APIs, update the documentation. 14 | 4. Ensure the test suite passes. 15 | 5. Make sure your code lints. 16 | 6. 
If you haven't already, complete the Contributor License Agreement ("CLA"). 17 | 18 | ## Contributor License Agreement ("CLA") 19 | In order to accept your pull request, we need you to submit a CLA. You only need 20 | to do this once to work on any of Meta's open source projects. 21 | 22 | Complete your CLA here: <https://code.facebook.com/cla> 23 | 24 | ## Issues 25 | We use GitHub issues to track public bugs. Please ensure your description is 26 | clear and has sufficient instructions to be able to reproduce the issue. 27 | 28 | Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe 29 | disclosure of security bugs. In those cases, please go through the process 30 | outlined on that page and do not file a public issue. 31 | 32 | ## Coding Style 33 | * 4 spaces for indentation rather than tabs 34 | * 80 character line length 35 | * ... 36 | 37 | ## License 38 | By contributing to comet\_memory\_dialog, you agree that your contributions will be licensed 39 | under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /models/gpt2_text/gpt2_dst/scripts/evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/usr/bin/env python3 3 | """ 4 | Scripts for evaluating the GPT-2 DST model predictions. 5 | 6 | First, we parse the line-by-line stringified format into 7 | the structured DST output. 8 | 9 | We then run the main DST Evaluation script to get results. 10 | """ 11 | import argparse 12 | import json 13 | from gpt2_dst.utils.convert import parse_flattened_results_from_file 14 | from utils.evaluate_dst import evaluate_from_flat_list 15 | 16 | 17 | if __name__ == "__main__": 18 | # Parse input args 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument( 21 | "--input_path_target", help="path for target, line-separated format (.txt)" 22 | ) 23 | parser.add_argument( 24 | "--input_path_predicted", 25 | help="path for model prediction output, line-separated format (.txt)", 26 | ) 27 | parser.add_argument( 28 | "--output_path_report", help="path for saving evaluation summary (.json)" 29 | ) 30 | 31 | args = parser.parse_args() 32 | input_path_target = args.input_path_target 33 | input_path_predicted = args.input_path_predicted 34 | output_path_report = args.output_path_report 35 | 36 | # Convert the data from the GPT-2 friendly format to JSON 37 | list_target = parse_flattened_results_from_file(input_path_target) 38 | list_predicted = parse_flattened_results_from_file(input_path_predicted) 39 | 40 | # Evaluate 41 | report = evaluate_from_flat_list(list_target, list_predicted) 42 | 43 | # Save report 44 | with open(output_path_report, "w") as f_out: 45 | json.dump(report, f_out) 46 | -------------------------------------------------------------------------------- /models/gpt2_text/gpt2_dst/scripts/get_best_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #! /usr/bin/env python 3 | """ 4 | Gets the best model given all the checkpoints. 
5 | 6 | Author(s): Satwik Kottur 7 | """ 8 | 9 | from __future__ import absolute_import, division, print_function, unicode_literals 10 | import argparse 11 | import os 12 | import re 13 | 14 | 15 | def main(args): 16 | for folder_name in args["model_checkpoint_folder"]: 17 | listing = [ii for ii in os.listdir(folder_name) if "checkpoint-" in ii] 18 | valid_metrics = {} 19 | for checkpoint_name in listing: 20 | checkpoint_folder = os.path.join(folder_name, checkpoint_name) 21 | eval_path = os.path.join(checkpoint_folder, "eval_results.txt") 22 | epoch_search = re.search(r"checkpoint-(\d*)", checkpoint_name) 23 | with open(eval_path, "r") as file_id: 24 | result = [ii.strip("\n") for ii in file_id.readlines()][0] 25 | perplexity_search = re.search(r"([0-9\.]+)", result) 26 | 27 | # NOTE: Does not handle error conditions. 28 | if perplexity_search is None or epoch_search is None: 29 | print(f"Missing epoch: {checkpoint_name}") 30 | continue 31 | 32 | perplexity = float(perplexity_search.group(1)) 33 | epoch = int(epoch_search.group(1)) 34 | valid_metrics[epoch] = perplexity 35 | 36 | best_epoch, _ = sorted(valid_metrics.items(), key=lambda x: x[1])[0] 37 | best_folder = os.path.join(folder_name, f"checkpoint-{best_epoch}") 38 | print(best_folder) 39 | print("." * 50) 40 | 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser(description=__doc__) 44 | parser.add_argument( 45 | "--model_checkpoint_folder", 46 | nargs="+", 47 | required=True, 48 | help="List of model checkpoint folders", 49 | ) 50 | 51 | try: 52 | parsed_args = vars(parser.parse_args()) 53 | except (IOError) as msg: 54 | parser.error(str(msg)) 55 | main(parsed_args) 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Ignore the data folder. 132 | data 133 | -------------------------------------------------------------------------------- /models/gpt2_text/gpt2_dst/scripts/preprocess_input.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 4 | Scripts for converting the main SIMMC datasets (.JSON format) 5 | into the line-by-line stringified format (and back). 6 | 7 | The reformatted data is used as input for the GPT-2 based 8 | DST model baseline. 9 | """ 10 | from gpt2_dst.utils.convert import convert_json_to_flattened 11 | import argparse 12 | 13 | if __name__ == "__main__": 14 | # Parse input args 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument( 17 | "--input_path_json", help="input path to the original dialog data" 18 | ) 19 | parser.add_argument("--output_path_predict", help="output path for model input") 20 | parser.add_argument("--output_path_target", help="output path for full target") 21 | parser.add_argument( 22 | "--input_path_special_tokens", 23 | help="input path for special tokens. blank if not provided", 24 | default="", 25 | ) 26 | parser.add_argument( 27 | "--output_path_special_tokens", 28 | help="output path for special tokens. 
blank if not saving", 29 | default="", 30 | ) 31 | parser.add_argument( 32 | "--len_context", 33 | help="# of turns to include as dialog context", 34 | type=int, 35 | default=2, 36 | ) 37 | parser.add_argument( 38 | "--use_multimodal_contexts", 39 | help="determine whether to use the multimodal contexts each turn", 40 | type=int, 41 | default=1, 42 | ) 43 | parser.add_argument( 44 | "--no_belief_states", 45 | dest="use_belief_states", 46 | action="store_false", 47 | default=True, 48 | help="determine whether to use belief state for each turn", 49 | ) 50 | 51 | args = parser.parse_args() 52 | input_path_json = args.input_path_json 53 | output_path_predict = args.output_path_predict 54 | output_path_target = args.output_path_target 55 | input_path_special_tokens = args.input_path_special_tokens 56 | output_path_special_tokens = args.output_path_special_tokens 57 | len_context = args.len_context 58 | use_multimodal_contexts = bool(args.use_multimodal_contexts) 59 | 60 | # DEBUG: 61 | print("Belief states: {}".format(args.use_belief_states)) 62 | 63 | # Convert the data into GPT-2 friendly format 64 | convert_json_to_flattened( 65 | input_path_json, 66 | output_path_predict, 67 | output_path_target, 68 | input_path_special_tokens=input_path_special_tokens, 69 | output_path_special_tokens=output_path_special_tokens, 70 | len_context=len_context, 71 | use_multimodal_contexts=use_multimodal_contexts, 72 | use_belief_states=args.use_belief_states, 73 | ) 74 | -------------------------------------------------------------------------------- /models/gpt2_mm/run_me.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | FEATURES="butd" 3 | LOG_PATH="logs/" 4 | MODE="train" 5 | GPU_ID=0 6 | 7 | # Test flags. 8 | MODEL_EPOCH=6 9 | OUTPUT_RESULT_FILE="$LOG_PATH/model_ep${MODEL_EPOCH}_generate.json" 10 | 11 | # Visual features. 12 | FEATURE_PATH="data/memory_features/butd_10w_features/" 13 | VISUAL_FEATURE_SIZE=2053 14 | VISUAL_FEATURE_WIDTH=10 15 | 16 | case $MODE in 17 | "train") 18 | echo "Training.." 19 | # Train Memory Dialog Model. 20 | CUDA_VISIBLE_DEVICES=$GPU_ID \ 21 | python train.py --log_path $LOG_PATH \ 22 | --train_path "data/gpt2_data/mem_dials_gpt2_train.json" \ 23 | --valid_path "data/gpt2_data/mem_dials_gpt2_val.json" \ 24 | --special_tokens_path "data/gpt2_data/mem_dials_gpt2_special_tokens.json" \ 25 | --train_batch_size 8 \ 26 | --predict_belief_state \ 27 | --n_epochs 20 \ 28 | --feature_path $FEATURE_PATH \ 29 | --visual_feature_size $VISUAL_FEATURE_SIZE \ 30 | --visual_feature_width $VISUAL_FEATURE_WIDTH 31 | ;; 32 | 33 | "generate") 34 | # Generate responses from Memory Dialog Model. 35 | CUDA_VISIBLE_DEVICES=$GPU_ID \ 36 | python generate.py \ 37 | --model_checkpoint $LOG_PATH \ 38 | --model_epoch $MODEL_EPOCH \ 39 | --test_set "data/gpt2_data/mem_dials_gpt2_test.json" \ 40 | --special_tokens_path "data/gpt2_data/mem_dials_gpt2_special_tokens.json" \ 41 | --feature_path $FEATURE_PATH \ 42 | --visual_feature_size $VISUAL_FEATURE_SIZE \ 43 | --visual_feature_width $VISUAL_FEATURE_WIDTH \ 44 | --output $OUTPUT_RESULT_FILE \ 45 | --max_len 100 46 | ;; 47 | 48 | "compile") 49 | # Compile results and create JSON files to run standard evaluation. 
50 | python utils/create_result_jsons.py \ 51 | --memory_test_json "data/mem_dials_test.json" \ 52 | --model_output_json $OUTPUT_RESULT_FILE 53 | ;; 54 | esac 55 | 56 | 57 | FEATURE_PATH="/data/img_feats1.0/visdial_img_feat.lmdb" 58 | # Extracting visual features (BUTD features). 59 | # python utils/extract_memory_features.py \ 60 | # --input_dialog_json data/mem_dials_merged.json \ 61 | # --input_memory_json \ 62 | # data/memory_may21_v1_100graphs.json \ 63 | # data/mscoco_memory_graphs_1k.json \ 64 | # --input_feature_path $FEATURE_PATH \ 65 | # --max_bboxes 10 \ 66 | # --feature_save_path data/memory_features/butd_10w_features/ \ 67 | # --feature_type butd 68 | 69 | 70 | # Preprocessing the dataset. 71 | # python utils/preprocess_memory_dataset.py \ 72 | # --train_json_path "data/mem_dials_train.json" \ 73 | # --unseen_json_path \ 74 | # "data/mem_dials_val.json" \ 75 | # "data/mem_dials_test.json" \ 76 | # --save_folder "data/gpt2_data/" 77 | -------------------------------------------------------------------------------- /models/gpt2_text/gpt2_dst/scripts/evaluate_response.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/usr/bin/env python3 3 | """ 4 | Scripts for evaluating the GPT-2 DST model predictions. 5 | 6 | First, we parse the line-by-line stringified format into responses 7 | and compute BLEU score. 8 | """ 9 | import argparse 10 | import json 11 | from gpt2_dst.utils.convert import parse_flattened_results_from_file 12 | from utils.evaluate_dst import evaluate_from_flat_list 13 | 14 | import nltk 15 | import numpy as np 16 | 17 | 18 | def normalize_sentence(sentence): 19 | """Normalize the sentences and tokenize.""" 20 | return nltk.tokenize.word_tokenize(sentence.lower()) 21 | 22 | 23 | def parse_response_from_file(input_path): 24 | """Parses the response from a flattened file. 25 | 26 | Args: 27 | input_path: Path to read the responses from. 28 | """ 29 | lines = [] 30 | with open(input_path, "r") as file_id: 31 | for ii in file_id.readlines(): 32 | split_line = ii.split("<EOB>", 1) 33 | lines.append( 34 | (split_line[0].strip("\n"), split_line[1].strip("\n").strip("<EOS>")) 35 | ) 36 | return lines 37 | 38 | 39 | if __name__ == "__main__": 40 | # Parse input args 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument( 43 | "--input_path_target", help="path for target, line-separated format (.txt)" 44 | ) 45 | parser.add_argument( 46 | "--input_path_predicted", 47 | help="path for model prediction output, line-separated format (.txt)", 48 | ) 49 | parser.add_argument( 50 | "--output_path_report", help="path for saving evaluation summary (.json)" 51 | ) 52 | 53 | args = parser.parse_args() 54 | input_path_target = args.input_path_target 55 | input_path_predicted = args.input_path_predicted 56 | output_path_report = args.output_path_report 57 | 58 | # Convert the data from the GPT-2 friendly format to JSON 59 | list_target = parse_response_from_file(input_path_target) 60 | list_predicted = parse_response_from_file(input_path_predicted) 61 | 62 | # Compute BLEU scores. 63 | bleu_scores = [] 64 | # Smoothing function. 65 | chencherry = nltk.translate.bleu_score.SmoothingFunction() 66 | 67 | for response, gt_response in zip(list_predicted, list_target): 68 | assert response[0] == gt_response[0], "Input contexts do not match!" 
69 | bleu_score = nltk.translate.bleu_score.sentence_bleu( 70 | [normalize_sentence(gt_response[1])], 71 | normalize_sentence(response[1]), 72 | smoothing_function=chencherry.method7, 73 | ) 74 | bleu_scores.append(bleu_score) 75 | print( 76 | "BLEU score: {} +- {}".format( 77 | np.mean(bleu_scores), np.std(bleu_scores) / np.sqrt(len(bleu_scores)) 78 | ) 79 | ) 80 | -------------------------------------------------------------------------------- /models/gpt2_text/run_preprocess_gpt2.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 2 | #!/bin/bash 3 | if [[ $# -lt 1 ]] 4 | then 5 | PATH_DIR=$(realpath .) 6 | PATH_DATA_DIR=$(realpath ../dialog_simulator/final_data) 7 | else 8 | PATH_DIR=$(realpath "$1") 9 | PATH_DATA_DIR=$(realpath "$2") 10 | fi 11 | 12 | # Train split 13 | python3 -m gpt2_dst.scripts.preprocess_input \ 14 | --input_path_json="${PATH_DATA_DIR}"/mem_dials_train_v2.json \ 15 | --output_path_predict="${PATH_DIR}"/gpt2_dst/data/mem_dials_train_predict.txt \ 16 | --output_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_train_target.txt \ 17 | --len_context=2 \ 18 | --use_multimodal_contexts=1 \ 19 | --output_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json 20 | 21 | # --use_multimodal_contexts=1 \ 22 | # Dev split 23 | python3 -m gpt2_dst.scripts.preprocess_input \ 24 | --input_path_json="${PATH_DATA_DIR}"/mem_dials_val_v2.json \ 25 | --output_path_predict="${PATH_DIR}"/gpt2_dst/data/mem_dials_val_predict.txt \ 26 | --output_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_val_target.txt \ 27 | --len_context=2 \ 28 | --use_multimodal_contexts=1 \ 29 | --input_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 30 | --output_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 31 | 32 | # Devtest split 33 | python3 -m gpt2_dst.scripts.preprocess_input \ 34 | --input_path_json="${PATH_DATA_DIR}"/mem_dials_test_v2.json \ 35 | --output_path_predict="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_predict.txt \ 36 | --output_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_target.txt \ 37 | --len_context=2 \ 38 | --use_multimodal_contexts=1 \ 39 | --input_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 40 | --output_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 41 | 42 | # Test split 43 | # python3 -m gpt2_dst.scripts.preprocess_input \ 44 | # --input_path_json="${PATH_DATA_DIR}"/mem_dials_test.json \ 45 | # --output_path_predict="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_predict.txt \ 46 | # --output_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_test_target.txt \ 47 | # --len_context=2 \ 48 | # --use_multimodal_contexts=1 \ 49 | # --input_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 50 | # --output_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 51 | 52 | # Mini split 53 | # python3 -m gpt2_dst.scripts.preprocess_input \ 54 | # --input_path_json="${PATH_DATA_DIR}"/mem_dials_mini.json \ 55 | # --output_path_predict="${PATH_DIR}"/gpt2_dst/data/mem_dials_mini_predict.txt \ 56 | # --output_path_target="${PATH_DIR}"/gpt2_dst/data/mem_dials_mini_target.txt \ 57 | # --len_context=2 \ 58 | # --use_multimodal_contexts=1 \ 59 | # --input_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 60 | # --output_path_special_tokens="${PATH_DIR}"/gpt2_dst/data/mem_special_tokens.json \ 61 | 
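# Usage sketch (the paths below are illustrative, not from the repo): with no
# arguments, PATH_DIR defaults to the current directory and PATH_DATA_DIR to
# ../dialog_simulator/final_data; to override the defaults, pass both directories:
#   bash run_preprocess_gpt2.sh /path/to/models/gpt2_text /path/to/dialog_simulator/final_data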
-------------------------------------------------------------------------------- /dialog_simulator/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | from enum import Enum 6 | 7 | 8 | class GoalType(Enum): 9 | UNKNOWN = "unknown" 10 | SEARCH = "search" 11 | REFINE_SEARCH = "refine_search" 12 | GET_RELATED = "get_related" 13 | GET_INFO = "get_info" 14 | GET_AGGREGATED_INFO = "get_aggregated_info" 15 | SHARE = "share" 16 | CHITCHAT = "chitchat" 17 | 18 | 19 | class DialogAct(Enum): 20 | UNKNOWN = "unknown" 21 | 22 | INFORM_GET = "INFORM:GET" 23 | INFORM_REFINE = "INFORM:REFINE" 24 | INFORM_PREFER = "INFORM:PREFER" 25 | INFORM_DISPREFER = "INFORM:DISPREFER" 26 | INFORM_SHARE = "INFORM:SHARE" 27 | INFORM_DISAMBIGUATE = "INFORM:DISAMBIGUATE" 28 | INFORM_CHITCHAT = "INFORM:CHITCHAT" 29 | 30 | REQUEST_GET = "REQUEST:GET" 31 | REQUEST_REFINE = "REQUEST:REFINE" 32 | REQUEST_PREFER = "REQUEST:PREFER" 33 | REQUEST_DISPREFER = "REQUEST:DISPREFER" 34 | REQUEST_SHARE = "REQUEST:SHARE" 35 | REQUEST_DISAMBIGUATE = "REQUEST:DISAMBIGUATE" 36 | 37 | CONFIRM_GET = "CONFIRM:GET" 38 | CONFIRM_REFINE = "CONFIRM:REFINE" 39 | CONFIRM_PREFER = "CONFIRM:PREFER" 40 | CONFIRM_DISPREFER = "CONFIRM:DISPREFER" 41 | CONFIRM_SHARE = "CONFIRM:SHARE" 42 | CONFIRM_DISAMBIGUATE = "CONFIRM:DISAMBIGUATE" 43 | 44 | PROMPT_GET = "PROMPT:GET" 45 | PROMPT_REFINE = "PROMPT:REFINE" 46 | PROMPT_PREFER = "PROMPT:PREFER" 47 | PROMPT_DISPREFER = "PROMPT:DISPREFER" 48 | PROMPT_SHARE = "PROMPT:SHARE" 49 | PROMPT_DISAMBIGUATE = "PROMPT:DISAMBIGUATE" 50 | 51 | ASK_GET = "ASK:GET" 52 | ASK_REFINE = "ASK:REFINE" 53 | ASK_PREFER = "ASK:PREFER" 54 | ASK_DISPREFER = "ASK:DISPREFER" 55 | ASK_SHARE = "ASK:SHARE" 56 | ASK_DISAMBIGUATE = "ASK:DISAMBIGUATE" 57 | 58 | 59 | class GoalMemoryRefType(Enum): 60 | PREV_TURN = "PREV_TURN" 61 | DIALOG = "DIALOG" 62 | GRAPH = "GRAPH" 63 | NOT_SPECIFIED = "Not Specified" 64 | 65 | 66 | class ObjectRefType(Enum): 67 | R1 = "R1" # Unique object in the scene 68 | R2 = "R2" # Object in the dialog history, same view point 69 | R3 = "R3" # Object in the dialog history, previous view point 70 | NOT_SPECIFIED = "Not Specified" 71 | 72 | 73 | class API_STATUS(Enum): 74 | SEARCH_FOUND = "Search Found" 75 | SEARCH_NOT_FOUND = "Search Not Found" 76 | INFO_FOUND = "Info Found" 77 | INFO_NOT_FOUND = "Info Not Found" 78 | SHARED = "Shared" 79 | 80 | 81 | class API_CALL_TYPE(Enum): 82 | SEARCH = "Search" 83 | REFINE_SEARCH = "Refine Search" 84 | GET_INFO = "Get Info" 85 | SHARE = "Share" 86 | GET_RELATED = "Get Related" 87 | UNDEFINED = "Undefined" 88 | 89 | 90 | class TurnSpeaker(Enum): 91 | USER = "User" 92 | ASSISTANT = "Assistant" 93 | 94 | 95 | numeric_slots = {"time"} 96 | 97 | non_visual_slots = { 98 | "location", 99 | "time", 100 | } 101 | 102 | visual_slots = {"participant", "activity"} 103 | 104 | all_slots = {"time", "location", "participant", "activity"} 105 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, 
sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at <opensource-conduct@fb.com>. All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /models/gpt2_mm/README.md: -------------------------------------------------------------------------------- 1 | # GPT-2 (MM) 2 | This is the code for the GPT-2 model used in [Navigating Connected Memories with a Task-oriented Dialog System][code]. It is based on the AAAI2020-DSTC8-AVSD paper [Bridging Text and Video: A Universal Multimodal Transformer for Video-Audio Scene-Aware Dialog](https://arxiv.org/abs/2002.00163). 3 | 4 | 5 | ## How to Run 6 | 7 | **Requirements** 8 | 9 | ``` 10 | Python 3.6 11 | torch==1.0.1 12 | pytorch-ignite==0.2.1 13 | transformers==2.1.1 14 | tqdm==4.36.1 15 | ``` 16 | 17 | ```shell 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | **Data** 22 | 23 | Create a soft link to the `../../data` folder in the main folder here as `data/`. 24 | Please see `run_me.sh` for an example of how to run the code. 25 | 26 | 27 | 28 | 29 | **Step 1: Preprocess the dataset** 30 | 31 | ```shell 32 | # Preprocessing the dataset. 33 | python utils/preprocess_memory_dataset.py \ 34 | --train_json_path "data/mem_dials_train.json" \ 35 | --unseen_json_path \ 36 | "data/mem_dials_val.json" \ 37 | "data/mem_dials_test.json" \ 38 | --save_folder "data/gpt2_data/" 39 | ``` 40 | 41 | **Step 2: Extracting the image features** 42 | 43 | We use this [repository](https://github.com/vmurahari3/visdial-bert#download-preprocessed-data) to download the image features. 44 | 45 | ```shell 46 | FEATURE_PATH="/data/img_feats1.0/visdial_img_feat.lmdb" 47 | # Extracting visual features (BUTD features). 48 | python utils/extract_memory_features.py \ 49 | --input_dialog_json data/mem_dials_merged.json \ 50 | --input_memory_json \ 51 | data/memory_may21_v1_100graphs.json \ 52 | data/mscoco_memory_graphs_1k.json \ 53 | --input_feature_path $FEATURE_PATH \ 54 | --max_bboxes 10 \ 55 | --feature_save_path data/memory_features/butd_10w_features/ \ 56 | --feature_type butd 57 | ``` 58 | 59 | **Training** 60 | 61 | ```shell 62 | FEATURES="butd" 63 | LOG_PATH="logs/" 64 | # Visual features. 
65 | FEATURE_PATH="data/memory_features/butd_10w_features/" 66 | VISUAL_FEATURE_SIZE=2053 67 | VISUAL_FEATURE_WIDTH=10 68 | 69 | python train.py --log_path $LOG_PATH \ 70 | --train_path "data/gpt2_data/mem_dials_gpt2_train.json" \ 71 | --valid_path "data/gpt2_data/mem_dials_gpt2_val.json" \ 72 | --special_tokens_path "data/gpt2_data/mem_dials_gpt2_special_tokens.json" \ 73 | --train_batch_size 8 \ 74 | --predict_belief_state \ 75 | --n_epochs 20 \ 76 | --feature_path $FEATURE_PATH \ 77 | --visual_feature_size $VISUAL_FEATURE_SIZE \ 78 | --visual_feature_width $VISUAL_FEATURE_WIDTH 79 | ``` 80 | 81 | **Evaluation** 82 | 83 | ```shell 84 | python generate.py \ 85 | --model_checkpoint $LOG_PATH \ 86 | --model_epoch $MODEL_EPOCH \ 87 | --test_set "data/gpt2_data/mem_dials_gpt2_test.json" \ 88 | --special_tokens_path "data/gpt2_data/mem_dials_gpt2_special_tokens.json" \ 89 | --feature_path $FEATURE_PATH \ 90 | --visual_feature_size $VISUAL_FEATURE_SIZE \ 91 | --visual_feature_width $VISUAL_FEATURE_WIDTH \ 92 | --output $OUTPUT_RESULT_FILE \ 93 | --max_len 100 94 | ``` 95 | 96 | **Compiling Results** 97 | 98 | ```shell 99 | python utils/create_result_jsons.py \ 100 | --memory_test_json "data/mem_dials_test.json" \ 101 | --model_output_json $OUTPUT_RESULT_FILE 102 | ``` 103 | 104 | 105 | ## Citation 106 | 107 | If you use this code in your research, please cite our paper and the original AAAI 2020 DSTC8 workshop 108 | paper. 109 | 110 | ``` 111 | @inproceedings{moon-kottur-2022-navigating, 112 | title = "Navigating Connected Memories with a Task-oriented Dialog System", 113 | author = "Moon, Seungwhan and 114 | Kottur, Satwik and 115 | Geramifard, Alborz and 116 | Damavandi, Babak", 117 | booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing", 118 | month = dec, 119 | year = "2022", 120 | address = "Online and Abu Dhabi, United Arab Emirates", 121 | publisher = "Association for Computational Linguistics", 122 | } 123 | ``` 124 | 125 | ``` 126 | @article{li2020bridging, 127 | title={Bridging Text and Video: A Universal Multimodal Transformer for Video-Audio Scene-Aware Dialog}, 128 | author={Zekang Li and Zongjia Li and Jinchao Zhang and Yang Feng and Cheng Niu and Jie Zhou}, 129 | year={2020}, 130 | eprint={2002.00163}, 131 | archivePrefix={arXiv}, 132 | journal={CoRR}, 133 | primaryClass={cs.CL} 134 | } 135 | ``` 136 | 137 | [code]:https://github.com/facebookresearch/comet_memory_dialog -------------------------------------------------------------------------------- /dialog_simulator/memories/mini_set/mini_set_0_memory_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "memory_graph_id":0, 3 | "memories":[ 4 | { 5 | "memory_id":0, 6 | "time":"2021-04-10 10:00:00", 7 | "start_time":"2021-04-10 10:00:00", 8 | "end_time":"2021-04-10 10:30:00", 9 | "narrations":"fun day for skiing.", 10 | "media":[ 11 | { 12 | "media_id":1000, 13 | "type":"video" 14 | } 15 | ], 16 | "location":{ 17 | "gps":{ 18 | "lat":40.00, 19 | "lon":100.00 20 | }, 21 | "geo_tag":{ 22 | "place":"Summit at Snoqualmie", 23 | "city":"Seattle", 24 | "state":"Washington", 25 | "country":"USA" 26 | } 27 | }, 28 | "participant":[ 29 | { 30 | "name":"John", 31 | "memory_graph_id":1 32 | }, 33 | { 34 | "name":"Mary", 35 | "memory_graph_id":2 36 | } 37 | ], 38 | "activity":[ 39 | { 40 | "activity_name":"skiing" 41 | } 42 | ], 43 | "object":[ 44 | 45 | ] 46 | }, 47 | { 48 | "memory_id":1, 49 | "time":"2021-03-10 10:00:00", 50 | "start_time":"2021-03-10 
10:00:00", 51 | "end_time":"2021-03-10 10:30:00", 52 | "narrations":"fun baseball day.", 53 | "media":[ 54 | { 55 | "media_id":1001, 56 | "type":"video" 57 | } 58 | ], 59 | "location":{ 60 | "gps":{ 61 | "lat":41.00, 62 | "lon":110.00 63 | }, 64 | "geo_tag":{ 65 | "place":"T-Mobile Park", 66 | "city":"Seattle", 67 | "state":"Washington", 68 | "country":"USA" 69 | } 70 | }, 71 | "participant":[ 72 | { 73 | "name":"Mary", 74 | "memory_graph_id":2 75 | }, 76 | { 77 | "name":"Jane", 78 | "memory_graph_id":3 79 | } 80 | ], 81 | "activity":[ 82 | { 83 | "activity_name":"baseball" 84 | } 85 | ], 86 | "object":[ 87 | 88 | ] 89 | }, 90 | { 91 | "memory_id":3, 92 | "time":"2020-03-10 10:00:00", 93 | "start_time":"2020-03-10 10:00:00", 94 | "end_time":"2020-03-10 10:30:00", 95 | "narrations":"fun soccer day.", 96 | "media":[ 97 | { 98 | "media_id":1002, 99 | "type":"video" 100 | } 101 | ], 102 | "location":{ 103 | "gps":{ 104 | "lat":41.00, 105 | "lon":110.00 106 | }, 107 | "geo_tag":{ 108 | "place":"CenturyLink Park", 109 | "city":"Seattle", 110 | "state":"Washington", 111 | "country":"USA" 112 | } 113 | }, 114 | "participant":[ 115 | { 116 | "name":"John", 117 | "memory_graph_id":1 118 | }, 119 | { 120 | "name":"Jane", 121 | "memory_graph_id":3 122 | } 123 | ], 124 | "activity":[ 125 | { 126 | "activity_name":"soccer" 127 | } 128 | ], 129 | "object":[ 130 | 131 | ] 132 | }, 133 | { 134 | "memory_id":4, 135 | "time":"2020-05-10 10:00:00", 136 | "start_time":"2020-05-10 10:00:00", 137 | "end_time":"2020-05-10 10:30:00", 138 | "narrations":"fun skiing day.", 139 | "media":[ 140 | { 141 | "media_id":1002, 142 | "type":"video" 143 | } 144 | ], 145 | "location":{ 146 | "gps":{ 147 | "lat":45.00, 148 | "lon":115.00 149 | }, 150 | "geo_tag":{ 151 | "place":"Stubier", 152 | "city":"Innsbruck", 153 | "state":"", 154 | "country":"Austria" 155 | } 156 | }, 157 | "participant":[ 158 | { 159 | "name":"Jane", 160 | "memory_graph_id":3 161 | } 162 | ], 163 | "activity":[ 164 | { 165 | "activity_name":"skiing" 166 | }, 167 | { 168 | "activity_name":"hiking" 169 | } 170 | ], 171 | "object":[ 172 | 173 | ] 174 | } 175 | ] 176 | } 177 | -------------------------------------------------------------------------------- /dialog_simulator/InteractiveDialogHandler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | import random 6 | import json 7 | from MemoryDialogModel import PilotMemoryDialogModel 8 | from Data import MemoryGraph, MemoryDialog, Turn 9 | from MemoryServiceAPI import MemoryServiceAPI 10 | 11 | import sys 12 | 13 | sys.path.append("/Users/shanemoon/workspace/memory_dialog/models/") 14 | from gpt2_dst.scripts.run_generation import load_model 15 | 16 | 17 | class InteractiveDialogHandler: 18 | def __init__(self, *args, **kwargs): 19 | self.model = kwargs.pop("model", None) 20 | self.memory_graph = kwargs.pop("memory_graph", None) 21 | self.api = kwargs.pop("api", None) 22 | 23 | # Start an empty dialog data 24 | self.memory_dialog = MemoryDialog(memory_graph=self.memory_graph) 25 | self.memory_dialog.initialize() 26 | 27 | def execute_turn(self, user_query: str) -> Turn: 28 | """ 29 | Given user_query, construct an API call, 30 | get the API response, and return an Assistant Turn. 
31 | """ 32 | 33 | # Construct the API request 34 | try: 35 | user_turn, api_request = self.model.construct_api_request( 36 | user_query, self.memory_dialog 37 | ) 38 | print("============== API Request ==============") 39 | print(api_request) 40 | print("=========================================\n") 41 | 42 | # Call API to get responses back 43 | api_response = self.api.call_api(api_request) 44 | print("============== API Response ==============") 45 | print(api_response) 46 | print("==========================================\n") 47 | 48 | # Update the display based on the API results 49 | self.model.update_display(api_response) 50 | 51 | # Generate an Assistant response based on the API response 52 | assistant_turn = self.model.construct_assistant_response( 53 | user_query, api_request, api_response, self.memory_dialog 54 | ) 55 | print("============== Assistant Response ==============") 56 | print(assistant_turn) 57 | print("================================================\n") 58 | 59 | # Update the memory_dialog with the new user and assistant turns 60 | self.memory_dialog.dialog.add_user_turn(user_turn) 61 | self.memory_dialog.dialog.add_asst_turn(assistant_turn) 62 | 63 | # Update the model 64 | self.model.prev_asst_uttr = assistant_turn.frames[-1].uttr 65 | self.model.turn_id += 1 66 | 67 | return assistant_turn 68 | 69 | except Exception: 70 | return None 71 | 72 | def run_loop_command_prompt(self): 73 | 74 | while True: 75 | print() 76 | user_query = input(">> Enter your query (or type quit): ") 77 | if user_query == "quit": 78 | break 79 | 80 | response = self.execute_turn(user_query=user_query) 81 | 82 | 83 | if __name__ == "__main__": 84 | # Define paths 85 | # path_memory_graph_list = '/Users/shanemoon/workspace/memory_dialog/dialog_simulator/memories/final/mscoco_memory_graphs_1k.json' 86 | path_memory_graph_list = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/memories/final/mscoco_memory_graphs_mini.json" 87 | path_model = ( 88 | "/Users/shanemoon/workspace/memory_dialog/models/gpt2_dst/save/model_v2" 89 | ) 90 | path_parameter_ontology = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/all_parameter_ontology.json" 91 | 92 | # Hyperparameters for the demo 93 | random_memory_graph = False 94 | 95 | # Load parameters 96 | memory_graph_list = json.load(open(path_memory_graph_list, "r")) 97 | memory_graph_bank = {} 98 | 99 | for memory_graph in memory_graph_list: 100 | memory_graph_id = memory_graph["memory_graph_id"] 101 | 102 | for i in range(len(memory_graph["memories"])): 103 | memory_graph["memories"][i]["memory_graph_id"] = memory_graph_id 104 | 105 | memory_graph_bank[memory_graph_id] = memory_graph 106 | 107 | parameter_ontology = json.load(open(path_parameter_ontology, "r")) 108 | 109 | # Select a Memory Graph 110 | if random_memory_graph: 111 | memory_graph = MemoryGraph( 112 | data=memory_graph_bank[random.choice(list(memory_graph_bank.keys()))] 113 | ) 114 | 115 | else: 116 | memory_graph_id = "RbXAfFDz8r72" 117 | memory_graph = MemoryGraph(data=memory_graph_bank[memory_graph_id]) 118 | 119 | # Load the model parameters 120 | gpt2_model, tokenizer, length = load_model( 121 | model_type="gpt2", model_name_or_path=path_model, device="cpu", length=150 122 | ) 123 | 124 | # Instantiate the dialog handler 125 | model = PilotMemoryDialogModel( 126 | model=gpt2_model, 127 | tokenizer=tokenizer, 128 | length=length, 129 | parameter_ontology=parameter_ontology, 130 | ) 131 | 132 | api = MemoryServiceAPI() 133 | dialog_handler = 
InteractiveDialogHandler( 134 | model=model, memory_graph=memory_graph, api=api 135 | ) 136 | 137 | # Run loop 138 | dialog_handler.run_loop_command_prompt() 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Navigating Connected Memories with a Task-oriented Dialog System 2 | 3 | This repository contains the code to reproduce results from the following paper: 4 | 5 | **Navigating Connected Memories with a Task-oriented Dialog System** 6 | Seungwhan Moon\*, Satwik Kottur\*, Alborz Geramifard, Babak Damavandi 7 | [[PDF][paper_pdf]][[Github][github]] 8 | *Empirical Methods in Natural Language Processing (EMNLP), 2022* 9 | \*=equal contribution 10 | 11 | ### Abstract 12 | 13 | 
14 | ![Teaser Figure](teaser_memory_dialog.png) 19 | 
20 | 21 | 22 | Recent years have seen an increasing trend in the volume of personal media captured by users, thanks 23 | to the advent of smartphones and smart glasses, resulting in large media collections. 24 | Despite conversation being an intuitive human-computer interface, current efforts focus mostly 25 | on single-shot natural language based media retrieval to help users query their media and 26 | re-live their memories. This severely limits the search functionality as users can neither ask 27 | follow-up queries nor obtain information without first formulating a single-turn query. 28 | 29 | In this work, we propose *dialogs for connected memories* as a powerful tool to empower 30 | users to search their media collection through a multi-turn, interactive conversation. 31 | Towards this, we collect a new task-oriented dialog dataset COMET, which contains $11.5k$ 32 | user↔assistant dialogs (totalling $103k$ utterances), grounded in simulated personal memory graphs. 33 | We employ a resource-efficient, two-phase data collection pipeline that uses: 34 | (1) a novel multimodal dialog simulator that generates synthetic dialog flows grounded in 35 | memory graphs, and, 36 | (2) manual paraphrasing to obtain natural language utterances. 37 | We analyze COMET, formulate four main tasks to benchmark meaningful progress, and adopt 38 | state-of-the-art language models as strong baselines, in order to highlight the 39 | multimodal challenges captured by our dataset. 40 | Our code & data will be made publicly available. 41 | 42 | 43 | 44 | ### Code Structure 45 | 46 | The code is organized into two folders: 47 | 48 | **A. Multimodal Dialog Simulator** (`dialog_simulator/`): 49 | Conditioned on the memory graphs generated, the multimodal dialog simulator produces synthetic 50 | dialog flows between a user and an assistant. 51 | These flows are later paraphrased using human annotators to obtain natural language utterances. 52 | 53 | * `AssistantSimulator.py` 54 | * `Data.py` 55 | * `DummyMemoryDialogModel.py` 56 | * `GoalGenerator.py` 57 | * `InteractiveDialogHandler.py` 58 | * `MemoryDialogModel.py` 59 | * `MemoryDialogSimulator.py` 60 | * `MemoryServiceAPI.py` 61 | * `SimulatorBase.py` 62 | * `UserSimulator.py` 63 | * `constants.py` 64 | * `get_user_utterances.py` 65 | * `main.py` 66 | * `merge_data_json.py` 67 | * `merge_synth_and_appen.py` 68 | * `utils.py` 69 | 70 | 71 | **B. Memory-grounded Dialog Models** (`models/`): 72 | 73 | There are two types of models used in this work: 74 | 75 | 1. Text-only GPT-2 model: Memories are represented using their ids. 76 | * `run_preprocess_gpt2.sh`: Preprocessing the memory dialog dataset to make it ingestible for GPT-2 model training 77 | * `run_train_gpt2.sh`: Trains GPT-2 model (text-only) 78 | * `gpt2_dst/`: Folder with GPT-2 model 79 | * `run_evaluate_gpt2.sh`: Contains commands to evaluate a trained GPT-2 model on memory dialogs 80 | * `run_evaluate.sh`: Contains commands to evaluate output prediction JSON of any model on memory dialogs 81 | * `utils/`: Additional utility functions to train and evaluate the GPT-2 model 82 | 83 | 2. Multimodal GPT-2 model: Memories are represented using their image features. 84 | * `run_me.sh`: Contains commands to train, evaluate, and compile the results for GPT-2 (mm). 85 | * `utils/`: Additional utility functions to train and evaluate the GPT-2 model (mm). 
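As a quick orientation, the sketch below loads one of the released splits; the top-level keys (`dialogue_data`, `split`, `year`, `domain`) follow the JSON written by `dialog_simulator/merge_data_json.py`. Note that the files under `data/` are Git LFS pointers, so fetch the actual content first (e.g., `git lfs pull`).

```python
import json

# Minimal sketch: load a released split and count dialogs.
# Top-level keys follow dialog_simulator/merge_data_json.py.
with open("data/mem_dials_train.json") as f:
    dataset = json.load(f)

print(dataset["split"], dataset["domain"], dataset["year"])
print("Number of dialogs:", len(dataset["dialogue_data"]))
```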
86 | 87 | 88 | Please reach out to [Satwik Kottur][satwik_link] (skottur@fb.com) 89 | or [Seungwhan Moon][shane_link] (shanemoon@fb.com) for questions related to this repository. 90 | 91 | 92 | If you find this repository useful, please cite our work: 93 | 94 | ``` 95 | @inproceedings{moon-kottur-2022-navigating, 96 | title = "Navigating Connected Memories with a Task-oriented Dialog System", 97 | author = "Moon, Seungwhan and 98 | Kottur, Satwik and 99 | Geramifard, Alborz and 100 | Damavandi, Babak", 101 | booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing", 102 | month = dec, 103 | year = "2022", 104 | address = "Online and Abu Dhabi, United Arab Emirates", 105 | publisher = "Association for Computational Linguistics", 106 | } 107 | ``` 108 | 109 | 110 | ### LICENSE 111 | *The majority of comet\_memory\_dialog is licensed under CC-BY-NC; however, 112 | portions of the project are available under separate 113 | license terms: https://github.com/ictnlp/DSTC8-AVSD is licensed 114 | under the MIT license.* 115 | 116 | [paper_pdf]: 117 | [github]:https://github.com/facebookresearch/comet_memory_dialog 118 | [curated_lists]: https://drive.google.com/drive/folders/1V4RqUR0oSr2wwI4-ukx_V3NlP9IUHKoT?usp=sharing 119 | [satwik_link]: https://satwikkottur.github.io/ 120 | [shane_link]: https://shanemoon.com/
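For reference, here is a minimal sketch for inspecting a simulated memory graph; it assumes the schema illustrated by `dialog_simulator/memories/mini_set/mini_set_0_memory_graph.json` (the larger released graph files are assumed to follow the same layout).

```python
import json

# Minimal sketch, assuming the mini_set memory graph schema.
with open("dialog_simulator/memories/mini_set/mini_set_0_memory_graph.json") as f:
    graph = json.load(f)

print("Memory graph:", graph["memory_graph_id"])
for memory in graph["memories"]:
    place = memory["location"]["geo_tag"]["place"]
    people = ", ".join(p["name"] for p in memory["participant"])
    activities = ", ".join(a["activity_name"] for a in memory["activity"])
    print(f"- Memory {memory['memory_id']}: {activities} at {place} with {people}")
```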
8 | """ 9 | import os 10 | import json 11 | import csv 12 | import random 13 | import pickle 14 | import numpy as np 15 | from utils import load_data_pickle 16 | 17 | 18 | if __name__ == "__main__": 19 | random.seed(0) 20 | np.random.seed(0) 21 | 22 | # Paths for merge 23 | paths_to_merge = [ 24 | #'/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_1_mem_dials_merged.p', 25 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_2_mem_dials_merged.p", 26 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_1_mem_dials_merged.p", 27 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_2_mem_dials_merged.p", 28 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_3_mem_dials_merged.p", 29 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_4_mem_dials_merged.p", 30 | ] 31 | 32 | path_out_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_merged.json" 33 | path_out_pickle = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_merged.p" 34 | 35 | mm_dialogs = [] 36 | 37 | for path_in_pickle in paths_to_merge: 38 | 39 | # Load original synth 40 | mm_dialogs.extend(load_data_pickle(path_in_pickle)) 41 | 42 | # Output 43 | print("Total: %d dialogs" % len(mm_dialogs)) 44 | 45 | json.dump( 46 | { 47 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs], 48 | "split": "all", 49 | "year": 2021, 50 | "domain": "memory", 51 | }, 52 | open(path_out_json, "w"), 53 | indent=4, 54 | ) 55 | 56 | pickle.dump(mm_dialogs, open(path_out_pickle, "wb")) 57 | 58 | # Split 59 | r_train = 0.85 60 | r_dev = 0.10 61 | r_devtest = 0.04 62 | r_test = 0.01 63 | r_mini = 0.001 64 | 65 | path_out_train_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_train.json" 66 | path_out_dev_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_dev.json" 67 | path_out_devtest_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_devtest.json" 68 | path_out_test_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_test.json" 69 | path_out_mini_json = "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/final_data/mem_dials_mini.json" 70 | 71 | n_dialogs = len(mm_dialogs) 72 | indices = np.arange(n_dialogs) 73 | np.random.shuffle(indices) 74 | n_train = int(n_dialogs * r_train) 75 | n_dev = int(n_dialogs * r_dev) 76 | n_devtest = int(n_dialogs * r_devtest) 77 | n_test = int(n_dialogs * r_test) 78 | n_mini = int(n_dialogs * r_mini) 79 | 80 | train_indices = indices[:n_train] 81 | dev_indices = indices[n_train : n_train + n_dev] 82 | devtest_indices = indices[n_train + n_dev : n_train + n_dev + n_devtest] 83 | test_indices = indices[n_train + n_dev + n_devtest :] 84 | mini_indices = test_indices[:n_mini] 85 | 86 | mm_dialogs_train = [mm_d for i, mm_d in enumerate(mm_dialogs) if i in train_indices] 87 | mm_dialogs_dev = [mm_d for i, mm_d in enumerate(mm_dialogs) if i in dev_indices] 88 | mm_dialogs_devtest = [ 89 | mm_d for i, mm_d in enumerate(mm_dialogs) if i in devtest_indices 90 | ] 91 | mm_dialogs_test = [mm_d for i, mm_d in enumerate(mm_dialogs) if i in test_indices] 92 | mm_dialogs_mini = [mm_d for i, mm_d in enumerate(mm_dialogs) if i in mini_indices] 93 | 94 | json.dump( 95 | { 96 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs_train], 97 | "split": "train", 98 | "year": 2021, 99 | 
"domain": "memory", 100 | }, 101 | open(path_out_train_json, "w"), 102 | indent=4, 103 | ) 104 | 105 | json.dump( 106 | { 107 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs_dev], 108 | "split": "dev", 109 | "year": 2021, 110 | "domain": "memory", 111 | }, 112 | open(path_out_dev_json, "w"), 113 | indent=4, 114 | ) 115 | 116 | json.dump( 117 | { 118 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs_devtest], 119 | "split": "devtest", 120 | "year": 2021, 121 | "domain": "memory", 122 | }, 123 | open(path_out_devtest_json, "w"), 124 | indent=4, 125 | ) 126 | 127 | json.dump( 128 | { 129 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs_test], 130 | "split": "test", 131 | "year": 2021, 132 | "domain": "memory", 133 | }, 134 | open(path_out_test_json, "w"), 135 | indent=4, 136 | ) 137 | 138 | json.dump( 139 | { 140 | "dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs_mini], 141 | "split": "mini", 142 | "year": 2021, 143 | "domain": "memory", 144 | }, 145 | open(path_out_mini_json, "w"), 146 | indent=4, 147 | ) 148 | -------------------------------------------------------------------------------- /models/gpt2_mm/utils/create_result_jsons.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 4 | 5 | Create API and MM-DST result JSONS from model result file. 6 | 7 | Author(s): Satwik Kottur 8 | """ 9 | 10 | from __future__ import absolute_import, division, print_function, unicode_literals 11 | 12 | import argparse 13 | import collections 14 | import copy 15 | import json 16 | import ast 17 | import re 18 | 19 | 20 | def parse_flattened_result(to_parse): 21 | """ 22 | Parse out the belief state from the raw text. 23 | Return an empty list if the belief state can't be parsed 24 | 25 | Input: 26 | - A single of flattened result 27 | e.g. 'User: Show me something else => Belief State : DA:REQUEST ...' 28 | 29 | Output: 30 | - Parsed result in a JSON format, where the format is: 31 | [ 32 | { 33 | 'act': # e.g. 'DA:REQUEST', 34 | 'slots': [ 35 | slot_name, 36 | slot_value 37 | ] 38 | }, ... # End of a frame 39 | ] # End of a dialog 40 | """ 41 | dialog_act_regex = re.compile(r"([\w:?.?]*) *\[(.*)\] *\(([^\]]*)\) *\<([^\]]*)\>") 42 | slot_regex = re.compile(r"([A-Za-z0-9_.-:]*) *= *(\[([^\]]*)\]|[^,]*)") 43 | request_regex = re.compile(r"([A-Za-z0-9_.-:]+)") 44 | object_regex = re.compile(r"([A-Za-z0-9]+)") 45 | 46 | belief = [] 47 | 48 | # Parse 49 | to_parse = to_parse.strip() 50 | # to_parse: 'DIALOG_ACT_1 : [ SLOT_NAME = SLOT_VALUE, ... ] ...' 51 | for dialog_act in dialog_act_regex.finditer(to_parse): 52 | d = { 53 | "act": dialog_act.group(1), 54 | "slots": {}, 55 | "request_slots": [], 56 | "memories": [], 57 | } 58 | for slot in slot_regex.finditer(dialog_act.group(2)): 59 | # If parsing python list eval it else keep unique string. 60 | slot_name = slot.group(1).strip() 61 | slot_values = slot.group(2).strip() 62 | # If there are nones, replace them with Nones and later remove them. 
63 | if re.match('\[.*\]', slot_values): 64 | try: 65 | slot_values = slot_values.replace("none", "None") 66 | parsed_slot_values = ast.literal_eval(slot_values) 67 | d["slots"][slot_name] = [ii for ii in parsed_slot_values if ii] 68 | except: 69 | # If error when parsing the slots add empty string 70 | print(f"Error parsing: {to_parse}") 71 | d["slots"][slot_name] = "" 72 | else: 73 | d["slots"][slot_name] = slot_values 74 | 75 | for request_slot in request_regex.finditer(dialog_act.group(3)): 76 | d["request_slots"].append(request_slot.group(1).strip()) 77 | for object_id in object_regex.finditer(dialog_act.group(4)): 78 | d["memories"].append(object_id.group(1).strip()) 79 | if d != {}: 80 | belief.append(d) 81 | return belief 82 | 83 | 84 | def create_result_jsons(results, test_data): 85 | """Creates two JSON files from results. 86 | 87 | Args: 88 | results: List of generated results from the model. 89 | test_data: Raw JSON test file. 90 | 91 | Returns: 92 | response_results: Dict containing response results 93 | dst_results: Dict containing DST results 94 | """ 95 | dst_results = copy.deepcopy(test_data) 96 | response_results = collections.defaultdict(list) 97 | dst_pool = {} 98 | for instance in results: 99 | dialog_id = instance["dialog_id"] 100 | turn_id = instance["turn_id"] 101 | if instance["type"] == "API": 102 | index = (dialog_id, turn_id) 103 | dst_pool[index] = instance 104 | else: 105 | if dialog_id not in response_results: 106 | response_results[dialog_id] = { 107 | "dialog_id": dialog_id, 108 | "predictions": [], 109 | } 110 | response_results[dialog_id]["predictions"].append( 111 | { 112 | "turn_id": turn_id, 113 | "response": instance["model_prediction"], 114 | } 115 | ) 116 | num_missing = 0 117 | num_present = 0 118 | 119 | for dialog_datum in dst_results["dialogue_data"]: 120 | del dialog_datum["mentioned_memory_ids"] 121 | del dialog_datum["memory_graph_id"] 122 | dialog_id = dialog_datum["dialogue_idx"] 123 | for datum in dialog_datum["dialogue"]: 124 | turn_id = datum["turn_idx"] 125 | index = (dialog_id, turn_id) 126 | if index in dst_pool: 127 | model_pred_datum = dst_pool[index] 128 | model_pred = model_pred_datum["model_prediction"].strip(" ") 129 | parsed_result = parse_flattened_result(model_pred) 130 | datum["transcript_annotated"] = parsed_result 131 | num_present += 1 132 | else: 133 | del datum["transcript_annotated"] 134 | print(f"Missing! -- {index}") 135 | num_missing += 1 136 | print(f"Missing: {num_missing} Present: {num_present}") 137 | return list(response_results.values()), dst_results 138 | 139 | 140 | def main(args): 141 | with open(args["memory_test_json"], "r") as file_id: 142 | test_data = json.load(file_id) 143 | with open(args["model_output_json"], "r") as file_id: 144 | results = json.load(file_id) 145 | response_results, dst_results = create_result_jsons(results, test_data) 146 | 147 | # Save the results. 
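    # Output paths are derived from the model output file, e.g., a
    # (hypothetical) "preds.json" yields "preds_response_results.json"
    # and "preds_dst_results.json".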
148 | response_results_path = args["model_output_json"].replace( 149 | ".json", "_response_results.json" 150 | ) 151 | with open(response_results_path, "w") as file_id: 152 | json.dump(response_results, file_id) 153 | dst_results_path = args["model_output_json"].replace(".json", "_dst_results.json") 154 | with open(dst_results_path, "w") as file_id: 155 | json.dump(dst_results, file_id) 156 | 157 | 158 | if __name__ == "__main__": 159 | parser = argparse.ArgumentParser(description=__doc__) 160 | parser.add_argument( 161 | "--memory_test_json", 162 | required=True, 163 | help="JSON file for test data", 164 | ) 165 | parser.add_argument( 166 | "--model_output_json", required=True, help="JSON file with model outputs" 167 | ) 168 | 169 | try: 170 | parsed_args = vars(parser.parse_args()) 171 | except (IOError) as msg: 172 | parser.error(str(msg)) 173 | main(parsed_args) 174 | -------------------------------------------------------------------------------- /models/gpt2_text/utils/response_evaluation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 3 | 4 | Script evaluates response generation using GT responses. 5 | 6 | Expected JSON format: 7 | 8 | [ 9 | "dialog_id": , 10 | "predictions": [ 11 | { 12 | "turn_id": , 13 | "response": , 14 | } 15 | ... 16 | ] 17 | ... 18 | ] 19 | 20 | Author(s): Satwik Kottur 21 | """ 22 | 23 | from __future__ import absolute_import, division, print_function, unicode_literals 24 | 25 | import argparse 26 | import json 27 | 28 | import nltk 29 | import numpy as np 30 | import tqdm 31 | 32 | 33 | def normalize_sentence(sentence): 34 | """Normalize the sentences and tokenize.""" 35 | return nltk.tokenize.word_tokenize(sentence.lower()) 36 | 37 | 38 | def evaluate_response_generation( 39 | gt_responses, 40 | model_responses, 41 | single_round_eval=False, 42 | record_instance_results=None, 43 | compute_bert_score=False, 44 | ): 45 | """Evaluates response generation using the raw data and model predictions. 46 | 47 | Args: 48 | gt_responses: Ground truth responses. 49 | model_responses: Generated responses. 50 | single_round_eval: Evaluate only for the last turn. 51 | record_instance_results: Save path for instance level metrics. 52 | """ 53 | gt_responses_pool = {ii["dialogue_idx"]: ii for ii in gt_responses["dialogue_data"]} 54 | bleu_scores = [] 55 | # Smoothing function. 56 | chencherry = nltk.translate.bleu_score.SmoothingFunction() 57 | 58 | # Lazy initialization for bert score. 59 | if compute_bert_score: 60 | import bert_score 61 | 62 | bert_scorer = bert_score.BERTScorer(lang="en") 63 | bert_scores = [] 64 | 65 | num_evaluations = 0 66 | for model_datum in tqdm.tqdm(model_responses, desc="Evaluating"): 67 | dialog_id = model_datum["dialog_id"] 68 | num_gt_rounds = len(gt_responses_pool[dialog_id]["dialogue"]) 69 | for round_datum in model_datum["predictions"]: 70 | round_id = round_datum["turn_id"] 71 | # Skip if single_round_eval and this is not the last round. 
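            # (Turn ids are zero-based, so the final round is the one with
            # round_id == num_gt_rounds - 1.)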
72 | if single_round_eval and round_id != num_gt_rounds - 1: 73 | continue 74 | 75 | response = round_datum["response"] 76 | gt_datum = gt_responses_pool[dialog_id]["dialogue"][round_id] 77 | gt_response = gt_datum["system_transcript"] 78 | try: 79 | gt_response_clean = normalize_sentence(gt_response) 80 | response_clean = normalize_sentence(response) 81 | bleu_score = nltk.translate.bleu_score.sentence_bleu( 82 | [gt_response_clean], 83 | response_clean, 84 | smoothing_function=chencherry.method7, 85 | ) 86 | bleu_scores.append(bleu_score) 87 | 88 | if compute_bert_score: 89 | _, _, bert_f1 = bert_scorer.score( 90 | [" ".join(response_clean)], [" ".join(gt_response_clean)] 91 | ) 92 | bert_scores.append(bert_f1.item()) 93 | except: 94 | print(f"Model: {response} -> GT: {gt_response}") 95 | 96 | # Add the result to datum and save it back. 97 | if record_instance_results: 98 | round_datum["bleu"] = bleu_score 99 | round_datum["response_len"] = len(normalize_sentence(gt_response)) 100 | if compute_bert_score: 101 | round_datum["bert_score"] = bert_f1 102 | 103 | print("#Instances evaluated BLEU: {}".format(len(bleu_scores))) 104 | if record_instance_results: 105 | print(f"Saving per instance results: {record_instance_results}") 106 | with open(record_instance_results, "w") as file_id: 107 | json.dump(model_responses, file_id) 108 | 109 | bleu_str_mean = np.mean(bleu_scores) 110 | bleu_str_err = np.std(bleu_scores) / np.sqrt(len(bleu_scores)) 111 | if compute_bert_score: 112 | bert_score_mean = np.mean(bert_scores) 113 | bert_score_err = np.std(bert_scores) / np.sqrt(len(bert_scores)) 114 | else: 115 | bert_score_mean, bert_score_err = None, None 116 | return bleu_str_mean, bleu_str_err, bert_score_mean, bert_score_err 117 | 118 | 119 | def main(args): 120 | print("Reading: {}".format(args["data_json_path"])) 121 | with open(args["data_json_path"], "r") as file_id: 122 | gt_responses = json.load(file_id) 123 | print("Reading: {}".format(args["model_response_path"])) 124 | with open(args["model_response_path"], "r") as file_id: 125 | model_responses = json.load(file_id) 126 | 127 | if args["record_instance_results"]: 128 | instance_results_path = args["model_response_path"].replace( 129 | ".json", "_results.json" 130 | ) 131 | else: 132 | instance_results_path = None 133 | 134 | bleu_score, bleu_std_err, bert_score, bert_score_err = evaluate_response_generation( 135 | gt_responses, 136 | model_responses, 137 | args["single_round_evaluation"], 138 | instance_results_path, 139 | args["compute_bert_score"], 140 | ) 141 | print(f"BLEU Score: {bleu_score:.4f} +- {bleu_std_err}") 142 | if args["compute_bert_score"]: 143 | print(f"BERT Score: {bert_score:.4f} +- {bert_score_err}") 144 | report = { 145 | "bleu_score": bleu_score, 146 | "bleu_std_err": bleu_std_err, 147 | "bert_score": bert_score, 148 | "bert_score_err": bert_score_err, 149 | } 150 | return report 151 | 152 | 153 | if __name__ == "__main__": 154 | parser = argparse.ArgumentParser(description="Response Generation Evaluation") 155 | parser.add_argument( 156 | "--data_json_path", 157 | default="data/mem_dials_devtest.json", 158 | help="Data with gold responses", 159 | ) 160 | parser.add_argument( 161 | "--model_response_path", default=None, help="Responses generated by the model" 162 | ) 163 | parser.add_argument( 164 | "--single_round_evaluation", 165 | dest="single_round_evaluation", 166 | action="store_true", 167 | default=False, 168 | help="Single round evaluation for hidden split", 169 | ) 170 | parser.add_argument( 171 | 
"--record_instance_results", 172 | dest="record_instance_results", 173 | action="store_true", 174 | default=False, 175 | help="Records per instance results and save it back", 176 | ) 177 | parser.add_argument( 178 | "--compute_bert_score", 179 | dest="compute_bert_score", 180 | action="store_true", 181 | default=False, 182 | help="Compute BERT score along with BLEU-4", 183 | ) 184 | try: 185 | parsed_args = vars(parser.parse_args()) 186 | except (IOError) as msg: 187 | parser.error(str(msg)) 188 | main(parsed_args) 189 | -------------------------------------------------------------------------------- /models/gpt2_text/gpt2_dst/scripts/reformat_dst_response_outputs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved 4 | 5 | Scripts for evaluating the GPT-2 DST model predictions. 6 | 7 | First, we parse the line-by-line stringified format into responses 8 | and compute BLEU score. 9 | """ 10 | import argparse 11 | import ast 12 | import copy 13 | import json 14 | import re 15 | 16 | import numpy as np 17 | import tqdm 18 | from gpt2_dst.utils.convert import parse_flattened_result 19 | 20 | 21 | def convert_slots_to_dict(api_call_json): 22 | """Converts the slots from list of lists to a dict. 23 | 24 | Args: 25 | api_call_json: JSON containing the parsed API call 26 | """ 27 | for frame_ind, frame in enumerate(api_call_json): 28 | slot_dict = {} 29 | for slot_name, slot_value in frame["slots"]: 30 | if re.match("\[.*\]", slot_value): 31 | try: 32 | slot_dict[slot_name] = ast.literal_eval(slot_value) 33 | except: 34 | # If error when parsing the slots add empty string 35 | print(f"Error parsing: {slot_value} -> {frame}") 36 | slot_dict[slot_name] = "" 37 | else: 38 | slot_dict[slot_name] = slot_value 39 | frame["slots"] = slot_dict 40 | return api_call_json 41 | 42 | 43 | def parse_results_from_file(input_path, turn_info, original_data): 44 | """Parse targets from a flattened file to create response, dst evaluation files. 45 | 46 | Args: 47 | input_path: Path to read the responses from. 48 | turn_info: List of dialog, turn info. 49 | original_data: Original JSON target. 50 | 51 | Returns: 52 | dst_json: JSON file with DST results 53 | responses_json: JSON file with responses 54 | """ 55 | # Collate all lines to ensure they start with either or . 56 | with open(input_path, "r") as file_id: 57 | lines = [ii.strip() for ii in file_id.readlines()] 58 | 59 | fixed_lines = [] 60 | current_line = "" 61 | for line in lines: 62 | if line[:6] == "" or line[:8] == "": 63 | fixed_lines.append(line) 64 | else: 65 | fixed_lines[-1] += line 66 | print(f"Collating: {len(lines)} -> {len(fixed_lines)}") 67 | lines = fixed_lines 68 | 69 | # Identify API call string and response in each line. 70 | assert len(lines) == len(turn_info), "#lines and #turn_info do not match!" 71 | responses_json = {} 72 | dst_pool = {} 73 | for line_ind, line in enumerate(lines): 74 | dialog_id, turn_id, prediction_type = turn_info[line_ind] 75 | if prediction_type == "api_call": 76 | api_call_json = parse_flattened_result(line.split("")[0] + "") 77 | # Convert slots from list of list to dicts. 78 | api_call_json = convert_slots_to_dict(api_call_json) 79 | dst_index = (dialog_id, turn_id) 80 | assert dst_index not in dst_pool, "Result already exists!" 81 | dst_pool[dst_index] = api_call_json 82 | # Check if memories are integers, else skip. 
83 | for frame_info in api_call_json: 84 | memories = [] 85 | for ii in frame_info["memories"]: 86 | try: 87 | ii_int = int(ii) 88 | memories.append(ii) 89 | except: 90 | pass 91 | frame_info["memories"] = memories 92 | 93 | elif prediction_type == "response": 94 | response_str = line.split("")[-1].strip() 95 | if dialog_id not in responses_json: 96 | responses_json[dialog_id] = { 97 | "dialog_id": dialog_id, 98 | "predictions": [], 99 | } 100 | responses_json[dialog_id]["predictions"].append( 101 | { 102 | "turn_id": turn_id, 103 | "response": response_str, 104 | } 105 | ) 106 | 107 | else: 108 | raise ValueError(f"Invalid prediction_type: {prediction_type}!") 109 | responses_json = list(responses_json.values()) 110 | 111 | num_missing = 0 112 | num_present = 0 113 | dst_json = copy.deepcopy(original_data) 114 | for dialog_datum in dst_json["dialogue_data"]: 115 | del dialog_datum["mentioned_memory_ids"] 116 | del dialog_datum["memory_graph_id"] 117 | dialog_id = dialog_datum["dialogue_idx"] 118 | for datum in dialog_datum["dialogue"]: 119 | del datum["transcript_annotated"] 120 | turn_id = datum["turn_idx"] 121 | index = (dialog_id, turn_id) 122 | if index in dst_pool: 123 | datum["transcript_annotated"] = dst_pool[index] 124 | num_present += 1 125 | else: 126 | print(f"Missing! -- {index}") 127 | num_missing += 1 128 | print(f"Missing: {num_missing} Present: {num_present}") 129 | return dst_json, responses_json 130 | 131 | 132 | if __name__ == "__main__": 133 | # Parse input args 134 | parser = argparse.ArgumentParser() 135 | parser.add_argument( 136 | "--input_target_json", required=True, help="Path to target JSON file" 137 | ) 138 | parser.add_argument( 139 | "--input_dialog_ids", 140 | required=True, 141 | help="Path for dialog, turn ids for input (.txt)", 142 | ) 143 | parser.add_argument( 144 | "--input_path_predicted", 145 | required=True, 146 | help="path for model prediction output, line-separated format (.txt)", 147 | ) 148 | parser.add_argument( 149 | "--output_path_report", 150 | required=True, 151 | help="Path to save evaluation summary (dst and response) (.json)", 152 | ) 153 | args = parser.parse_args() 154 | 155 | input_path_predicted = args.input_path_predicted 156 | output_path_report = args.output_path_report 157 | # Read the input target JSON file. 158 | with open(args.input_target_json, "r") as file_id: 159 | original_data = json.load(file_id) 160 | 161 | # Read the dialog and turn ids. 162 | with open(args.input_dialog_ids, "r") as file_id: 163 | turn_info = [ast.literal_eval(ii.strip("\n")) for ii in file_id.readlines()] 164 | # Convert the data from the GPT-2 friendly format to JSON formats. 165 | dst_json, responses_json = parse_results_from_file( 166 | input_path_predicted, turn_info, original_data 167 | ) 168 | 169 | # Saving both the DST and response JSON. 
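    # Both output files reuse the report path stem, e.g., a (hypothetical)
    # "report.json" yields "report_dst_results.json" and
    # "report_response_results.json".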
170 |     dst_json_path = args.output_path_report.replace(".json", "_dst_results.json")
171 |     print(f"Saving DST results: {dst_json_path}")
172 |     with open(dst_json_path, "w") as file_id:
173 |         json.dump(dst_json, file_id)
174 |     responses_json_path = args.output_path_report.replace(
175 |         ".json", "_response_results.json"
176 |     )
177 |     print(f"Saving responses: {responses_json_path}")
178 |     with open(responses_json_path, "w") as file_id:
179 |         json.dump(responses_json, file_id)
180 | 
--------------------------------------------------------------------------------
/dialog_simulator/merge_synth_and_appen.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
2 | 
3 | 
4 | #!/usr/bin/env python3
5 | """
6 | Description: merges the synthetically generated dialogs (.json, .p)
7 | and the comma-separated Appen paraphrase annotations (.csv)
8 | to output the merged dialogs in both .json and .p formats
9 | """
10 | import os
11 | import json
12 | import csv
13 | import random
14 | import pickle
15 | from utils import load_data_pickle
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     # Parameters for generation
20 |     path_tuples = [
21 |         # Pilot 1: 50 dialogs
22 |         # [
23 |         #     '/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_1_mem_dials.p',
24 |         #     '/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv',
25 |         #     '/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_1_mem_dials_merged.json',
26 |         #     '/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_1_mem_dials_merged.p',
27 |         # ],
28 |         # Pilot 2: 450 dialogs
29 |         [
30 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_2_mem_dials.p",
31 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv",
32 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_2_mem_dials_merged.json",
33 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/pilot_2_mem_dials_merged.p",
34 |         ],
35 |         # Batch 1: 2000 dialogs
36 |         [
37 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_1_mem_dials.p",
38 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv",
39 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_1_mem_dials_merged.json",
40 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_1_mem_dials_merged.p",
41 |         ],
42 |         # Batch 2: 500 dialogs
43 |         [
44 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_2_mem_dials.p",
45 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv",
46 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_2_mem_dials_merged.json",
47 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_2_mem_dials_merged.p",
48 |         ],
49 |         # Batch 3: 2000 dialogs
50 |         [
51 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_3_mem_dials.p",
52 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv",
53 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_3_mem_dials_merged.json",
54 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_3_mem_dials_merged.p",
55 |         ],
56 |         # Batch 4: 6000 dialogs
57 |         [
58 |             "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_4_mem_dials.p",
59 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/paraphrased_0622.csv", 60 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_4_mem_dials_merged.json", 61 | "/Users/shanemoon/workspace/memory_dialog/dialog_simulator/results/batch_4_mem_dials_merged.p", 62 | ], 63 | ] 64 | 65 | for path_tuple in path_tuples: 66 | path_in_synth = path_tuple[0] 67 | path_in_appen = path_tuple[1] 68 | path_out_json = path_tuple[2] 69 | path_out_pickle = path_tuple[3] 70 | 71 | # Load original synth 72 | original_dialogs = load_data_pickle(path_in_synth) 73 | mm_dialogs = [] 74 | 75 | # Load paraphrased 76 | fieldname_to_turn_idx = { 77 | "turn0_paraphrase": 0, 78 | "turn1_paraphrase": 1, 79 | "turn2_paraphrase": 2, 80 | "turn3_paraphrase": 3, 81 | "turn4_paraphrase": 4, 82 | "turn5_paraphrase": 5, 83 | "turn6_paraphrase": 6, 84 | "turn7_paraphrase": 7, 85 | "turn8_paraphrase": 8, 86 | "turn9_paraphrase": 9, 87 | "turn10_paraphrase": 10, 88 | "turn11_paraphrase": 11, 89 | "turn12_paraphrase": 12, 90 | "turn13_paraphrase": 13, 91 | "turn14_paraphrase": 14, 92 | "turn15_paraphrase": 15, 93 | "turn16_paraphrase": 16, 94 | "turn17_paraphrase": 17, 95 | "turn18_paraphrase": 18, 96 | "turn19_paraphrase": 19, 97 | "turn20_paraphrase": 20, 98 | "turn21_paraphrase": 21, 99 | "turn22_paraphrase": 22, 100 | "turn23_paraphrase": 23, 101 | } 102 | COL_DIALOG_ID = 88 103 | 104 | turn_idx_to_col = {} 105 | dialog_id_to_utter = {} 106 | 107 | with open(path_in_appen, "r", encoding="mac_roman") as f: 108 | reader = csv.reader(f, delimiter=",", quotechar='"') 109 | for i, line in enumerate(reader): 110 | if i == 0: 111 | for col_id, fieldname in enumerate(line): 112 | 113 | if fieldname in fieldname_to_turn_idx: 114 | turn_idx = fieldname_to_turn_idx[fieldname] 115 | turn_idx_to_col[turn_idx] = col_id 116 | 117 | else: 118 | dialog_id = int(line[COL_DIALOG_ID]) 119 | dialog_id_to_utter[dialog_id] = [] 120 | 121 | for turn_idx in range(len(turn_idx_to_col)): 122 | if turn_idx in turn_idx_to_col: 123 | 124 | utter = line[turn_idx_to_col[turn_idx]] 125 | utter = utter.strip() 126 | 127 | if utter != "": 128 | dialog_id_to_utter[dialog_id].append(utter) 129 | 130 | else: 131 | if turn_idx < 16: 132 | print( 133 | "Check dialog id %d, turn %d" 134 | % (dialog_id, turn_idx) 135 | ) 136 | 137 | # Merge 138 | for i, mm_d in enumerate(original_dialogs): 139 | d = mm_d.dialog 140 | dialog_id = d.idx 141 | 142 | if dialog_id not in dialog_id_to_utter: 143 | print("Dialog %d is missing." % dialog_id) 144 | continue 145 | 146 | mm_dialogs.append(mm_d) 147 | n_rounds = int(len(dialog_id_to_utter[dialog_id]) / 2) 148 | 149 | # TODO: discarding the utterances with missing paraphrases for now 150 | # Causes: residuals & incompletes from annotations, etc. 
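            # Each round pairs one user and one assistant turn: the paraphrase
            # columns alternate, with the user utterance at index 2*j and the
            # assistant utterance at index 2*j + 1 (see below).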
151 | mm_dialogs[-1].dialog.user_turns = mm_dialogs[-1].dialog.user_turns[ 152 | :n_rounds 153 | ] 154 | mm_dialogs[-1].dialog.asst_turns = mm_dialogs[-1].dialog.asst_turns[ 155 | :n_rounds 156 | ] 157 | 158 | for j in range(n_rounds): 159 | 160 | try: 161 | user_turn = d.user_turns[j] 162 | asst_turn = d.asst_turns[j] 163 | 164 | user_turn_idx = j * 2 165 | asst_turn_idx = j * 2 + 1 166 | 167 | user_paraphrase = dialog_id_to_utter[dialog_id][user_turn_idx] 168 | asst_paraphrase = dialog_id_to_utter[dialog_id][asst_turn_idx] 169 | 170 | mm_dialogs[-1].dialog.user_turns[j].frames[ 171 | -1 172 | ].uttr = user_paraphrase 173 | mm_dialogs[-1].dialog.asst_turns[j].frames[ 174 | -1 175 | ].uttr = asst_paraphrase 176 | 177 | except: 178 | print("Missing rounds %d from dialog %d" % (j, dialog_id)) 179 | print(len(dialog_id_to_utter[dialog_id])) 180 | print(len(d.user_turns)) 181 | 182 | # Output 183 | print("Outputting JSON file at %s..." % path_out_json) 184 | json.dump( 185 | {"dialogue_data": [mm_d.to_dict() for mm_d in mm_dialogs]}, 186 | open(path_out_json, "w"), 187 | indent=4, 188 | ) 189 | 190 | pickle.dump(mm_dialogs, open(path_out_pickle, "wb")) 191 | -------------------------------------------------------------------------------- /dialog_simulator/MemoryDialogSimulator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | import json, random, traceback, os 6 | from typing import List, Tuple 7 | from constants import TurnSpeaker, DialogAct, API_STATUS 8 | from Data import Dialog, MemoryDialog, MemoryGraph, Turn, Goal 9 | from UserSimulator import PilotUserSimulator 10 | from AssistantSimulator import PilotAssistantSimulator 11 | from GoalGenerator import RuleBasedGoalGenerator 12 | from MemoryServiceAPI import MemoryServiceAPI 13 | from utils import build_parameter_ontology 14 | 15 | random.seed(0) 16 | 17 | 18 | class MemoryDialogSimulator: 19 | def __init__(self, *args, **kwargs): 20 | # Initialize user simulator, assistant simulator, memory_graphs etc. 
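        # All components below fall back to defaults (pilot user/assistant
        # simulators, a rule-based goal generator, an empty memory graph
        # bank) unless explicitly overridden via keyword arguments.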
21 | self.domain = kwargs.pop("domain") 22 | self._memory_service_api = kwargs.pop("memory_service_api", MemoryServiceAPI()) 23 | self._user_simulator = kwargs.pop("user_simulator", PilotUserSimulator()) 24 | self._assistant_simulator = kwargs.pop( 25 | "assistant_simulator", PilotAssistantSimulator() 26 | ) 27 | self._goal_generator = kwargs.pop( 28 | "goal_generator", RuleBasedGoalGenerator(domain=self.domain) 29 | ) 30 | self._memory_graph_bank = kwargs.pop("memory_graph_bank", {}) 31 | 32 | self._user_simulator.register_memory_service_api(self._memory_service_api) 33 | self._assistant_simulator.register_memory_service_api(self._memory_service_api) 34 | 35 | def set_user_simulator(self, user_simulator): 36 | self._user_simulator = user_simulator 37 | 38 | def set_assistant_simulator(self, assistant_simulator): 39 | self._assistant_simulator = assistant_simulator 40 | 41 | def set_goal_generator(self, goal_generator): 42 | self._goal_generator = goal_generator 43 | 44 | def set_memory_service_api(self, memory_service_api): 45 | self._memory_service_api = memory_service_api 46 | 47 | def sample_goals(self, memory_graph, goal_config) -> List[Goal]: 48 | return self._goal_generator.sample_goals( 49 | memory_graph=memory_graph, goal_config=goal_config 50 | ) 51 | 52 | def sample_memory_graph(self) -> MemoryGraph: 53 | if self._memory_graph_bank == {}: 54 | # Empty memory graph 55 | return MemoryGraph() 56 | 57 | # Randomly sample a memory 58 | # TODO: allow for more organized way of sampling memories 59 | memory_graph_id = random.choice(list(self._memory_graph_bank.keys())) 60 | memory_graph = self._memory_graph_bank[memory_graph_id] 61 | 62 | return MemoryGraph(data=memory_graph) 63 | 64 | def batch_generate_dialog_flows( 65 | self, 66 | n_dialogs: int, 67 | n_max_turns: int, 68 | start_dialog_idx: int, 69 | goal_config: dict = {}, 70 | ) -> List[MemoryGraph]: 71 | 72 | # Batch generate multiple dialogs using the same simulators 73 | memory_dialogs = [] 74 | 75 | for i in range(n_dialogs): 76 | # Continue until generation is successful 77 | generation_success = False 78 | 79 | while not generation_success: 80 | try: 81 | # Sample a memory graph (user) 82 | memory_graph = self.sample_memory_graph() 83 | 84 | # Create an empty memory dialog 85 | memory_dialog = MemoryDialog(memory_graph=memory_graph) 86 | 87 | # Generate Goal Config 88 | goal_config["parameter_ontology"] = build_parameter_ontology( 89 | memory_dialog.memory_graph, 90 | self._memory_service_api.metadata, 91 | self.domain, 92 | ) 93 | 94 | # Sample goals for this dialog 95 | goals = self.sample_goals( 96 | memory_graph=memory_dialog.memory_graph, goal_config=goal_config 97 | ) 98 | 99 | # Generate dialog flow 100 | memory_dialog = self.generate_dialog_flow( 101 | goals, memory_dialog, n_max_turns 102 | ) 103 | memory_dialog.dialog.idx = start_dialog_idx + i 104 | 105 | # If everything is successful, append to memory_dialogs 106 | generation_success = True 107 | memory_dialogs.append(memory_dialog) 108 | 109 | except: 110 | # TODO: Make a more robust abort strategy 111 | print("** Error in generating dialog. Ignoring this one. 
**") 112 | traceback.print_exc() 113 | print() 114 | 115 | return memory_dialogs 116 | 117 | def generate_dialog_flow( 118 | self, 119 | goals: List[Goal], 120 | memory_dialog: MemoryDialog, 121 | n_max_turns: int, 122 | initialize=True, 123 | ) -> MemoryDialog: 124 | 125 | if initialize: 126 | # Initialize memory_dialog 127 | memory_dialog.initialize() 128 | 129 | # Iterate and generate a dialog turn by turn 130 | i = 0 131 | while not goals == [] and i < n_max_turns: 132 | 133 | # Pick a goal 134 | current_goal = goals.pop(0) 135 | goal_met = False 136 | print("Goal:", current_goal) 137 | 138 | while not goal_met and i < n_max_turns: 139 | 140 | # Generate a turn 141 | memory_dialog = self.generate_turn(current_goal, memory_dialog) 142 | 143 | # End of a turn: update dialog & goals 144 | i += 1 145 | goal_met = memory_dialog.is_goal_met(current_goal) 146 | 147 | is_valid_dialog = self.validate_dialog(memory_dialog) 148 | if not is_valid_dialog: 149 | # If something is not right about this dialog, abort. 150 | # TODO: abort gracefully 151 | assert False 152 | 153 | return memory_dialog 154 | 155 | def generate_turn(self, goal: Goal, memory_dialog: MemoryDialog) -> MemoryDialog: 156 | 157 | # TODO: extend it for multiple frames per turn 158 | 159 | # (1) Generate a User turn, given a target goal and a memory_dialog 160 | # Generate dialog act and slots 161 | user_frame = self._user_simulator.execute_turn(goal, memory_dialog) 162 | 163 | # Template based utterance generation 164 | user_frame = self._user_simulator.generate_uttr(user_frame, goal) 165 | 166 | # Instantiate a user turn, and update the memory_dialog 167 | user_turn = Turn([user_frame], TurnSpeaker.USER, goal) 168 | memory_dialog.dialog.add_user_turn(user_turn) 169 | print("U:", user_turn) 170 | 171 | # (2) Generate a Assistant turn, given a target goal and a memory_dialog 172 | # Generate dialog act and slots 173 | asst_frame, api_request, api_result = self._assistant_simulator.execute_turn( 174 | goal, memory_dialog 175 | ) 176 | 177 | # Template based utterance generation 178 | asst_frame = self._assistant_simulator.generate_uttr(asst_frame, goal) 179 | 180 | # Instantiate a user turn, and update the memory_dialog 181 | asst_turn = Turn([asst_frame], TurnSpeaker.ASSISTANT, goal) 182 | memory_dialog.dialog.add_asst_turn(asst_turn) 183 | print("A:", asst_turn) 184 | 185 | # Add goals and api_calls 186 | memory_dialog.dialog.add_goal(goal) 187 | memory_dialog.dialog.add_api_call(api_request) 188 | memory_dialog.dialog.add_api_result(api_result) 189 | 190 | return memory_dialog 191 | 192 | def validate_dialog(self, memory_dialog: MemoryDialog) -> bool: 193 | # Check for any undesirable traits of a dialog 194 | n_turns = len(memory_dialog.dialog.asst_turns) 195 | 196 | # (1) Multiple sharing of the same memory 197 | set_shared_memory_ids = set() 198 | for user_turn in memory_dialog.dialog.user_turns: 199 | # TODO: Handle multiple frames per turn 200 | dialog_act = user_turn.frames[-1].dialog_act 201 | 202 | if dialog_act == DialogAct.REQUEST_SHARE: 203 | memories_to_share = user_turn.frames[-1].act_attributes.memories 204 | for m in memories_to_share: 205 | memory_id = m.data["memory_id"] 206 | if memory_id in set_shared_memory_ids: 207 | # If this memory_id is already shared, abort 208 | return False 209 | set_shared_memory_ids.add(memory_id) 210 | 211 | # (2) Too frequent search fails 212 | n_search_fails = 0 213 | for api_result in memory_dialog.dialog.api_results: 214 | status = api_result.status 215 | if status == 
API_STATUS.SEARCH_NOT_FOUND: 216 | n_search_fails += 1 217 | 218 | if (n_turns <= 4 and n_search_fails >= 2) or ( 219 | n_turns > 4 and n_search_fails >= 3 220 | ): 221 | return False 222 | 223 | # Otherwise, this dialog is good. 224 | return True 225 | -------------------------------------------------------------------------------- /dialog_simulator/GoalGenerator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | #!/usr/bin/env python3 5 | import random 6 | from constants import ( 7 | GoalType, 8 | GoalMemoryRefType, 9 | numeric_slots, 10 | non_visual_slots, 11 | visual_slots, 12 | all_slots, 13 | ) 14 | from Data import Goal, GoalParameter, MemoryTime 15 | from utils import weighted_choice 16 | import copy 17 | 18 | random.seed(0) 19 | 20 | 21 | class RuleBasedGoalGenerator: 22 | def __init__(self, *args, **kwargs): 23 | self.non_visual_slots = non_visual_slots 24 | self.visual_slots = visual_slots 25 | self.all_slots = all_slots 26 | 27 | def sample_goals(self, *args, **kwargs): 28 | memory_graph = kwargs.pop("memory_graph", None) 29 | goal_config = kwargs.pop("goal_config", {}) 30 | n_min_goals = goal_config.get("n_min_goals", 3) 31 | n_max_goals = goal_config.get("n_max_goals", 5) 32 | n_goals = random.randint(n_min_goals, n_max_goals) 33 | 34 | goal_type_list = [ 35 | GoalType.SEARCH, 36 | GoalType.REFINE_SEARCH, 37 | GoalType.GET_RELATED, 38 | GoalType.GET_INFO, 39 | GoalType.GET_AGGREGATED_INFO, 40 | GoalType.SHARE, 41 | GoalType.CHITCHAT, 42 | ] 43 | goal_type_list_weights_start = [ 44 | 1, 45 | 0, 46 | 0, 47 | 0, 48 | 0, 49 | 0, 50 | 0, 51 | # 1, 0, 0, 0, 1, 0, 0, 52 | ] 53 | 54 | goal_type_list_weights_mid = [ 55 | 0.8, 56 | 1.1, 57 | 1.7, 58 | 1.1, 59 | 0, 60 | 0.1, 61 | 0, 62 | # 1, 0.8, 0.8, 1, 1, 0.5, 0.5, 63 | ] 64 | 65 | goal_type_list_weights_end = [ 66 | 0.3, 67 | 0.5, 68 | 0.6, 69 | 0.5, 70 | 0, 71 | 3, 72 | 0, 73 | # 0.5, 0.5, 0.5, 0.5, 0.5, 3, 1, 74 | ] 75 | 76 | # Randomly sample from the goal type list 77 | # For now, we enforce the goals to start with BROWSE 78 | # and end with ADD_TO_CART 79 | # TODO: allow for a more flexible way of generating 80 | # goal types 81 | goal_types = ( 82 | random.choices( 83 | population=goal_type_list, weights=goal_type_list_weights_start, k=1 84 | ) 85 | + random.choices( 86 | population=goal_type_list, 87 | weights=goal_type_list_weights_mid, 88 | k=n_goals - 2, 89 | ) 90 | + random.choices( 91 | population=goal_type_list, weights=goal_type_list_weights_end, k=1 92 | ) 93 | ) 94 | 95 | # Make a complete goal with an accompanying set of goal parameters 96 | # for each goal_type 97 | goals = [] 98 | for goal_type in goal_types: 99 | # For now, we pass in a random set of goal_parameters 100 | goal_parameters = self.sample_goal_parameters( 101 | goal_type, memory_graph, goal_config 102 | ) 103 | goals.append(Goal(goal_type=goal_type, goal_parameters=goal_parameters)) 104 | 105 | return goals 106 | 107 | def sample_goal_parameters(self, goal_type, memory_graph, goal_config): 108 | # Sample goal parameters according to the input sample 109 | 110 | # TODO: IMPLEMENT ** 111 | goal_parameters = [] 112 | parameter_ontology = goal_config["parameter_ontology"] 113 | 114 | # (1) Pick a search filter 115 | search_filter = {} 116 | 117 | if goal_type in set( 118 | [GoalType.SEARCH, GoalType.REFINE_SEARCH, GoalType.GET_RELATED] 119 | ): 120 | 121 | if goal_type == GoalType.GET_RELATED: 122 | n_slots = 
weighted_choice(population=[1, 2], weights=[0.93, 0.07]) 123 | else: 124 | n_slots = weighted_choice(population=[1, 2], weights=[0.75, 0.25]) 125 | 126 | # Candidate slots: exclude a few slots that 127 | # are semantically infeasible 128 | # **** TODO ****: confirm that there is no slot to exclude 129 | candidate_slots = self.all_slots - set([""]) 130 | 131 | search_filter_slots = random.choices( 132 | population=list(candidate_slots), k=n_slots 133 | ) 134 | 135 | for search_filter_slot in search_filter_slots: 136 | # We first randomly assign a value for a randomly selected slot 137 | if search_filter_slot == "time": 138 | # Instead of choosing a specific datetime, 139 | # search by year or month instead. 140 | random_datetime = MemoryTime( 141 | str_datetime=random.choice( 142 | parameter_ontology["all"].get(search_filter_slot) 143 | ) 144 | ) 145 | 146 | if random.random() > 0.1: 147 | search_filter_value = str(MemoryTime(year=random_datetime.year)) 148 | 149 | else: 150 | search_filter_value = str( 151 | MemoryTime( 152 | year=random_datetime.year, month=random_datetime.month 153 | ) 154 | ) 155 | 156 | if goal_type == GoalType.GET_RELATED: 157 | # A special value for refine_search: 'next' and 'prev' 158 | # e.g. "where did we go next?" 159 | if random.random() > 0.3: 160 | search_filter_value = random.choice( 161 | ["right after", "right before", "on the same day"] 162 | ) 163 | 164 | elif search_filter_slot == "location": 165 | # TODO: Instead of choosing a specific location, 166 | # occasionally search with a coarser query. 167 | search_filter_value = random.choice( 168 | parameter_ontology["all"].get(search_filter_slot) 169 | ) 170 | 171 | if random.random() > 0.7: 172 | search_filter_value = copy.deepcopy(search_filter_value) 173 | search_filter_value["geo_tag"].get("place") 174 | 175 | else: 176 | # TODO: handle subsampling of participants & activities 177 | search_filter_value = random.choice( 178 | parameter_ontology["all"].get(search_filter_slot) 179 | ) 180 | 181 | if search_filter_value != "": 182 | search_filter[search_filter_slot] = search_filter_value 183 | 184 | # (2) Pick an object reference type 185 | object_reference_type = GoalMemoryRefType.NOT_SPECIFIED 186 | 187 | if goal_type in set([GoalType.GET_RELATED, GoalType.GET_INFO, GoalType.SHARE]): 188 | 189 | object_reference_type = weighted_choice( 190 | population=[ 191 | GoalMemoryRefType.PREV_TURN, 192 | GoalMemoryRefType.DIALOG, 193 | GoalMemoryRefType.GRAPH, 194 | ], 195 | weights=[0.8, 0.2, 0.0], 196 | ) 197 | 198 | # (3) Pick slots to request (e.g. 
in questions) 199 | request_slots = [] 200 | 201 | if goal_type in set([GoalType.GET_INFO]): 202 | # We randomly sample slots to ask 203 | # ****** TODO *******: make sure it's not asking about 204 | # the parameters that were already in search filter 205 | 206 | ask_from_visual_slot = random.random() > 0.9 207 | 208 | if ask_from_visual_slot: 209 | # ask about visual_slots (rare): people, activity 210 | n_request_slots = 1 211 | request_slots.extend( 212 | random.sample(self.non_visual_slots, n_request_slots) 213 | ) 214 | 215 | else: 216 | # ask about non_visual_slots: time, location 217 | n_request_slots = weighted_choice(population=[1, 2], weights=[0.8, 0.2]) 218 | request_slots.extend( 219 | random.sample(self.non_visual_slots, n_request_slots) 220 | ) 221 | 222 | elif goal_type in set([GoalType.GET_RELATED]): 223 | # We randomly sample slots to ask 224 | # iff search_filter is empty 225 | if len(search_filter) == 0: 226 | n_request_slots = weighted_choice(population=[0, 1], weights=[0.4, 0.6]) 227 | request_slots.extend(random.sample(self.all_slots, n_request_slots)) 228 | 229 | elif goal_type in set([GoalType.GET_AGGREGATED_INFO]): 230 | # ****** TODO ******* 231 | pass 232 | 233 | # (4) Compile it into a goal parameter 234 | goal_parameter = GoalParameter( 235 | filter=search_filter, 236 | reference_type=object_reference_type, 237 | request_slots=request_slots, 238 | ) 239 | goal_parameters.append(goal_parameter) 240 | 241 | return goal_parameters 242 | -------------------------------------------------------------------------------- /models/gpt2_mm/dataset_memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | 4 | # coding: utf-8 5 | """Dataset Loader for Memory Dialogs. 6 | 7 | Author(s): noctli, skottur 8 | (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 9 | """ 10 | 11 | import json 12 | import logging 13 | import os 14 | import pickle 15 | import re 16 | from itertools import chain 17 | 18 | import numpy as np 19 | import torch 20 | import torch.utils.data 21 | import tqdm 22 | 23 | from dataset import tokenize 24 | from torch.utils.data import Dataset 25 | 26 | 27 | # from train import SPECIAL_TOKENS, MODEL_INPUTS, PADDED_INPUTS 28 | # SPECIAL_TOKENS = ["", "", "", "", "