├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── chat_viewer ├── chat.html ├── css │ └── my.css └── js │ └── my.js ├── cocoa ├── __init__.py ├── analysis │ ├── __init__.py │ ├── analyzer.py │ ├── html_visualizer.py │ ├── utils.py │ └── visualizer.py ├── core │ ├── __init__.py │ ├── controller.py │ ├── dataset.py │ ├── entity.py │ ├── event.py │ ├── kb.py │ ├── sample_utils.py │ ├── scenario_db.py │ ├── schema.py │ ├── tokenizer.py │ └── util.py ├── io │ ├── __init__.py │ └── utils.py ├── lib │ ├── __init__.py │ ├── bleu.py │ ├── logstats.py │ └── multi_bleu.py ├── model │ ├── __init__.py │ ├── counter.py │ ├── dialogue_parser.py │ ├── dialogue_state.py │ ├── generator.py │ ├── manager.py │ ├── ngram.py │ ├── parser.py │ ├── util.py │ └── vocab.py ├── neural │ ├── __init__.py │ ├── attention.py │ ├── beam.py │ ├── embeddings_to_torch.py │ ├── evaluator.py │ ├── generator.py │ ├── loss.py │ ├── models.py │ ├── rl_trainer.py │ ├── symbols.py │ ├── trainer.py │ ├── utterance.py │ └── vocab_builder.py ├── options.py ├── sessions │ ├── __init__.py │ ├── human_session.py │ ├── rulebased_session.py │ ├── session.py │ └── timed_session.py ├── systems │ ├── __init__.py │ ├── human_system.py │ ├── rulebased_system.py │ └── system.py ├── turk │ ├── __init__.py │ ├── eval_data.py │ ├── task.py │ ├── templates │ │ ├── compare_question.html │ │ ├── frame.html │ │ ├── multi_question.html │ │ └── question.html │ └── utils.py └── web │ ├── README.md │ ├── __init__.py │ ├── main │ ├── __init__.py │ ├── backend.py │ ├── db_reader.py │ ├── logger.py │ ├── states.py │ └── utils.py │ └── views │ ├── __init__.py │ ├── chat.py │ └── utils.py ├── craigslistbargain ├── README.md ├── analysis │ ├── __init__.py │ ├── analyze_strategy.py │ ├── dialogue.py │ ├── get_data_statistics.py │ ├── html_visualizer.py │ ├── speech_acts.py │ ├── utils.py │ └── visualizer.py ├── core │ ├── __init__.py │ ├── controller.py │ ├── event.py │ ├── kb.py │ ├── price_tracker.py │ ├── scenario.py │ 
└── tokenizer.py ├── data │ └── craigslist-schema.json ├── evaluate.py ├── main.py ├── model │ ├── __init__.py │ ├── dialogue_state.py │ ├── generator.py │ ├── manager.py │ ├── parser.py │ └── templates.py ├── neural │ ├── __init__.py │ ├── batcher.py │ ├── evaluator.py │ ├── generator.py │ ├── model_builder.py │ ├── models.py │ ├── preprocess.py │ ├── rl_trainer.py │ ├── symbols.py │ ├── trainer.py │ ├── utterance.py │ └── vocab_builder.py ├── onmt ├── options.py ├── parse_dialogue.py ├── reinforce.py ├── results.txt ├── scraper │ ├── data │ │ └── negotiation │ │ │ ├── craigslist_bike.json │ │ │ ├── craigslist_car.json │ │ │ ├── craigslist_electronics.json │ │ │ ├── craigslist_furniture.json │ │ │ ├── craigslist_housing.json │ │ │ └── craigslist_phone.json │ ├── scraper │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── craigslist_spider.py │ └── scrapy.cfg ├── scripts │ ├── generate_scenarios.py │ ├── turk_eval.py │ └── visualize_transcripts.py ├── sessions │ ├── __init__.py │ ├── cmd_session.py │ ├── hybrid_session.py │ ├── neural_session.py │ ├── rulebased_session.py │ └── session.py ├── systems │ ├── __init__.py │ ├── cmd_system.py │ ├── hybrid_system.py │ ├── neural_system.py │ └── rulebased_system.py └── web │ ├── __init__.py │ ├── app_params.json │ ├── app_params_allsys.json │ ├── chat_app.py │ ├── main │ ├── __init__.py │ ├── backend.py │ ├── db_reader.py │ └── utils.py │ ├── static │ ├── css │ │ └── survey.css │ └── img │ │ └── handshake.jpg │ ├── templates │ ├── chat.html │ ├── craigslist-instructions.html │ ├── finished.html │ ├── report.html │ ├── task_survey.html │ ├── third_party_eval_finished.html │ ├── visualize.html │ └── waiting.html │ └── views │ ├── __init__.py │ └── action.py ├── data ├── turk │ └── sample_aws_config.json └── web │ ├── matchmaking-instructions.html │ ├── matchmaking_params.json │ └── negotiation │ ├── app_params.json │ ├── 
craiglist-instructions.html │ └── rent-instructions.html ├── dealornodeal ├── README.md ├── analysis │ ├── __init__.py │ ├── html_visualizer.py │ └── visualizer.py ├── core │ ├── __init__.py │ ├── controller.py │ ├── event.py │ ├── kb.py │ ├── lexicon.py │ ├── scenario.py │ └── tokenizer.py ├── data │ ├── bookhatball-schema.json │ ├── test.json │ ├── test.txt │ ├── train.json │ ├── train.txt │ ├── val.json │ └── val.txt ├── evaluate.py ├── fb_model │ ├── __init__.py │ ├── agent.py │ ├── avg_rank.py │ ├── chat.py │ ├── config.py │ ├── data.py │ ├── dialog.py │ ├── domain.py │ ├── engine.py │ ├── eval_selfplay.py │ ├── metric.py │ ├── models │ │ ├── __init__.py │ │ ├── dialog_model.py │ │ └── modules.py │ ├── reinforce.py │ ├── selfplay.py │ ├── split.py │ ├── test.py │ ├── train.py │ ├── utils.py │ └── vis.py ├── main.py ├── model │ ├── __init__.py │ ├── dialogue_state.py │ ├── generator.py │ ├── manager.py │ └── parser.py ├── neural │ ├── __init__.py │ ├── batcher.py │ ├── evaluator.py │ ├── generator.py │ ├── model_builder.py │ ├── models.py │ ├── preprocess.py │ ├── rl_trainer.py │ ├── symbols.py │ ├── trainer.py │ ├── utterance.py │ └── vocab_builder.py ├── onmt ├── options.py ├── parse_dialogue.py ├── reinforce.py ├── scripts │ ├── analyze.py │ ├── create_scenarios.py │ ├── fb_data_to_int_scenario.py │ └── fb_data_to_json.py ├── sessions │ ├── __init__.py │ ├── cmd_session.py │ ├── hybrid_session.py │ ├── neural_session.py │ ├── rulebased_session.py │ └── session.py ├── systems │ ├── __init__.py │ ├── cmd_system.py │ ├── hybrid_system.py │ ├── neural_system.py │ └── rulebased_system.py ├── turk │ ├── __init__.py │ └── eval_data.py └── web │ ├── __init__.py │ ├── app_params.json │ ├── chat_app.py │ ├── main │ ├── __init__.py │ ├── backend.py │ ├── db_reader.py │ └── utils.py │ ├── static │ ├── css │ │ ├── chat.css │ │ └── survey.css │ └── img │ │ ├── ball.png │ │ ├── book.png │ │ ├── handshake.jpg │ │ └── hat.png │ ├── templates │ ├── chat.html │ ├── 
finished.html │ ├── instructions.html │ ├── report.html │ ├── task_survey.html │ ├── visualize.html │ └── waiting.html │ └── views │ ├── __init__.py │ └── action.py ├── environment.yml ├── mutualfriends ├── analysis │ ├── __init__.py │ ├── html_visualizer.py │ └── visualizer.py ├── core │ ├── __init__.py │ ├── controller.py │ ├── entity_ranker.py │ ├── event.py │ ├── inverse_lexicon.py │ ├── kb.py │ ├── lexicon.py │ ├── lexicon_utils.py │ ├── scenario.py │ └── tokenizer.py ├── data │ ├── common_words.txt │ ├── friends-schema-old.json │ ├── friends-schema.json │ ├── inverse_lexicon_data.txt │ └── schema.json ├── model │ ├── __init__.py │ ├── dialogue_state.py │ ├── encdec.py │ ├── evaluate.py │ ├── generator.py │ ├── graph.py │ ├── graph_embedder.py │ ├── graph_embedder_config.py │ ├── learner.py │ ├── manager.py │ ├── parser.py │ ├── preprocess.py │ └── rnn_cell.py ├── parse_dialogue.py ├── results.txt ├── scripts │ ├── analyze.py │ ├── compile_eval_results.py │ ├── dataset_statistics.py │ ├── eval_lexicon.py │ ├── generate_entity_ranker_data.py │ ├── generate_inverse_lexicon_data.py │ ├── generate_scenarios.py │ ├── generate_schema.py │ ├── get_data_statistics.py │ ├── get_eval_statistics.py │ ├── make_eval_result_plots.py │ ├── plot.py │ └── summarize_eval.py ├── sessions │ ├── __init__.py │ ├── cmd_session.py │ ├── heuristic_session.py │ ├── neural_session.py │ ├── rulebased_session.py │ └── session.py ├── systems │ ├── __init__.py │ ├── cmd_system.py │ ├── heuristic_system.py │ ├── neural_system.py │ └── rulebased_system.py └── web │ ├── __init__.py │ ├── app_params.json │ ├── chat_app.py │ ├── lexicon_annotator_app.py │ ├── main │ ├── __init__.py │ ├── backend.py │ ├── db_reader.py │ └── utils.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ ├── single_task_lexicon.css │ │ ├── survey.css │ │ └── third_party_eval.css │ └── img │ │ └── handshake.jpg │ ├── templates │ ├── chat.html │ ├── finished.html │ ├── instructions.html │ ├── single_task_lexicon.html 
│ ├── survey.html │ ├── task_survey.html │ ├── third_party_eval.html │ ├── third_party_eval_finished.html │ ├── visualize.html │ └── waiting.html │ ├── third_party_backend.py │ ├── third_party_eval_app.py │ └── views │ ├── __init__.py │ └── action.py ├── onmt ├── Loss.py ├── ModelConstructor.py ├── Models.py ├── Optim.py ├── Trainer.py ├── Utils.py ├── __init__.py ├── io │ ├── AudioDataset.py │ ├── DatasetBase.py │ ├── IO.py │ ├── ImageDataset.py │ ├── TextDataset.py │ └── __init__.py ├── modules │ ├── AudioEncoder.py │ ├── Conv2Conv.py │ ├── ConvMultiStepAttention.py │ ├── CopyGenerator.py │ ├── Embeddings.py │ ├── Gate.py │ ├── GlobalAttention.py │ ├── ImageEncoder.py │ ├── MultiHeadedAttn.py │ ├── SRU.py │ ├── StackedRNN.py │ ├── StructuredAttention.py │ ├── Transformer.py │ ├── UtilClass.py │ ├── WeightNorm.py │ └── __init__.py └── translate │ ├── Beam.py │ ├── Translation.py │ ├── Translator.py │ └── __init__.py ├── requirements.txt ├── scripts ├── analyze.py ├── bot_bot_chat.py ├── chat_to_scenarios.py ├── combine_json.py ├── generate_dataset.py ├── split_dataset.py ├── split_transcripts.py ├── turk │ ├── accept_negotiation_hits.py │ ├── assign_qualification.py │ ├── award_time_bonuses.py │ └── eval_dialogue.py ├── visualize_transcripts.py └── web │ ├── cleanup_db.py │ └── dump_db.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | web_output 3 | output 4 | scr 5 | data 6 | log 7 | .ipynb_checkpoints 8 | Makefile 9 | aws_config.json 10 | cocoa.egg-info 11 | cocoa/tags 12 | *.pyc 13 | *.log 14 | *.pkl 15 | *.json 16 | *.png 17 | *.pdf 18 | cl*.sh 19 | aws_config.json 20 | *.DS_Store 21 | *.txt 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 2 | 3 | RUN apt-get update && apt-get install -y --no-install-recommends 
\ 4 | build-essential \ 5 | locales \ 6 | cmake \ 7 | git \ 8 | curl \ 9 | vim \ 10 | unzip \ 11 | ca-certificates \ 12 | libjpeg-dev \ 13 | libpng-dev \ 14 | libfreetype6-dev \ 15 | libxft-dev &&\ 16 | rm -rf /var/lib/apt/lists/* 17 | 18 | 19 | RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 20 | chmod +x ~/miniconda.sh && \ 21 | ~/miniconda.sh -b -p /opt/conda && \ 22 | rm ~/miniconda.sh && \ 23 | /opt/conda/bin/conda install -y python=2.7 numpy pyyaml scipy ipython mkl mkl-include cython typing && \ 24 | /opt/conda/bin/conda install -y -c pytorch magma-cuda90 && \ 25 | /opt/conda/bin/conda clean -ya 26 | ENV PATH /opt/conda/bin:$PATH 27 | 28 | RUN conda install -c pytorch pytorch=0.4.1 cuda90 29 | 30 | RUN conda install flask=0.12.2=py27_0 && \ 31 | conda install flask-socketio=2.8.5=py27_0 && \ 32 | conda install nltk=3.2.4=py27_0 && \ 33 | conda install numpy=1.13.3=py27hdbf6ddf_4 && \ 34 | conda install pandas=0.20.3=py27_0 && \ 35 | conda install ujson=1.35=py27_0 && \ 36 | conda install decorator=4.1.2=py27_0 && \ 37 | conda install matplotlib=2.0.2=np113py27_0 38 | 39 | RUN pip install future==0.16.0 && \ 40 | pip install nose==1.3.7 && \ 41 | pip install scikit-learn==0.19.0 && \ 42 | pip install sklearn==0.0 && \ 43 | pip install torchtext==0.2.1 && \ 44 | pip install visdom==0.1.6.1 45 | 46 | RUN python -m nltk.downloader punkt && \ 47 | python -m nltk.downloader stopwords 48 | 49 | RUN DUMMY3=${DUMMY3} git clone https://github.com/stanfordnlp/cocoa.git && \ 50 | cd cocoa && \ 51 | python setup.py develop 52 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Stanford NLP 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /chat_viewer/css/my.css: -------------------------------------------------------------------------------- 1 | * { 2 | font-size: 1em; 3 | font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; 4 | } 5 | #filter td { 6 | padding: 0 20px 5px 0; 7 | } 8 | .scenario { 9 | width: 100%; 10 | display: inline-block; 11 | text-align: left; 12 | } 13 | .scenario table { 14 | border-collapse: collapse; 15 | text-align: left; 16 | } 17 | .survey { 18 | width: 100%; 19 | } 20 | .survey table { 21 | border-collapse: collapse; 22 | } 23 | .survey table td, .scenario table td { 24 | padding: 0 5px; 25 | } 26 | .survey .response { 27 | padding: 0 0 20px 0; 28 | } 29 | .agentLabel { 30 | padding: 10px; 31 | } 32 | .scenario td, .scenario th, .scenario tr, .survey td, .survey th, .survey tr { 33 | border-bottom: 1px solid #ddd; 34 | padding: 0 5px 0 0; 35 | } 36 | .divTitle { 37 | padding: 20px 0px 10px 0px; 38 | font-weight: bold; 39 | margin-bottom: 10px; 40 | } 41 | 
.count-title { 42 | margin: 5px 20px; 43 | width: 30px; 44 | display: inline; 45 | font-weight: bold; 46 | text-align: center; 47 | } 48 | .count { 49 | margin: 5px 20px; 50 | padding: 38px; 51 | display: inline; 52 | text-align: center; 53 | } 54 | .kb0 { 55 | width: 48%; 56 | float: left; 57 | } 58 | .kb1 { 59 | width: 48%; 60 | margin-left: 4%; 61 | float: left; 62 | } 63 | .chatLog .chat { 64 | width: auto; 65 | border-collapse: collapse; 66 | } 67 | .chat td { 68 | padding: 2px 15px 2px 15px; 69 | } 70 | .kb0 th { 71 | background-color: #80ced6; 72 | } 73 | .chatLog .agent0 { 74 | background-color: #80ced6; 75 | } 76 | .survey .response0 th { 77 | background-color: #80ced6; 78 | } 79 | .kb1 th { 80 | background-color: #ffef96; 81 | } 82 | .chatLog .agent1 { 83 | background-color: #ffef96; 84 | } 85 | .survey .response1 th { 86 | background-color: #ffef96; 87 | } 88 | -------------------------------------------------------------------------------- /cocoa/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'anushabala' 2 | -------------------------------------------------------------------------------- /cocoa/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/analysis/__init__.py -------------------------------------------------------------------------------- /cocoa/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/core/__init__.py -------------------------------------------------------------------------------- /cocoa/core/entity.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | class CanonicalEntity(namedtuple('CanonicalEntity', 
class Entity(namedtuple('Entity', ['surface', 'canonical'])):
    """
    Immutable pair of a `surface` field and a `canonical` field.

    `canonical` is expected to expose a `.value` attribute (it is read by
    __str__); from_elements always stores a CanonicalEntity there.
    """
    __slots__ = ()

    @classmethod
    def from_elements(cls, surface=None, value=None, type=None):
        """
        Alternate constructor that wraps `value` and `type` in a CanonicalEntity.

        Params:
            surface: Stored unchanged as the `surface` field.
            value: Canonical value; when None it defaults to `surface`.
            type: Passed through as the CanonicalEntity type.

        Returns an instance of `cls`, so subclasses get their own type back.
        """
        if value is None:
            value = surface
        # super(Entity, cls) resolves the namedtuple base for Entity and for
        # every subclass. Reversing the arguments (super(cls, Entity)) raises
        # TypeError as soon as cls is a strict subclass of Entity.
        return super(Entity, cls).__new__(cls, surface, CanonicalEntity(value, type))

    def __str__(self):
        # Rendered as "[surface|canonical value]".
        return '[%s|%s]' % (str(self.surface), str(self.canonical.value))
@staticmethod
def gather_eval(events):
    """
    Attach evaluation labels to the events they refer to.

    Every non-'eval' event that is kept gets a `tags` list. An 'eval' event
    is matched to the non-'eval' event with the same `time`; the keys of its
    data['labels'] whose value is not 0 become that event's tags.

    Params:
        events: Iterable of objects with `time`, `action` and `data`.

    Returns the non-'eval' events ordered by `time`. Events are keyed by
    `time`, so when several share a time only the last one is kept.

    Raises KeyError when an 'eval' event has no non-'eval' event at its time.
    """
    event_dict = {e.time: e for e in events if e.action != 'eval'}
    # Reset tags up front so the outcome does not depend on whether an eval
    # appears before or after its target in `events`.
    for target in event_dict.values():
        target.tags = []
    for e in events:
        if e.action == 'eval':
            # .items() behaves the same on Python 2 and Python 3.
            event_dict[e.time].tags = [k for k, v in e.data['labels'].items() if v != 0]
    return [event_dict[time] for time in sorted(event_dict)]
def softmax(x):
    """Compute softmax values for each set of scores in x (along axis 0).

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the result mathematically but keeps np.exp from overflowing
    to inf (and the quotient from becoming nan) for large scores.

    x: array-like of scores.
    Returns a numpy array with the same shape as x.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(x)
    shifted = np.exp(x - np.max(x, axis=0))
    return shifted / np.sum(shifted, axis=0)
class Scenario(object):
    '''
    One situation to be played out: an identifier, a list of attributes and
    one KB per agent (indexed by agent).
    '''
    def __init__(self, uuid, attributes, kbs):
        self.uuid, self.attributes, self.kbs = uuid, attributes, kbs

    @staticmethod
    def from_dict(schema, raw):
        # Left for concrete scenario classes to provide.
        raise NotImplementedError

    def to_dict(self):
        '''
        Serialize to a plain dict with keys 'uuid', 'attributes' and 'kbs',
        using each attribute's to_json() and each KB's to_dict().
        '''
        serialized = {'uuid': self.uuid}
        serialized['attributes'] = [attribute.to_json() for attribute in self.attributes]
        serialized['kbs'] = [agent_kb.to_dict() for agent_kb in self.kbs]
        return serialized

    def get_kb(self, agent):
        '''
        Return the KB stored at index `agent`.
        '''
        agent_kb = self.kbs[agent]
        return agent_kb
def select_random(self, exclude_seen=True):
    '''
    Return a scenario chosen uniformly at random.

    exclude_seen: when True, scenarios returned by earlier calls with
        exclude_seen=True are skipped; once every scenario has been handed
        out the record of seen scenarios is cleared and all become eligible
        again. When False, every scenario is always eligible and the record
        is left untouched.

    Raises ValueError (from numpy) when the database holds no scenarios.
    '''
    # Imported here because this module has no numpy import of its own.
    import numpy as np

    candidates = set(self.scenarios_map.keys())
    if exclude_seen:
        candidates = candidates - self.selected_scenarios
        if len(candidates) == 0:
            # Everything has been seen: start a fresh round.
            candidates = set(self.scenarios_map.keys())
            self.selected_scenarios = set()

    # Index into a list so the chosen uuid keeps its original Python type.
    pool = list(candidates)
    uuid = pool[np.random.randint(len(pool))]

    if exclude_seen:
        # Remember the pick; without this the exclusion never takes effect.
        self.selected_scenarios.add(uuid)
    return self.scenarios_map[uuid]
class Schema(object):
    '''
    A schema contains information about possible entities and relations.

    Set by the constructor:
        values: the 'values' entry of the schema JSON
        attributes: Attribute objects built from the 'attributes' entry, in file order
        attr_names: the names of those attributes, in the same order
        domain: the `domain` argument, stored as given
    '''
    def __init__(self, path, domain=None):
        # Use a context manager so the file handle is closed right away.
        with open(path) as fin:
            raw = json.load(fin)
        # Mapping from type (e.g., hobby) to list of values (e.g., hiking)
        values = raw['values']
        # List of attributes (e.g., place_of_birth)
        attributes = [Attribute.from_json(a) for a in raw['attributes']]
        self.attr_names = [attr.name for attr in attributes]

        self.values = values
        self.attributes = attributes
        self.domain = domain

    def get_attributes(self):
        '''
        Return a dict {name: value_type} of all attributes.
        '''
        return {attr.name: attr.value_type for attr in self.attributes}

    def get_ordered_attribute_subset(self, attribute_subset):
        """
        Order a subset of this schema's attributes using the original order of attributes in the schema.
        attribute_subset: A list containing the names of the attributes present in the subset
            (Attribute objects taken from self.attributes are accepted as well)
        :return A new list with the same elements, arranged in schema order
        :raises ValueError if an element is neither a known name nor one of self.attributes
        """
        def schema_position(attr):
            # Names are looked up in attr_names; self.attributes holds
            # Attribute objects, so a name can never be found there.
            try:
                return self.attr_names.index(attr)
            except ValueError:
                return self.attributes.index(attr)

        return sorted(attribute_subset, key=schema_position)

    def get_ordered_item(self, item):
        '''
        Return a list of (name, item[name]) pairs in schema attribute order,
        skipping attributes for which item raises KeyError.
        '''
        ordered_item = []
        for name in self.attr_names:
            try:
                ordered_item.append((name, item[name]))
            except KeyError:
                continue
        return ordered_item
def create_path(path):
    """Make sure the directory part of `path` exists, creating it if needed.

    Only os.path.dirname(path) is created; the final component of `path` is
    never touched. A path without a directory part (a bare filename) needs
    nothing created and is a no-op.

    Raises OSError when the directory cannot be created and does not exist.
    """
    dirname = os.path.dirname(path)
    if not dirname:
        # Bare filename: os.makedirs('') would raise.
        return
    try:
        os.makedirs(dirname)
    except OSError:
        # An already existing directory is fine (it may have been created
        # between a check and the call); anything else is a real failure.
        if not os.path.isdir(dirname):
            raise
def bleu(stats):
    """Compute BLEU from statistics collected by call(s) to bleu_stats.

    stats: sequence laid out as (c, r, numerator1, denominator1, ...), where
        c and r are the two leading length entries and each following pair
        is an n-gram match count and the matching n-gram total.

    Returns 0 as soon as any entry of stats is zero (this also rules out a
    zero denominator and log(0)). Otherwise returns
    exp(min(0, 1 - r/c) + 0.25 * sum(log(numerator/denominator))).
    The 0.25 weight is fixed regardless of how many pairs are supplied.
    """
    # any() stops at the first zero and works on Python 2 and 3, where
    # len(filter(...)) only works on Python 2.
    if any(x == 0 for x in stats):
        return 0
    (c, r) = stats[:2]
    log_precision = sum(math.log(float(x) / y) for x, y in zip(stats[2::2], stats[3::2]))
    return math.exp(min(0, 1 - float(r) / c) + 0.25 * log_precision)
def compute_bleu(candidate, reference, smoothed=True):
    """Score one candidate against one reference.

    candidate, reference: token sequences handed to bleu_stats.
    smoothed: when true the statistics are scored with smoothed_bleu,
        otherwise with bleu.
    """
    # bleu_stats yields the ten statistics (two lengths plus a
    # numerator/denominator pair for each of the four n-gram orders).
    stats = list(bleu_stats(candidate, reference))
    scorer = smoothed_bleu if smoothed else bleu
    return scorer(stats)
def update_summary(s1, s2):
    # Fold s2 into the running summary s1 (modified in place).
    # s2 is either another summary dict (its 'min'/'max'/'sum'/'count' are
    # merged) or a single value (counted as one observation).
    # 'mean' is recomputed from the merged 'sum' and 'count'.
    merging_summary = isinstance(s2, dict)
    for key, pick in (('min', min), ('max', max)):
        incoming = s2[key] if merging_summary else s2
        s1[key] = pick(s1.get(key, incoming), incoming)
    s1['sum'] = s1.get('sum', 0) + (s2['sum'] if merging_summary else s2)
    s1['count'] = s1.get('count', 0) + (s2['count'] if merging_summary else 1)
    s1['mean'] = 1.0 * s1['sum'] / s1['count']
dump_summary_map(m): 77 | for k, s in m.items(): 78 | print k, '=', summary_to_str(s) 79 | -------------------------------------------------------------------------------- /cocoa/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/model/__init__.py -------------------------------------------------------------------------------- /cocoa/model/dialogue_parser.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from model.parser import Parser 4 | from model.dialogue_state import DialogueState 5 | 6 | def parse_example(example, lexicon, templates=None): 7 | """Parse example and collect templates. 8 | """ 9 | kbs = example.scenario.kbs 10 | parsers = [Parser(agent, kbs[agent], lexicon) for agent in (0, 1)] 11 | states = [DialogueState(agent, kbs[agent]) for agent in (0, 1)] 12 | # Add init utterance 13 | parsed_utterances = [states[0].utterance[0], states[1].utterance[1]] 14 | for event in example.events: 15 | writing_agent = event.agent # Speaking agent 16 | reading_agent = 1 - writing_agent 17 | 18 | received_utterance = parsers[reading_agent].parse(event, states[reading_agent]) 19 | if received_utterance: 20 | sent_utterance = copy.deepcopy(received_utterance) 21 | if sent_utterance.tokens: 22 | sent_utterance.template = parsers[writing_agent].extract_template(sent_utterance.tokens, states[writing_agent]) 23 | 24 | if templates is not None: 25 | templates.add_template(sent_utterance, states[writing_agent]) 26 | received_utterance.agent = example.agents[writing_agent] 27 | parsed_utterances.append(received_utterance) 28 | 29 | # Update states 30 | states[reading_agent].update(writing_agent, received_utterance) 31 | states[writing_agent].update(writing_agent, sent_utterance) 32 | return parsed_utterances 33 | 
class DialogueState(object):
    """One agent's view of a two-party dialogue.

    Tracks the latest utterance from each side, the number of non-empty
    updates seen, and the set of intents this agent has produced itself.
    """

    def __init__(self, agent, kb):
        """
        Args:
            agent (int): index of the agent owning this state; the partner
                index is computed as ``1 - agent``.
            kb: the agent's knowledge base, stored untouched.
        """
        self.agent = agent
        self.partner = 1 - agent
        self.kb = kb
        self.time = 0
        init_utterance = Utterance(logical_form=LF(''), template=[''])
        # Both slots start out sharing one placeholder object; update()
        # rebinds a slot instead of mutating the utterance it holds.
        self.utterance = [init_utterance, init_utterance]
        self.done = set()

    @property
    def my_act(self):
        """Intent of this agent's latest utterance."""
        return self.utterance[self.agent].lf.intent

    @property
    def partner_act(self):
        """Intent of the partner's latest utterance."""
        return self.utterance[self.partner].lf.intent

    @property
    def partner_utterance(self):
        """The partner's latest utterance object."""
        return self.utterance[self.partner]

    @property
    def partner_template(self):
        """Template of the partner's latest utterance, or None if it has none."""
        try:
            return self.utterance[self.partner].template
        except (AttributeError, IndexError, TypeError):
            # Only lookup failures mean "no template"; a bare except would
            # also swallow KeyboardInterrupt/SystemExit.
            return None

    def update(self, agent, utterance):
        """Record ``utterance`` as the latest one spoken by ``agent``.

        Falsy utterances are ignored entirely (no time step is counted).
        Intents are added to ``done`` only for this state's own agent.
        """
        if not utterance:
            return
        self.time += 1
        self.utterance[agent] = utterance
        if agent == self.agent:
            self.done.add(utterance.lf.intent)
def entropy(p, normalized=True):
    """Return the Shannon entropy (natural log) of a discrete distribution.

    Args:
        p: sequence of probabilities, or of unnormalized weights when
            ``normalized`` is False.
        normalized (bool): pass False to have ``p`` divided by its sum first.

    Returns:
        The entropy ``-sum(p * log(p))`` as a NumPy float32 scalar.
    """
    # np.array copies, so the caller's sequence is never modified in place.
    p = np.array(p, dtype=np.float32)
    if not normalized:
        p /= np.sum(p)
    # Zero-probability outcomes contribute nothing (0 * log 0 := 0).
    # Evaluating them directly gives 0 * -inf = NaN and poisons the sum.
    support = p[p != 0]
    ent = -1. * np.sum(support * np.log(support))
    return ent
def print_results(self, model_opt, batch, counter=None, utterances=None):
    """Print every generated utterance, numbering them with ``counter``.

    Args:
        model_opt: unused here; kept for interface compatibility.
        batch: unused here; kept for interface compatibility.
        counter: iterator yielding the running sentence number (``evaluate``
            passes its ``itertools.count``). A fresh ``count(1)`` is used
            when it is omitted.
        utterances: iterable of responses exposing ``log(sent_number)``.

    Returns:
        The counter, so the caller can keep numbering across batches
        (``evaluate`` reassigns its counter from this return value).
    """
    if utterances is None:
        # Older three-argument call shape: (model_opt, batch, utterances).
        utterances, counter = counter, None
    if counter is None:
        counter = count(1)
    for response in utterances:
        sent_number = next(counter)
        # The previous header also formatted a `title` that was never
        # defined anywhere in scope, so only the number is printed.
        print("--------- {0} -----------".format(sent_number))
        output = response.log(sent_number)
        os.write(1, output.encode('utf-8'))
    return counter
class Statistics(BaseStatistics):
    """Running loss and reward totals accumulated over RL episodes."""

    def __init__(self, episode=0, loss=0, reward=0):
        # NOTE: the base-class initializer is not called here; only the
        # attributes below are set on the instance.
        self.episode = episode
        self.loss = loss
        self.reward = reward
        self.total_rewards = []

    def update(self, stat):
        """Add ``stat``'s loss and reward to the totals and count one episode."""
        self.loss += stat.loss
        self.reward += stat.reward
        self.episode += 1

    def mean_loss(self):
        """Average loss per episode; 0.0 before any episode was recorded."""
        # `episode` defaults to 0, so guard the division for a fresh object.
        if not self.episode:
            return 0.0
        return self.loss / self.episode

    def mean_reward(self):
        """Average reward per episode; 0.0 before any episode was recorded."""
        if not self.episode:
            return 0.0
        return self.reward / self.episode

    def output(self, episode):
        """Print the running means for ``episode`` and flush stdout."""
        print("Episode %2d; loss: %6.2f; reward: %6.2f;" %
              (episode,
               self.mean_loss(),
               self.mean_reward()))
        sys.stdout.flush()
def get_entity_form(entity, form):
    """Render a two-field entity in the requested form.

    Args:
        entity: a two-element namedtuple-like object with ``surface`` and
            ``canonical`` fields.
        form (str): 'surface' returns the surface field, 'type' returns the
            canonical type wrapped in angle brackets, 'canonical' returns a
            copy of the entity whose surface is blanked.

    Raises:
        ValueError: for any other ``form``.
    """
    assert len(entity) == 2
    if form not in ('surface', 'type', 'canonical'):
        raise ValueError('Unknown entity form %s' % form)
    if form == 'surface':
        return entity.surface
    if form == 'type':
        return '<%s>' % entity.canonical.type
    return entity._replace(surface='')
class Session(object):
    """An abstract class for instantiating an agent.

    A session maintains the dialogue state and receives/sends dialogue events.

    """
    def __init__(self, agent, config=None):
        """Construct a session for an agent.

        Args:
            agent (int): agent id (0 or 1).
            config: optional configuration object, stored as-is.

        """
        self.agent = agent  # 0 or 1 (which player are we?)
        self.partner = 1 - agent
        self.config = config

    def receive(self, event):
        """Parse the received event and update the dialogue state.

        Args:
            event (Event)

        """
        raise NotImplementedError

    def send(self):
        """Send an event.

        Returns:
            event (Event)

        """
        raise NotImplementedError

    @staticmethod
    def remove_nonprintable(raw_tokens):
        """Return the tokens made up solely of ``string.printable`` characters."""
        kept = []
        for candidate in raw_tokens:
            offending = [ch for ch in candidate if ch not in string.printable]
            if not offending:
                kept.append(candidate)
        return kept

    @staticmethod
    def timestamp():
        """Current ``time.time()`` value rendered as a string."""
        now = time.time()
        return str(now)

    def message(self, text, metadata=None):
        """Build a message event from this agent, stamped with the current time."""
        stamp = self.timestamp()
        return Event.MessageEvent(self.agent, text, time=stamp, metadata=metadata)

    def wait(self):
        """Produce no event."""
        return None
class System(object):
    """An abstract class for building a Session object.
    """
    def new_session(self, agent, kb, config=None):
        """Create a session for ``agent`` with knowledge base ``kb``.

        ``config`` is optional so this abstract signature matches subclasses
        such as RulebasedSystem, whose ``new_session`` accepts it.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    @classmethod
    def name(cls):
        """Short identifier of the system type."""
        return 'base'
2 | 3 |
4 |
5 |

Context:

6 |

{context}

7 |
8 |
9 | 10 |
11 |

Please select the more appropriate response given the above context. If they are equally good or bad, select "Both are appropriate" or "Both are NOT appropriate".

12 | 13 | 14 | 15 | 16 |
17 | 18 |
19 |
20 | -------------------------------------------------------------------------------- /cocoa/turk/templates/frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 21 | 22 | 23 | 24 |
25 |

{title}

26 |
27 |
Instructions
28 |
{instructions}
29 |
30 | 31 |
32 | 33 | 34 | {questions} 35 | 36 |
37 | 38 | 39 |
40 | 41 | 42 |
43 | 44 | 45 | 46 | {script} 47 | 48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /cocoa/turk/templates/multi_question.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |

Context:

6 |

{context}

7 |
8 |
9 | 10 |

{response}

11 | 12 |
13 |

Please select all problems present in the response, given the above context.

14 |
15 | 16 |
17 |
18 | 19 |
20 |
21 | 22 |
23 |
24 | 25 |
26 |
27 | 28 |
29 |
30 | 31 |
32 |

Please rate how appropriate the response is given the conversation above.

33 | 34 | 35 | 36 | 37 | 38 |
39 | 40 |
41 |
42 | -------------------------------------------------------------------------------- /cocoa/turk/templates/question.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |

Context:

6 |

{context}

7 |
8 |
9 | 10 |

{response}

11 | 12 |
13 |

Please rate how likely it is that the response continues the conversation above.

14 | 15 | 16 | 17 | 18 | 19 |
20 | 21 |
22 |
def xml_safe(string):
    """Escape ``string`` for embedding in XML text.

    ``&``, ``<`` and ``>`` are replaced by their predefined XML entities and
    double quotes are backslash-escaped.

    Args:
        string (str): raw text.

    Returns:
        str: the escaped text.
    """
    # '&' must go first, otherwise the ampersands introduced by the other
    # entities would be escaped a second time.
    string = string.replace("&", "&amp;")
    string = string.replace("<", "&lt;")
    string = string.replace(">", "&gt;")
    string = string.replace("\"", "\\\"")
    return string
def create_app(debug=False, templates_dir='templates'):
    """Create an application.

    Args:
        debug (bool): value assigned to ``app.debug``.
        templates_dir (str): passed to Flask as ``template_folder``.

    Returns:
        The Flask app with the ``main`` blueprint registered and the
        module-level ``socketio`` object initialized on it.
    """
    global controller_process

    app = Flask(__name__, template_folder=templates_dir)
    app.debug = debug
    # NOTE(review): the secret key is committed in source; presumably it
    # should come from configuration for real deployments -- confirm.
    app.config['SECRET_KEY'] = 'gjr39dkjn344_!67#'
    app.config['PROPAGATE_EXCEPTIONS'] = True

    # Imported here rather than at module top.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from .main import main as main_blueprint
    app.register_blueprint(main_blueprint)

    # controller_queue = Queue()
    # app.config['controller_queue'] = controller_queue
    # controller_process = Process(target=run_controllers, args=(controller_queue,))
    # controller_process.start()
    # Replaces the app's whole teardown list, so close_connection is the only
    # app-context teardown callback at this point.
    app.teardown_appcontext_funcs = [close_connection]

    socketio.init_app(app)
    return app
class WebLogger(object):
    """Access point for the module-wide "web" logger."""

    @classmethod
    def initialize(cls, log_file):
        """Configure root logging to ``log_file`` (truncating it, DEBUG level)
        and bind the shared "web" logger."""
        global logger
        logging.basicConfig(filename=log_file, filemode='w', level=logging.DEBUG)
        logger = logging.getLogger("web")

    @classmethod
    def get_logger(cls):
        """Return the shared logger, configuring a "web.log" file on first use
        when ``initialize`` was never called."""
        global logger
        if logger:
            return logger
        logging.basicConfig(filename="web.log", filemode='w')
        logger = logging.getLogger("web")
        return logger
class UserChatState(object):
    """Snapshot of what a user needs to render an ongoing chat."""

    def __init__(self, agent_index, scenario_id, chat_id, kb, attributes, num_seconds, partner_kb=None):
        """
        Args:
            agent_index: index of this user in the chat.
            scenario_id: id of the scenario being played.
            chat_id: id of the chat.
            kb: this user's knowledge base; must provide ``to_dict()``.
            attributes: stored as-is (not included in ``to_dict``).
            num_seconds: stored as-is and exported by ``to_dict``.
            partner_kb: optional partner knowledge base with ``to_dict()``.
        """
        self.agent_index = agent_index
        self.scenario_id = scenario_id
        self.chat_id = chat_id
        self.kb = kb
        self.attributes = attributes
        self.num_seconds = num_seconds
        self.partner_kb = partner_kb

    def to_dict(self):
        """Return a plain-dict view of the state.

        ``partner_kb`` is optional in the constructor, so it is exported as
        None when absent instead of failing on ``None.to_dict()``.
        """
        partner_kb = self.partner_kb.to_dict() if self.partner_kb is not None else None
        return {"agent_index": self.agent_index,
                "scenario_id": self.scenario_id,
                "chat_id": self.chat_id,
                "kb": self.kb.to_dict(),
                "num_seconds": self.num_seconds,
                "partner_kb": partner_kb}
class User(object):
    """Attribute view over one positional user row."""

    def __init__(self, row):
        """Copy ``row[0]`` .. ``row[11]`` onto named attributes, in column order."""
        columns = (
            'name',
            'status',
            'status_timestamp',
            'connected_status',
            'connected_timestamp',
            'message',
            'partner_type',
            'partner_id',
            'scenario_id',
            'agent_index',
            'selected_index',
            'chat_id',
        )
        for position, column in enumerate(columns):
            setattr(self, column, row[position])
def format_message(message, status_message):
    """Format the message string.

    Args:
        message (str)
        status_message (bool): Whether the message is an action (e.g. select) or an utterance

    Returns:
        The message prefixed with a bracketed ``%x %X`` timestamp; status
        messages are additionally wrapped in angle brackets.

    """
    if status_message:
        opening, closing = u"<", u">"
    else:
        opening = closing = u""
    stamp = datetime.now().strftime(u'%x %X')
    return u"[{}] {}{}{}".format(stamp, opening, message, closing)
18 | norm_factor = np.abs(midpoint - agent_target) 19 | if role == utils.SELLER: 20 | margin = (price - midpoint) / norm_factor 21 | else: 22 | margin = (midpoint - price) / norm_factor 23 | if remove_outlier and not cls.valid_margin(margin): 24 | return None 25 | return margin 26 | 27 | @classmethod 28 | def has_deal(cls, ex): 29 | if ex.outcome is None or ex.outcome['reward'] == 0 or ex.outcome.get('offer', None) is None or ex.outcome['offer']['price'] is None: 30 | return False 31 | return True 32 | 33 | -------------------------------------------------------------------------------- /craigslistbargain/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/core/__init__.py -------------------------------------------------------------------------------- /craigslistbargain/core/controller.py: -------------------------------------------------------------------------------- 1 | from cocoa.core.controller import Controller as BaseController 2 | 3 | class Controller(BaseController): 4 | def __init__(self, scenario, sessions, chat_id=None, session_names=(None, None)): 5 | super(Controller, self).__init__(scenario, sessions, chat_id, session_names=session_names) 6 | # self.prices = [None, None] 7 | self.offers = [None, None] 8 | # self.sides = [None, None] 9 | self.outcomes = [None, None] 10 | self.quit = False 11 | 12 | def event_callback(self, event): 13 | if event.action == 'offer': 14 | self.offers[event.agent] = event.data 15 | elif event.action == 'accept': 16 | self.outcomes[event.agent] = True 17 | elif event.action == 'reject': 18 | self.outcomes[event.agent] = False 19 | elif event.action == 'quit': 20 | self.quit = True 21 | self.outcomes[event.agent] = False 22 | 23 | def get_outcome(self): 24 | offer = None 25 | reward = 0 26 | if self.offers[0] is not None and self.outcomes[1] is True: 27 | reward = 1 28 
class Event(BaseEvent):
    """Event with factory helpers for the negotiation actions.

    Each helper builds an ``Event`` whose action string is one of
    'offer', 'quit', 'accept' or 'reject'.  Arguments are forwarded
    positionally as (agent, time, action, data) plus ``metadata`` as a
    keyword; the exact meaning of each slot is defined by the base class
    constructor (not shown here -- confirm against cocoa.core.event).
    """
    @staticmethod
    def OfferEvent(agent, data, time=None, metadata=None):
        """Build an 'offer' event carrying `data` as its payload."""
        return Event(agent, time, 'offer', data, metadata=metadata)

    @staticmethod
    def QuitEvent(agent, time=None, metadata=None):
        """Build a 'quit' event; it carries no payload (data is None)."""
        return Event(agent, time, 'quit', None, metadata=metadata)

    @staticmethod
    def AcceptEvent(agent, time=None, metadata=None):
        """Build an 'accept' event; it carries no payload (data is None)."""
        return Event(agent, time, 'accept', None, metadata=metadata)

    @staticmethod
    def RejectEvent(agent, time=None, metadata=None):
        """Build a 'reject' event; it carries no payload (data is None)."""
        return Event(agent, time, 'reject', None, metadata=metadata)
-------------------------------------------------------------------------------- /craigslistbargain/core/scenario.py: -------------------------------------------------------------------------------- 1 | from cocoa.core.scenario_db import Scenario as BaseScenario 2 | from cocoa.core.schema import Attribute 3 | from kb import KB 4 | 5 | class Scenario(BaseScenario): 6 | # Agent ids 7 | BUYER = 0 8 | SELLER = 1 9 | 10 | def __init__(self, uuid, post_id, category, images, attributes, kbs): 11 | super(Scenario, self).__init__(uuid, attributes, kbs) 12 | self.post_id = post_id 13 | self.category = category 14 | self.images = images 15 | 16 | def to_dict(self): 17 | d = super(Scenario, self).to_dict() 18 | d['post_id'] = self.post_id 19 | d['category'] = self.category 20 | return d 21 | 22 | @staticmethod 23 | def from_dict(schema, raw): 24 | scenario_attributes = None 25 | if schema is not None: 26 | scenario_attributes = schema.attributes 27 | if 'attributes' in raw.keys(): 28 | scenario_attributes = [Attribute.from_json(a) for a in raw['attributes']] 29 | 30 | if scenario_attributes is None: 31 | raise ValueError("No scenario attributes found. 
def is_number(s):
    '''
    Return True if token `s` starts like a number: any leading '.' / ','
    characters followed by at least one digit.

    '1000', '10,000', '.5', '5k' -> True
    '.', ',', '...', 'abc'       -> False
    '''
    # The previous pattern r'[.,0-9]+' also accepted tokens made only of
    # '.' and ',' (e.g. a sentence-final '.' or the normalized '...'),
    # which made stick_dollar_sign glue '$' onto plain punctuation.
    # Requiring a digit keeps every genuinely numeric prefix matching.
    return re.match(r'[.,]*[0-9]', s) is not None

def stick_dollar_sign(tokens):
    '''
    Merge a standalone '$' token with the adjacent number token.

    '$', '1000' -> '$1000'
    '1000', '$' -> '1000$'

    A '$' with no numeric neighbour is kept as its own token.
    Returns a new list; `tokens` is not modified.
    '''
    new_tokens = []
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token == '$':
            # $100: the number follows the sign, consume both tokens
            if i < len(tokens) - 1 and is_number(tokens[i+1]):
                new_tokens.append(token + tokens[i+1])
                i += 2
            # 100$: the number was already emitted, append the sign to it
            elif i > 0 and is_number(tokens[i-1]):
                new_tokens[-1] = new_tokens[-1] + token
                i += 1
            else:
                new_tokens.append(token)
                i += 1
        else:
            new_tokens.append(token)
            i += 1
    return new_tokens
def detokenize(tokens):
    '''
    Join tokens back into a string, attaching some tokens to the previous word.

    A token is glued to the preceding word (no space) when it is a substring
    of string.punctuation, when it contains an apostrophe ("n't", "'s"), or
    when it is 'na' following 'gon' / 'wan'. The first token is never glued.

    ['hi', 'there', '!'] => 'hi there!'
    '''
    words = []
    for tok in tokens:
        attaches = tok in string.punctuation or "'" in tok
        if not attaches:
            # 'gon' + 'na' -> 'gonna', 'wan' + 'na' -> 'wanna'
            attaches = tok == 'na' and bool(words) and words[-1] in ('gon', 'wan')
        if attaches and words:
            words[-1] = words[-1] + tok
        else:
            words.append(tok)
    return ' '.join(words)
# Entry point: load a trained checkpoint and run the evaluator over the test data.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # NOTE(review): --random-seed is parsed but never read in this script.
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    options.add_data_generator_arguments(parser)
    options.add_generator_arguments(parser)
    args = parser.parse_args()

    # Know which arguments are for the models thus should not be
    # overwritten during test
    dummy_parser = argparse.ArgumentParser(description='duh')
    options.add_model_arguments(dummy_parser)
    options.add_data_generator_arguments(dummy_parser)
    # Parsing an empty argv yields just the defaults of those options.
    dummy_args = dummy_parser.parse_known_args([])[0]

    if cuda.is_available() and not args.gpuid:
        print("WARNING: You have a CUDA device, should run with --gpuid 0")

    # Only the first listed GPU id is selected.
    if args.gpuid:
        cuda.set_device(args.gpuid[0])

    # Load the model.
    mappings, model, model_args = \
        model_builder.load_test_model(args.checkpoint, args, dummy_args.__dict__)

    # Figure out src and tgt vocab
    make_model_mappings(model_args.model, mappings)

    # The schema path comes from the checkpoint's arguments, not the command line.
    schema = Schema(model_args.schema_path, None)
    data_generator = get_data_generator(args, model_args, schema, test=True)

    # Prefix: [GO, CATEGORY]
    # Just giving it GO seems okay as it can learn to copy the CATEGORY from the input
    scorer = Scorer(args.alpha)
    generator = get_generator(model, mappings['tgt_vocab'], scorer, args, model_args)
    builder = UtteranceBuilder(mappings['tgt_vocab'], args.n_best, has_tgt=True)
    evaluator = Evaluator(model, mappings, generator, builder, gt_prefix=1)
    evaluator.evaluate(args, model_args, data_generator)
class Manager(BaseManager):
    """Dialogue manager that filters and adjusts the base manager's actions."""

    def available_actions(self, state):
        """Return the base manager's candidate actions minus the blocked ones.

        'reject', 'accept', 'quit' and 'agree' are always blocked because
        they are decided by the bot's strategy. 'inquiry' is blocked after
        more than one inquiry. Until a price has been mentioned, the only
        candidate is 'init-price'. 'offer' is blocked while either the
        partner's price or the current price is unknown.
        """
        candidates = super(Manager, self).available_actions(state)

        blocked = ['reject', 'accept', 'quit', 'agree']
        if state.num_inquiry > 1:
            blocked.append('inquiry')

        no_price_yet = state.curr_price is None
        if no_price_yet:
            candidates = ['init-price']
        if state.partner_price is None or no_price_yet:
            blocked.append('offer')

        return [action for action in candidates if action not in blocked]

    def choose_action(self, state, context=None):
        """Pick an action via the base manager, turning some offers into 'agree'.

        An 'offer' chosen right after a partner act of 'unknown' is
        replaced by 'agree'; every other choice is returned unchanged.
        """
        chosen = super(Manager, self).choose_action(state, context)
        if chosen != 'offer':
            return chosen
        return 'agree' if state.partner_act == 'unknown' else chosen
def check_model_args(args):
    """Assert that the model arguments are mutually consistent.

    Checks, in order:
      * if pretrained word vectors are given, the vector width read from the
        first line of the file (field count minus the leading word) equals
        ``args.word_embed_size``, and also ``args.context_size`` when a
        bag-of-words context encoder is used;
      * the 'rnn-attn' decoder has ``attention_memory`` set;
      * a positive ``num_context`` is not combined with ``stateful``;
      * ``temperature`` is non-negative.

    Raises:
        AssertionError: if any of the checks fails.
    """
    wordvec = args.pretrained_wordvec
    if wordvec:
        # A list of paths is allowed; only the first file is inspected.
        path = wordvec[0] if isinstance(wordvec, list) else wordvec
        with open(path, 'r') as fin:
            first_line = fin.readline()
        pretrained_word_embed_size = len(first_line.strip().split()) - 1
        assert pretrained_word_embed_size == args.word_embed_size

        # NOTE(review): kept inside this branch because it needs the width
        # read above; confirm the nesting against the original layout.
        if args.context and args.context_encoder == 'bow':
            assert pretrained_word_embed_size == args.context_size

    if args.decoder == 'rnn-attn':
        assert args.attention_memory is not None

    if args.num_context > 0:
        assert not args.stateful

    assert args.temperature >= 0
class Evaluator(BaseEvaluator):
    """Evaluator that prints each generated response with its listing title."""

    def print_results(self, model_opt, batch, utterances):
        """Print one header (and optionally a summary) per generated response.

        Args:
            model_opt: model options; only ``model_opt.model`` is read here.
            batch: batch exposing ``title_inputs`` and ``encoder_inputs``.
            utterances: generated responses; each must provide
                ``log(sent_number)`` returning text.
        """
        # `counter` used to be an unbound name here (only itertools.count is
        # imported), so the first call raised NameError. Keep one counter
        # per evaluator so numbering continues across batches.
        # NOTE(review): starting at 1 is an assumption -- confirm against
        # the numbering the base evaluator uses.
        counter = getattr(self, '_sent_counter', None)
        if counter is None:
            counter = self._sent_counter = count(1)

        # Swap the first two axes so that row i lines up with utterances[i]
        # (presumably (seq, batch) -> (batch, seq) -- TODO confirm).
        titles = batch.title_inputs.transpose(0,1)
        enc_inputs = batch.encoder_inputs.transpose(0,1)
        for i, response in enumerate(utterances):
            sent_number = next(counter)
            title = self.builder.var_to_sent(titles[i], self.mappings['kb_vocab'])
            summary = self.builder.var_to_sent(enc_inputs[i])
            print("--------- {0}: {1} -----------".format(sent_number, title))
            if model_opt.model in ["sum2sum", "sum2seq"]:
                print("SUMMARY: {}".format(summary) )
            output = response.log(sent_number)
            # Written straight to file descriptor 1 as UTF-8 bytes.
            os.write(1, output.encode('utf-8'))
self.kb_embedder(title) 17 | _, desc_memory_bank = self.kb_embedder(desc) 18 | memory_banks = [enc_memory_bank, context_memory_bank, title_memory_bank, desc_memory_bank] 19 | else: 20 | memory_banks = [enc_memory_bank, context_memory_bank] 21 | 22 | enc_state = self.decoder.init_decoder_state(src, enc_memory_bank, enc_final) 23 | dec_state = enc_state if dec_state is None else dec_state 24 | decoder_outputs, dec_state, attns = self.decoder(tgt, memory_banks, 25 | dec_state, memory_lengths=lengths, lengths=tgt_lengths) 26 | 27 | return decoder_outputs, attns, dec_state 28 | -------------------------------------------------------------------------------- /craigslistbargain/neural/symbols.py: -------------------------------------------------------------------------------- 1 | #from collections import namedtuple 2 | # 3 | #SpecialSymbols = namedtuple('SpecialSymbols', 4 | # ['EOS', 'END_SUM', 'GO_S', 'GO_B', 'OFFER', 'QUIT', 'ACCEPT', 'REJECT', 'PAD', 'C_car', 'C_phone', 'C_housing', 'C_electronics', 'C_furniture', 'C_bike']) 5 | # 6 | #markers = SpecialSymbols(EOS='', END_SUM='', GO_S='', GO_B='', OFFER='', QUIT='', ACCEPT='', REJECT='', PAD='', C_car='', C_phone='', C_housing='', C_electronics='', C_furniture='', C_bike='') 7 | 8 | from cocoa.neural.symbols import Marker as BaseMarker 9 | 10 | class Marker(BaseMarker): 11 | # Sequence 12 | END_SUM = '' 13 | GO_S = '' 14 | GO_B = '' 15 | 16 | # Actions 17 | OFFER = '' 18 | QUIT = '' 19 | ACCEPT = '' 20 | REJECT = '' 21 | 22 | # Categories 23 | C_car = '' 24 | C_phone = '' 25 | C_housing = '' 26 | C_electronics = '' 27 | C_furniture = '' 28 | C_bike = '' 29 | 30 | markers = Marker 31 | 32 | category_markers = [markers.C_car, markers.C_phone, markers.C_housing, markers.C_electronics, markers.C_furniture, markers.C_bike] 33 | 34 | action_markers = [markers.ACCEPT, markers.REJECT, markers.OFFER, markers.QUIT] 35 | 36 | sequence_markers = [markers.EOS, markers.GO_S, markers.GO_B, markers.PAD] 37 | 38 | 
class Trainer(BaseTrainer):
    ''' Negotiation trainer built on the Cocoa base trainer '''

    def _run_batch(self, batch, dec_state=None, enc_state=None):
        '''
        Run one forward pass of self.model on `batch`.

        Models with a `context_embedder` attribute (the NegotiationModel)
        also receive the context, title and description inputs; any other
        model gets only the encoder/decoder inputs. Both receive lengths,
        dec_state and enc_state last.

        Returns (outputs, attns, dec_state) as produced by the model.
        '''
        forward_args = [batch.encoder_inputs, batch.decoder_inputs]
        targets = batch.targets  # read but not used by the forward pass below

        if hasattr(self.model, 'context_embedder'):
            # forward() of the NegotiationModel
            forward_args.extend([
                batch.context_inputs,
                batch.title_inputs,
                batch.desc_inputs,
            ])
        # otherwise: forward() of the plain NMT model

        forward_args.extend([batch.lengths, dec_state, enc_state])
        outputs, attns, dec_state = self.model(*forward_args)
        return outputs, attns, dec_state
12 | """ 13 | def build_target_tokens(self, predictions, kb=None): 14 | tokens = super(UtteranceBuilder, self).build_target_tokens(predictions, kb) 15 | tokens = [x for x in tokens if not x in category_markers] 16 | return tokens 17 | 18 | def _entity_to_str(self, entity_token, kb): 19 | raw_price = PriceScaler.unscale_price(kb, entity_token) 20 | human_readable_price = "${}".format(raw_price.canonical.value) 21 | return human_readable_price 22 | 23 | def get_price_number(self, entity, kb): 24 | raw_price = PriceScaler.unscale_price(kb, entity) 25 | return raw_price.canonical.value 26 | -------------------------------------------------------------------------------- /craigslistbargain/neural/vocab_builder.py: -------------------------------------------------------------------------------- 1 | from cocoa.model.vocab import Vocabulary 2 | from cocoa.neural.vocab_builder import build_utterance_vocab 3 | 4 | from symbols import markers, sequence_markers 5 | 6 | def build_kb_vocab(dialogues, special_symbols=[]): 7 | kb_vocab = Vocabulary(offset=0, unk=True) 8 | cat_vocab = Vocabulary(offset=0, unk=False) 9 | 10 | for dialogue in dialogues: 11 | assert dialogue.is_int is False 12 | kb_vocab.add_words(dialogue.title) 13 | kb_vocab.add_words(dialogue.description) 14 | cat_vocab.add_word(dialogue.category) 15 | 16 | kb_vocab.add_words(special_symbols, special=True) 17 | kb_vocab.finish(freq_threshold=5) 18 | cat_vocab.add_words(['bike', 'car', 'electronics', 'furniture', 'housing', 'phone'], special=True) 19 | cat_vocab.finish() 20 | 21 | print 'KB vocab size:', kb_vocab.size 22 | print 'Category vocab size:', cat_vocab.size 23 | return kb_vocab, cat_vocab 24 | 25 | def build_lf_vocab(dialogues): 26 | vocab = Vocabulary(offset=0, unk=True) 27 | for dialogue in dialogues: 28 | assert dialogue.is_int is False 29 | for lf in dialogue.lfs: 30 | vocab.add_words(lf) 31 | vocab.add_words(sequence_markers, special=True) 32 | vocab.finish() 33 | print 'LF vocabulary size:', 
# Entry point: build the two systems, then train the first one with REINFORCE.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(conflict_handler='resolve')
    parser.add_argument('--agents', help='What kind of agent to use. The first agent is always going to be updated and the second is fixed.', nargs='*', required=True)
    parser.add_argument('--agent-checkpoints', nargs='+', help='Directory to learned models')
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    parser.add_argument('--verbose', default=False, action='store_true', help='Whether or not to have verbose prints')
    # This file is read below, so the old "Output path" help text was wrong.
    parser.add_argument('--valid-scenarios-path', help='Path to the validation scenarios')
    cocoa.options.add_scenario_arguments(parser)
    options.add_system_arguments(parser)
    options.add_rl_arguments(parser)
    options.add_model_arguments(parser)
    args = parser.parse_args()

    if args.random_seed:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
    valid_scenario_db = ScenarioDB.from_dict(schema, read_json(args.valid_scenarios_path), Scenario)

    # --agent-checkpoints is optional (None when omitted) and may list fewer
    # entries than --agents. Indexing it once per agent used to raise
    # IndexError / TypeError in those cases, so pad the tail with None.
    # NOTE(review): assumes get_system accepts None for agents that need no
    # checkpoint -- confirm against systems.get_system.
    checkpoints = list(args.agent_checkpoints or [])
    assert len(checkpoints) <= len(args.agents), \
        'More --agent-checkpoints than --agents were given'
    checkpoints.extend([None] * (len(args.agents) - len(checkpoints)))
    systems = [get_system(name, args, schema, False, checkpoints[i]) for i, name in enumerate(args.agents)]

    # Only the first agent is trained; the others stay fixed.
    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = make_loss(args, model, system.mappings['tgt_vocab'])
    optim = build_optim(args, model, None)

    scenarios = {'train': scenario_db.scenarios_list, 'dev': valid_scenario_db.scenarios_list}
    trainer = RLTrainer(systems, scenarios, loss, optim, rl_agent, reward_func=args.reward)
    trainer.learn(args)
class ScraperSpiderMiddleware(object):
    """Pass-through spider middleware for the Craigslist scraper.

    Not all methods need to be defined. If a method is not defined,
    scrapy acts as if the spider middleware does not modify the
    passed objects.

    Every hook is an instance method: Scrapy invokes the hooks on the
    object returned by ``from_crawler`` using keyword arguments, so each
    one must accept ``self`` as its first parameter.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        """Called for each response going through the middleware into the spider.

        Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield the spider's results unchanged.

        Called with the results returned from the spider after it has
        processed the response. Must return an iterable of Request, dict
        or Item objects.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or process_spider_input() raises an exception.

        Should return either None or an iterable of Response, dict or
        Item objects. This implementation does nothing and returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged.

        Works like process_spider_output(), except that there is no
        response associated. Must return only requests (not items).
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /craigslistbargain/scraper/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = scraper.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = scraper 12 | -------------------------------------------------------------------------------- /craigslistbargain/scripts/visualize_transcripts.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from cocoa.core.util import write_json 4 | 5 | from analysis.visualizer import Visualizer 6 | from analysis.html_visualizer import HTMLVisualizer 7 | 8 | if __name__ == '__main__': 9 | parser = ArgumentParser() 10 | parser.add_argument('--survey-transcripts', nargs='+', 11 | help='Path to directory containing evaluation transcripts') 12 | parser.add_argument('--dialogue-transcripts', nargs='+', 13 | help='Path to directory containing dialogue transcripts') 14 | parser.add_argument('--worker-ids', nargs='+', 15 | help='Path to json file containing chat_id to worker_id mappings') 16 | parser.add_argument('--summary', default=False, action='store_true', 17 | help='Summarize human ratings') 18 | parser.add_argument('--hist', default=False, action='store_true', 19 | help='Plot histgram of ratings') 20 | parser.add_argument('--html-visualize', action='store_true', 21 | help='Output html files') 22 | parser.add_argument('--outdir', default='.', help='Output dir') 23 | parser.add_argument('--stats', default='stats.json', 24 | help='Path to stats file') 25 | parser.add_argument('--partner', 
default=False, action='store_true', 26 | help='Whether this is from partner survey') 27 | HTMLVisualizer.add_html_visualizer_arguments(parser) 28 | args = parser.parse_args() 29 | 30 | visualizer = Visualizer(args.dialogue_transcripts, 31 | args.survey_transcripts, args.worker_ids) 32 | 33 | visualizer.compute_effectiveness() 34 | 35 | # TODO: move summary and hist to analyzer 36 | if args.hist: 37 | visualizer.hist(question_scores, args.outdir, partner=args.partner) 38 | 39 | if args.summary: 40 | summary = visualizer.summarize() 41 | write_json(summary, args.stats) 42 | 43 | if args.worker_ids: 44 | visualizer.worker_stats() 45 | 46 | if args.html_output: 47 | visualizer.html_visualize(args.viewer_mode, args.html_output, 48 | css_file=args.css_file, img_path=args.img_path, 49 | worker_ids=visualizer.worker_ids) 50 | -------------------------------------------------------------------------------- /craigslistbargain/sessions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/sessions/__init__.py -------------------------------------------------------------------------------- /craigslistbargain/sessions/cmd_session.py: -------------------------------------------------------------------------------- 1 | from session import Session 2 | 3 | 4 | class CmdSession(Session): 5 | def __init__(self, agent, kb): 6 | super(CmdSession, self).__init__(agent) 7 | self.kb = kb 8 | 9 | def send(self): 10 | message = raw_input() 11 | event = self.parse_input(message) 12 | return event 13 | 14 | def parse_input(self, message): 15 | """Parse user input from the command line. 
class Session(BaseSession):
    """Negotiation session that can emit offer/accept/reject/quit events.

    Each helper builds the corresponding ``Event`` for ``self.agent``,
    stamped with ``self.timestamp()``, and forwards the caller-supplied
    ``metadata`` (previously it was always replaced by ``None``).
    """

    def offer(self, offer, metadata=None):
        """Send an offer event.

        Args:
            offer ({'price': float, 'sides', str})
            metadata: optional extra data attached to the event.

        Returns:
            OfferEvent

        """
        return Event.OfferEvent(self.agent, offer, time=self.timestamp(), metadata=metadata)

    def accept(self, metadata=None):
        """Return an accept event carrying ``metadata``."""
        return Event.AcceptEvent(self.agent, time=self.timestamp(), metadata=metadata)

    def reject(self, metadata=None):
        """Return a reject event carrying ``metadata``."""
        return Event.RejectEvent(self.agent, time=self.timestamp(), metadata=metadata)

    def quit(self, metadata=None):
        """Return a quit event carrying ``metadata``."""
        return Event.QuitEvent(self.agent, time=self.timestamp(), metadata=metadata)
rulebased_system import RulebasedSystem 12 | from model.generator import Templates, Generator 13 | from model.manager import Manager 14 | templates = Templates.from_pickle(args.templates) 15 | generator = Generator(templates) 16 | manager = Manager.from_pickle(args.policy) 17 | return RulebasedSystem(lexicon, generator, manager, timed) 18 | elif name == 'hybrid': 19 | from hybrid_system import HybridSystem 20 | templates = Templates.from_pickle(args.templates) 21 | manager = PytorchNeuralSystem(args, schema, lexicon, model_path, timed) 22 | generator = Generator(templates) 23 | return HybridSystem(lexicon, generator, manager, timed) 24 | elif name == 'cmd': 25 | from cmd_system import CmdSystem 26 | return CmdSystem() 27 | elif name == 'pt-neural': 28 | from neural_system import PytorchNeuralSystem 29 | assert model_path 30 | return PytorchNeuralSystem(args, schema, lexicon, model_path, timed) 31 | else: 32 | raise ValueError('Unknown system %s' % name) 33 | -------------------------------------------------------------------------------- /craigslistbargain/systems/cmd_system.py: -------------------------------------------------------------------------------- 1 | from cocoa.systems.system import System as BaseSystem 2 | from sessions.cmd_session import CmdSession 3 | 4 | class CmdSystem(BaseSystem): 5 | def __init__(self): 6 | super(CmdSystem, self).__init__() 7 | 8 | @classmethod 9 | def name(cls): 10 | return 'cmd' 11 | 12 | def new_session(self, agent, kb): 13 | return CmdSession(agent, kb) 14 | -------------------------------------------------------------------------------- /craigslistbargain/systems/hybrid_system.py: -------------------------------------------------------------------------------- 1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem 2 | from sessions.hybrid_session import HybridSession 3 | 4 | 5 | class HybridSystem(BaseRulebasedSystem): 6 | 7 | def _new_session(self, agent, kb, config=None): 8 | 
self.manager.timed_session = False 9 | manager_session = self.manager.new_session(agent, kb) 10 | return HybridSession.get_session(agent, kb, self.lexicon, 11 | self.generator, manager_session) 12 | 13 | @classmethod 14 | def name(cls): 15 | return 'hybrid' 16 | 17 | -------------------------------------------------------------------------------- /craigslistbargain/systems/rulebased_system.py: -------------------------------------------------------------------------------- 1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem 2 | from sessions.rulebased_session import RulebasedSession 3 | 4 | class RulebasedSystem(BaseRulebasedSystem): 5 | def _new_session(self, agent, kb, config=None): 6 | return RulebasedSession.get_session(agent, kb, self.lexicon, config, self.generator, self.manager) 7 | 8 | -------------------------------------------------------------------------------- /craigslistbargain/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/__init__.py -------------------------------------------------------------------------------- /craigslistbargain/web/app_params.json: -------------------------------------------------------------------------------- 1 | { 2 | "status_params": { 3 | "waiting": { 4 | "num_seconds": 180 5 | }, 6 | 7 | "chat": { 8 | "num_seconds": 1200 9 | }, 10 | 11 | "finished": { 12 | "num_seconds": -1 13 | }, 14 | 15 | "survey": { 16 | "num_seconds": -1 17 | }, 18 | "redirected": { 19 | "num_seconds": -1 20 | }, 21 | "incomplete": { 22 | "num_seconds": -1 23 | }, 24 | "reporting": { 25 | "num_seconds": -1 26 | } 27 | }, 28 | "connection_timeout_num_seconds": 3, 29 | "idle_timeout_num_seconds": 210, 30 | "templates_dir": "web/templates", 31 | "images_base": "web/static/images", 32 | "instructions": "web/templates/craigslist-instructions.html", 33 | 
class DatabaseReader(BaseDatabaseReader):
    """Database reader for the negotiation task.

    Extends the base reader by normalising NaN offer prices to ``None``
    and by attaching the bot configuration to chat examples.
    """

    @classmethod
    def get_chat_outcome(cls, cursor, chat_id):
        """Return the chat outcome with a NaN offer price replaced by None."""
        outcome = super(DatabaseReader, cls).get_chat_outcome(cursor, chat_id)
        try:
            if math.isnan(outcome['offer']['price']):
                outcome['offer']['price'] = None
        except (ValueError, TypeError, KeyError):
            # No offer / no price / non-numeric price: leave outcome as is.
            pass
        return outcome

    @classmethod
    def get_chat_example(cls, cursor, chat_id, scenario_db):
        """Return the chat example, adding the bot config when one is logged."""
        ex = super(DatabaseReader, cls).get_chat_example(cursor, chat_id, scenario_db)
        if ex is not None:
            cursor.execute('SELECT config FROM bot where chat_id=?', (chat_id,))
            result = cursor.fetchone()
            if result:
                ex.agents_info = {'config': result[0]}
        return ex

    @classmethod
    def process_event_data(cls, action, data):
        """Decode the JSON payload of 'offer' events; other data is returned as is.

        A NaN price is replaced by None. A payload with no usable price
        (missing key, wrong type) is returned unchanged instead of raising,
        matching the handling in ``get_chat_outcome``.
        """
        if action == 'offer':
            data = json.loads(data)
            try:
                if math.isnan(data['price']):
                    data['price'] = None
            except (ValueError, TypeError, KeyError):
                pass
        return data

    @classmethod
    def dump_surveys(cls, cursor, json_path):
        """Write ``[agent_types, survey_data]`` for all logged surveys to json_path.

        ``survey_data`` maps chat id to ``{0: responses, 1: responses}`` and
        ``agent_types`` maps chat id to the agent types stored for that chat.
        """
        questions = ['fluent', 'honest', 'persuasive', 'fair', 'negotiator', 'coherent', 'comments']

        cursor.execute('''SELECT * FROM survey''')
        logged_surveys = cursor.fetchall()
        survey_data = {}
        agent_types = {}

        for survey in logged_surveys:
            # todo this is pretty lazy - support variable # of questions per task eventually..
            (userid, cid, _, q1, q2, q3, q4, q5, q6, comments) = survey
            responses = dict(zip(questions, [q1, q2, q3, q4, q5, q6, comments]))
            cursor.execute('''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''', (cid,))
            chat_result = cursor.fetchone()
            agents = json.loads(chat_result[0])
            agent_ids = json.loads(chat_result[1])
            agent_types[cid] = agents
            if cid not in survey_data:
                survey_data[cid] = {0: {}, 1: {}}
            # The survey is stored under index 0 when the respondent is the
            # agent recorded under key '1', and under index 1 otherwise.
            partner_idx = 0 if agent_ids['1'] == userid else 1
            survey_data[cid][partner_idx] = responses

        write_json([agent_types, survey_data], json_path)
:(" 9 | 10 | -------------------------------------------------------------------------------- /craigslistbargain/web/static/img/handshake.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/static/img/handshake.jpg -------------------------------------------------------------------------------- /craigslistbargain/web/templates/finished.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Thank You! 4 | 5 | 6 | 11 | 12 | 18 | 19 | 20 |
21 |

Let's Negotiate!

22 |

{{ finished_message }}

23 |
24 |
25 | {% if mturk_code is not none %} 26 |
27 |

Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}

28 | {% endif %} 29 |
30 | {% if visualize %} 31 |

Click here to visualize the dialogue you just completed

32 | {% endif %} 33 |
34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /craigslistbargain/web/templates/third_party_eval_finished.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Thank You! 4 | 9 | 10 | 11 |
12 |

Evaluating Dialogues

13 | {{ finished_message }} 14 |
15 |
16 |
17 | {% if mturk_code is not none %} 18 |
19 |

Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}

20 | {% endif %} 21 |
22 |
@action.route('/_offer/', methods=['GET'])
def offer():
    """Record the offer sent by the current user.

    Reads ``price`` and ``sides`` from the query string. A missing or
    non-numeric price, or the sentinel value -1, is reported back to the
    user as an invalid offer instead of raising inside the view.

    Returns:
        A JSON response whose ``message`` field is shown to the user.
    """
    backend = get_backend()
    sides = request.args.get('sides')

    try:
        price = float(request.args.get('price'))
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: not a number.
        price = -1

    if price == -1:
        return jsonify(message=format_message("You made an invalid offer. Please try again.", True))

    offer_data = {'price': price,
                  'sides': sides}
    backend.make_offer(userid(), offer_data)

    displayed_message = format_message("You made an offer!", True)
    return jsonify(message=displayed_message)

2 | You and another user online want to set two of your friends up on a date. For each of your friends, you know their favorite hobby and their location/time preferences. Find the best match by chatting with your partner and finding the pair of friends who have the most in common! (Hint: There's exactly one pair that has all three in common.) 3 |

4 |

Instructions

5 |
    6 |
  • Please use natural sentences as much as possible. 7 |
      8 |
    • Do: i have 3 friends who like graffiti and being outdoors
    • 9 |
    • Don't do: 3 graffiti outdoors
    • 10 |
    11 |
  • 12 |
  • Avoid simply listing out the hobbies (or preferences) of your friends.
  • 13 |
  • Look at your list of friends at right.
  • 14 |
  • Use the chat box below to find out more about your partner's friends.
  • 15 |
  • Once you've found the pair, select your friend by clicking Select in the first column. 16 |
  • Please select carefully. If you select a friend once, the website will not allow you to select another friend for 10 seconds.
  • 17 | 18 |
-------------------------------------------------------------------------------- /data/web/matchmaking_params.json: -------------------------------------------------------------------------------- 1 | { 2 | "status_params": { 3 | "waiting": { 4 | "num_seconds": 180 5 | }, 6 | 7 | "chat": { 8 | "num_seconds": 300 9 | }, 10 | 11 | "finished": { 12 | "num_seconds": 15 13 | }, 14 | 15 | "survey": { 16 | "num_seconds": -1 17 | } 18 | }, 19 | 20 | "scenarios_json_file": "/Users/anushabala/projects/game-dialogue/data/matchmaking-scenarios-large.json", 21 | "connection_timeout_num_seconds": 5, 22 | "templates_dir": "/Users/anushabala/projects/game-dialogue/src/web/templates", 23 | "end_survey": 1, 24 | "instructions": "/Users/anushabala/projects/game-dialogue/data/web/matchmaking-instructions.html", 25 | "task_title": "Let's Set Our Friends Up!", 26 | "icon": "heart.png" 27 | } 28 | 29 | -------------------------------------------------------------------------------- /data/web/negotiation/app_params.json: -------------------------------------------------------------------------------- 1 | { 2 | "status_params": { 3 | "waiting": { 4 | "num_seconds": 180 5 | }, 6 | 7 | "chat": { 8 | "num_seconds": 1200 9 | }, 10 | 11 | "finished": { 12 | "num_seconds": -1 13 | }, 14 | 15 | "survey": { 16 | "num_seconds": -1 17 | }, 18 | "redirected": { 19 | "num_seconds": -1 20 | }, 21 | "incomplete": { 22 | "num_seconds": -1 23 | }, 24 | "reporting": { 25 | "num_seconds": -1 26 | } 27 | }, 28 | "connection_timeout_num_seconds": 3, 29 | "idle_timeout_num_seconds": 210, 30 | "templates_dir": "/home/hehe/game-dialogue/src/web/templates/negotiation", 31 | "images_base": "/home/hehe/game-dialogue/images", 32 | "instructions": "/home/hehe/game-dialogue/data/web/negotiation/craiglist-instructions.html", 33 | "task_title": "Let's Negotiate!", 34 | "icon": "handshake.jpg", 35 | "end_survey": 1, 36 | "skip_chat_enabled": false, 37 | "num_chats_per_scenario": 1, 38 | "debug": false, 39 | "models": { 
40 | "rulebased": { 41 | "active": true, 42 | "type": "rulebased" 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /data/web/negotiation/rent-instructions.html: -------------------------------------------------------------------------------- 1 |

2 | You and another user online will negotiate the monthly rent of a 1B1B apartment. 3 |

4 |

Instructions

5 |
    6 | 7 |
  • Your role (tenant or landlord) is to the right.
  • 8 |
  • Your expected price range is provided as well. We will award bonuses to those who get a better deal!
  • 9 |
  • Use the chat box below to negotiate with your partner given facts listed on the right.
  • 10 |
  • When you and your partner have agreed on a rent, enter it in the text box and hit 'Submit'. The task ends when you both offer the same price.
  • 11 |
  • You must negotiate a final price within the time limit in order to receive credit for this HIT.
  • 12 |
  • Please do not state facts about the apartment or about the terms of the lease that are not explicitly given to you. It's okay to embellish certain aspects as a negotiation tactic! 13 |
  • To the landlord:
  • 14 |
      15 |
    • The tenant cannot see all properties of the apartment and may ask you questions about them.
    • 16 |
    • It's okay to embellish facts, but don't make up new properties not provided to you (or contradict given properties).
    • 17 |
    • Do: this neighborhood is really beautiful, the neighbors are all really friendly
    • 18 |
    • Don't do: (if "no parking" is given) I'd be happy to give you a parking spot if you can pay $50 more.
    • 19 |
    20 | 21 |
  • To the tenant:
  • 22 |
      23 |
    • If a property is marked with "?", it means that its value is unknown to you, but you can ask the landlord about it.
    • 24 |
    • It's okay to make up preferences as a negotiation tactic, but don't make up properties that are not provided (or contradict given properties).
    • 25 |
    • Do: It is next to a highway and seems like it can get quite noisy.
    • 26 |
    • Don't do: (if the facts did not mention a broken window) The window is broken.
    • 27 |
    28 | 29 |
30 | -------------------------------------------------------------------------------- /dealornodeal/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/analysis/__init__.py -------------------------------------------------------------------------------- /dealornodeal/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/core/__init__.py -------------------------------------------------------------------------------- /dealornodeal/core/event.py: -------------------------------------------------------------------------------- 1 | from cocoa.core.event import Event as BaseEvent 2 | 3 | class Event(BaseEvent): 4 | @staticmethod 5 | def SelectEvent(agent, data, time=None, metadata=None): 6 | return Event(agent, time, 'select', data, metadata=metadata) 7 | 8 | @staticmethod 9 | def QuitEvent(agent, time=None, metadata=None): 10 | return Event(agent, time, 'quit', None, metadata=metadata) 11 | 12 | -------------------------------------------------------------------------------- /dealornodeal/core/kb.py: -------------------------------------------------------------------------------- 1 | from cocoa.core.kb import KB as BaseKB 2 | 3 | class KB(BaseKB): 4 | def __init__(self, attributes, items): 5 | super(KB, self).__init__(attributes) 6 | self.items = items 7 | self.item_counts = {item['Name']: item['Count'] for item in items} 8 | self.item_values = {item['Name']: item['Value'] for item in items} 9 | 10 | def to_dict(self): 11 | return self.items 12 | 13 | @classmethod 14 | def from_dict(cls, attributes, raw): 15 | return cls(attributes, raw) 16 | 17 | @classmethod 18 | def from_ints(cls, attributes, names, ints): 19 | """Build KB from integers. 
class Lexicon(object):
    """Detect item and numbers in a list of tokens.

    Example:
        ['i', 'would', 'like', 'the', 'ball', 'and', '2', 'books'] ->
        ['i', 'would', 'like', 'the', ('ball', ('ball', 'item')), 'and', ('2', (2, 'number')), ('books', ('book', 'item'))]

    """

    word_to_num = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10}

    def __init__(self, items):
        self.items = items

    def detect_item(self, token):
        """Return an item Entity if token names one of ``self.items``, else False.

        A token matches an item when it is exactly the item name with an
        optional plural 's'; 'basketball(s)' is additionally accepted for
        'ball'. The whole token must match, so words that merely start
        with an item name (e.g. 'hate', 'booking') are not linked.
        """
        for item in self.items:
            if re.match(r'(?:{})s?$'.format(re.escape(item)), token) or \
                    (item == 'ball' and re.match(r'(?:basket)?balls?$', token)):
                return Entity.from_elements(surface=token, value=item, type='item')
        return False

    def detect_number(self, token):
        """Return a number Entity for digit strings or 'one'..'ten', else False."""
        try:
            n = int(token)
        except ValueError:
            n = self.word_to_num.get(token)
        if n is None:
            return False
        return Entity.from_elements(surface=token, value=n, type='number')

    def link_entity(self, tokens):
        """Replace every recognised token by its Entity; keep the others as is."""
        return [(self.detect_item(token) or self.detect_number(token) or token) for token in tokens]
electronics 15 | # # self.images = images // link to product image: bike/6123601035_0.jpg 16 | 17 | #def to_dict(self): 18 | # d = super(Scenario, self).to_dict() 19 | # return d 20 | 21 | @classmethod 22 | def from_dict(cls, schema, raw): 23 | scenario_attributes = None 24 | if schema is not None: 25 | scenario_attributes = schema.attributes 26 | if 'attributes' in raw.keys(): 27 | scenario_attributes = [Attribute.from_json(a) for a in raw['attributes']] 28 | 29 | if scenario_attributes is None: 30 | raise ValueError("No scenario attributes found. " 31 | "Either schema must not be None (and have valid attributes) or " 32 | "scenario dict must have valid attributes field.") 33 | kb_list = [KB.from_dict(scenario_attributes, kb) for kb in raw['kbs']] 34 | return cls(raw['uuid'], scenario_attributes, kb_list) 35 | -------------------------------------------------------------------------------- /dealornodeal/core/tokenizer.py: -------------------------------------------------------------------------------- 1 | from cocoa.core.tokenizer import tokenize, detokenize 2 | -------------------------------------------------------------------------------- /dealornodeal/data/bookhatball-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "values": { 3 | "item": ["book", "hat", "ball"] 4 | }, 5 | "attributes": [ 6 | {"name": "Name", "value_type": "item", "entity": true}, 7 | {"name": "Count", "value_type": "integer", "entity": true}, 8 | {"name": "Value", "value_type": "integer", "entity": true} 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /dealornodeal/evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch.nn as nn 3 | from torch import cuda 4 | from onmt.Utils import use_gpu 5 | 6 | from cocoa.io.utils import read_json, write_json, read_pickle, write_pickle, create_path 7 | from cocoa.core.schema 
# Script entry point: parse evaluation options, load a trained checkpoint,
# build the test-time data generator and run the Evaluator over it.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # --random-seed and --stats-file are parsed here but not read again in
    # this script.
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    parser.add_argument('--stats-file', help='Path to save json statistics (dataset, training etc.) file')
    add_data_generator_arguments(parser)
    add_evaluator_arguments(parser)
    args = parser.parse_args()

    # Know which arguments are for the models thus should not be
    # overwritten during test.
    # The dummy parser is run on an empty argv, so dummy_args only carries
    # the default values of the model / data-generator options.
    dummy_parser = argparse.ArgumentParser(description='duh')
    add_model_arguments(dummy_parser)
    add_data_generator_arguments(dummy_parser)
    dummy_args = dummy_parser.parse_known_args([])[0]

    if cuda.is_available() and not args.gpuid:
        print("WARNING: You have a CUDA device, should run with --gpuid 0")

    # Only the first listed GPU id is selected.
    if args.gpuid:
        cuda.set_device(args.gpuid[0])

    # Load the model.
    mappings, model, model_args = \
        model_builder.load_test_model(args.checkpoint, args, dummy_args.__dict__)

    # Figure out src and tgt vocab
    make_model_mappings(model_args.model, mappings)

    schema = Schema(model_args.schema_path, None)
    data_generator = get_data_generator(args, model_args, schema, test=True)

    # Prefix: [GO]
    scorer = Scorer(args.alpha)
    # NOTE(review): get_generator is not brought into scope by any of the
    # import lines of this file; unless it is provided some other way this
    # call raises NameError. Confirm which module defines it and import it.
    generator = get_generator(model, mappings['tgt_vocab'], scorer, args, model_args)
    builder = UtteranceBuilder(mappings['tgt_vocab'], args.n_best, has_tgt=True)
    evaluator = Evaluator(model, mappings, generator, builder, gt_prefix=1)
    evaluator.evaluate(args, model_args, data_generator)
Stores variables and settings used across application 8 | """ 9 | 10 | import logging 11 | 12 | log_level = logging.INFO 13 | log_format = '%(asctime)s : %(levelname)s : %(filename)s : %(message)s' 14 | 15 | # default training settings 16 | data_dir = 'data/negotiate' # data corpus directory 17 | nembed_word = 256 # size of word embeddings 18 | nembed_ctx = 64 # size of context embeddings 19 | nhid_lang = 256 # size of the hidden state for the language model 20 | nhid_ctx = 64 # size of the hidden state for the context model 21 | nhid_strat = 64 # size of the hidden state for the strategy model 22 | nhid_attn = 64 # size of the hidden state for the attention module 23 | nhid_sel = 64 # size of the hidden state for the selection module 24 | lr = 20.0 # initial learning rate 25 | min_lr = 1e-5 # min thresshold for learning rate annealing 26 | decay_rate = 9.0 # decrease learning rate by this factor 27 | decay_every = 1 # decrease learning rate after decay_every epochs 28 | momentum = 0.0 # momentum for SGD 29 | nesterov = False # enable Nesterov momentum 30 | clip = 0.2 # gradient clipping 31 | dropout = 0.5 # dropout rate in embedding layer 32 | init_range = 0.1 #initialization range 33 | max_epoch = 30 # max number of epochs 34 | bsz = 25 # batch size 35 | unk_threshold = 20 # minimum word frequency to be in dictionary 36 | temperature = 0.1 # temperature 37 | sel_weight = 1.0 # selection weight 38 | seed = 1 # random seed 39 | cuda = False # use CUDA 40 | plot_graphs = False # use visdom 41 | domain = "object_division" # domain for the dialogue 42 | rnn_ctx_encoder = False # Whether to use RNN for encoding the context 43 | 44 | # rl settings 45 | rl_temperature = 0.1 46 | verbose = True 47 | rl_score_threshold = 6 48 | rl_gamma = 0.95 49 | rl_eps = 0 50 | rl_momentum = 0.1 51 | rl_lr = 0.5 52 | rl_reinforcement_lr = 0.1 53 | rl_reinforcement_clip = 1 54 | rl_clip = 1 55 | rl_bsz = 16 56 | rl_sv_train_freq = 4 57 | rl_nepoch = 1 58 | 
def main():
    """Evaluate a pretrained model on the test split and print its losses.

    Parses the command line, loads the corpus and the model file, runs
    ``Engine.forward`` over every test batch and prints the language-model
    loss and the selection loss together with ``exp(loss)`` for each.
    """
    parser = argparse.ArgumentParser(description='testing script')
    parser.add_argument('--data', type=str, default='data/negotiate',
        help='location of the data corpus')
    parser.add_argument('--unk_threshold', type=int, default=20,
        help='minimum word frequency to be in dictionary')
    parser.add_argument('--model_file', type=str,
        help='pretrained model file')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    # --hierarchical is accepted but not read anywhere in this function.
    parser.add_argument('--hierarchical', action='store_true', default=False,
        help='use hierarchical model')
    parser.add_argument('--bsz', type=int, default=16,
        help='batch size')
    parser.add_argument('--cuda', action='store_true', default=False,
        help='use CUDA')
    args = parser.parse_args()

    device_id = utils.use_cuda(args.cuda)
    utils.set_seed(args.seed)

    corpus = data.WordCorpus(args.data, freq_cutoff=args.unk_threshold, verbose=True)
    model = utils.load_model(args.model_file)

    # One criterion over the word vocabulary (language model) and one over
    # the item vocabulary (selection).
    crit = Criterion(model.word_dict, device_id=device_id)
    # NOTE(review): both bad_toks entries are empty strings here; they look
    # like angle-bracket tokens that were stripped somewhere - confirm the
    # intended token names before relying on this.
    sel_crit = Criterion(model.item_dict, device_id=device_id,
        bad_toks=['', ''])


    testset, testset_stats = corpus.test_dataset(args.bsz, device_id=device_id)
    test_loss, test_select_loss = 0, 0

    # N is the vocabulary size used to flatten the LM output below.
    N = len(corpus.word_dict)
    for batch in testset:
        # run forward on the batch, produces output, hidden, target,
        # selection output and selection target
        # NOTE(review): volatile=False is passed although this is an
        # evaluation-only loop - confirm this is intended.
        out, hid, tgt, sel_out, sel_tgt = Engine.forward(model, batch, volatile=False)

        # compute LM and selection losses
        # The LM loss is weighted by tgt.size(0) so that it can be
        # normalized by the 'nonpadn' statistic after the loop.
        test_loss += tgt.size(0) * crit(out.view(-1, N), tgt).data[0]
        test_select_loss += sel_crit(sel_out, sel_tgt).data[0]

    # LM loss is averaged with testset_stats['nonpadn']; selection loss is
    # averaged over the number of batches.
    test_loss /= testset_stats['nonpadn']
    test_select_loss /= len(testset)
    print('testloss %.3f | testppl %.3f' % (test_loss, np.exp(test_loss)))
    print('testselectloss %.3f | testselectppl %.3f' % (test_select_loss, np.exp(test_select_loss)))
class Manager(BaseManager):
    """Dialogue manager that post-processes the base manager's decisions
    using the proposal-related fields of the dialogue state."""

    def available_actions(self, state):
        """Return the base manager's actions, minus 'select' and 'agree'
        while ``state.curr_proposal`` is still None."""
        candidates = super(Manager, self).available_actions(state)
        if state.curr_proposal is None:
            blocked = ['select', 'agree']
        else:
            blocked = []
        return [act for act in candidates if act not in blocked]

    def choose_action(self, state, context=None):
        """Pick the next action, overriding the base choice around clarify.

        - A base choice of 'select'/'agree' becomes 'clarify' unless our
          previous act was already 'clarify' or 'agree'.
        - After our own 'clarify': answer 'propose' when the partner proposed
          or insisted on something different from our proposal, or when the
          partner disagreed; otherwise 'select'.
        - In every other case the base choice is returned unchanged.
        """
        chosen = super(Manager, self).choose_action(state, context)
        wants_to_close = chosen in ('select', 'agree')
        if wants_to_close and state.my_act not in ('clarify', 'agree'):
            return 'clarify'

        if state.my_act != 'clarify':
            return chosen

        # From here on our last act was a clarification.
        if state.partner_act in ('propose', 'insist'):
            theirs = state.partner_proposal
            if theirs and theirs != state.my_proposal:
                return 'propose'
        if state.partner_act == 'disagree':
            return 'propose'
        return 'select'
def check_model_args(args):
    """Sanity-check a parsed set of model arguments.

    Checks performed:
      * when pretrained word vectors are given, the vector width read from
        the first line of the (first) file must equal ``word_embed_size``,
        and also ``context_size`` when a bag-of-words context encoder is used;
      * the 'rnn-attn' decoder requires ``attention_memory``;
      * ``num_context > 0`` is incompatible with ``stateful``;
      * ``temperature`` must be non-negative.

    Args:
        args: argparse-style namespace carrying the attributes named above.

    Raises:
        AssertionError: if any check fails.
        IOError/OSError: if the pretrained word-vector file cannot be opened.
    """
    if args.pretrained_wordvec:
        wordvec = args.pretrained_wordvec
        pretrained = wordvec[0] if isinstance(wordvec, list) else wordvec
        with open(pretrained, 'r') as fin:
            # Each line is "<word> v1 v2 ...", so width = fields - 1.
            pretrained_word_embed_size = len(fin.readline().strip().split()) - 1
        assert pretrained_word_embed_size == args.word_embed_size, \
            'pretrained word vector size does not match word_embed_size'

        # This check needs the size read above, so it is only meaningful
        # (and only safe to evaluate) when pretrained vectors are in use.
        if args.context and args.context_encoder == 'bow':
            assert pretrained_word_embed_size == args.context_size, \
                'pretrained word vector size does not match context_size'

    if args.decoder == 'rnn-attn':
        assert args.attention_memory is not None, \
            'rnn-attn decoder requires attention_memory'

    if args.num_context > 0:
        assert not args.stateful, \
            'num_context > 0 cannot be combined with stateful'

    assert args.temperature >= 0, 'temperature must be non-negative'
class NegotiationModel(NMTModel):
    """Encoder-decoder model whose decoder is given three memory banks:
    the utterance encoder's, the context embedder's and an embedded scene.
    """

    def __init__(self, encoder, decoder, context_embedder, selectors, scene_settings,
            dropout, stateful=False):
        """
        Args:
            encoder, decoder: forwarded to the NMTModel constructor.
            context_embedder: callable applied to ``context`` in forward().
            selectors (dict): must provide the keys 'enc' and 'dec'.
            scene_settings: positional arguments for ``nn.Embedding``.
            dropout: stored on the instance as-is.
            stateful (bool): forwarded to the NMTModel constructor.
        """
        super(NegotiationModel, self).__init__(encoder, decoder, stateful=stateful)
        self.context_embedder = context_embedder
        self.kb_embedder = nn.Embedding(*scene_settings)
        self.dropout = dropout
        # Original annotation: 'enc' is 1 encoder, 'dec' is 6 decoders.
        self.select_encoder, self.select_decoders = selectors['enc'], selectors['dec']

    def forward(self, src, tgt, context, scene, lengths, dec_state=None, enc_state=None, tgt_lengths=None):
        """Encode the inputs and decode ``tgt`` against all memory banks.

        Returns:
            tuple: (outputs, attns, dec_state) as produced by the decoder.
        """
        # ---- encode ----
        final_state, utterance_bank = self.encoder(src, lengths, enc_state)
        # Only the memory bank of the context embedder is used.
        _, context_bank = self.context_embedder(context)
        scene_bank = self.kb_embedder(scene)

        # Per the original note, each bank is
        # (seq_len x batch_size x hidden_size).
        banks = [utterance_bank, context_bank, scene_bank]

        # ---- decode ----
        # The initial decoder state is always derived from the encoder, but
        # an explicitly supplied dec_state takes precedence over it.
        initial_state = self.decoder.init_decoder_state(src, utterance_bank, final_state)
        if dec_state is None:
            dec_state = initial_state
        outputs, dec_state, attns = self.decoder(
            tgt, banks, dec_state, memory_lengths=lengths, lengths=tgt_lengths)

        return outputs, attns, dec_state
-------------------------------------------------------------------------------- /dealornodeal/neural/symbols.py: -------------------------------------------------------------------------------- 1 | from cocoa.neural.symbols import Marker as BaseMarker 2 | 3 | # Facebook Negotiation 4 | class Marker(BaseMarker): 5 | # Sequence 6 | GO = '' 7 | 8 | # Actions 9 | SELECT = ' x y z; which corresponds to book, hat, ball") 8 | 9 | def send(self): 10 | message = raw_input() 11 | event = self.parse_input(message) 12 | return event 13 | 14 | def parse_input(self, message): 15 | """Parse user input from the command line. 16 | Args: message (str) 17 | Returns: Event 18 | """ 19 | raw_tokens = message.split() 20 | tokens = self.remove_nonprintable(raw_tokens) 21 | 22 | print tokens 23 | 24 | if len(tokens) >= 2 and tokens[0] == '', ''): 31 | tokens.append(w[1:-1].upper()) 32 | # Category markers 33 | elif len(w) > 2 and w[0] == '<' and w[-1] == '>': 34 | continue 35 | # De-tokenize 36 | elif (w in string.punctuation or "'" in w) and len(tokens) > 0: 37 | tokens[-1] += w 38 | else: 39 | tokens.append(w) 40 | return super(EvalData, cls).process_utterance(' '.join(tokens), role) 41 | 42 | @classmethod 43 | def valid_example(cls, example, num_context_utterances): 44 | last_utterance = example['prev_turns'][-1] 45 | if '': 25 | return self.select(self.kb.items[int(tokens[1])]) 26 | return self.message(message) 27 | 28 | def receive(self, event): 29 | print event.data 30 | -------------------------------------------------------------------------------- /mutualfriends/sessions/session.py: -------------------------------------------------------------------------------- 1 | from cocoa.sessions.session import Session as BaseSession 2 | from core.event import Event 3 | 4 | class Session(BaseSession): 5 | def select(self, item): 6 | """Select an item from the KB. 
def get_system(name, args, schema=None, timed=False, model_path=None):
    """Build the dialogue system called ``name``.

    Args:
        name (str): 'rulebased', 'neural' or 'cmd'.
        args: parsed options; the rulebased / neural systems read their
            lexicon, template, policy and model settings from it.
        schema: passed to the Lexicon (and to the neural system).
        timed (bool): passed to the rule-based system.
        model_path: accepted but not read here; the neural system uses
            ``args.model_path``.

    Returns:
        The constructed system.

    Raises:
        ValueError: if ``name`` is not one of the known systems.

    Note:
        The realizer is only handed to the neural system; the rule-based
        system is built without it.
    """
    if name == 'cmd':
        return CmdSystem()
    if name not in ('rulebased', 'neural'):
        raise ValueError('Unknown system %s' % name)

    # Both remaining systems share the lexicon and the realizer setup.
    lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words, lexicon_path=args.lexicon)
    realizer = (InverseLexicon.from_file(args.inverse_lexicon)
                if args.inverse_lexicon else DefaultInverseLexicon())

    if name == 'rulebased':
        generator = Generator(Templates.from_pickle(args.templates))
        manager = Manager.from_pickle(args.policy)
        return RulebasedSystem(lexicon, generator, manager, timed)

    # name == 'neural'
    assert args.model_path
    return NeuralSystem(schema, lexicon, args.model_path, args.fact_check, args.decoding, realizer=realizer)
def add_heuristic_system_arguments(parser):
    """Register the heuristic system's boolean switches on ``parser``.

    Both options are ``store_true`` flags that default to False.
    """
    switches = (
        ('--joint-facts', 'Generate joint attributes, e.g., hiking and philosophy'),
        ('--ask', 'Ask questions, e.g., do you have ...'),
    )
    for flag, description in switches:
        parser.add_argument(flag, default=False, action='store_true', help=description)
realizer=None): 7 | super(RulebasedSystem, self).__init__(lexicon, generator, manager, timed_session) 8 | self.realizer = realizer 9 | 10 | def _new_session(self, agent, kb, config): 11 | return RulebasedSession(agent, kb, self.lexicon, config, self.generator, self.manager, realizer=self.realizer) 12 | -------------------------------------------------------------------------------- /mutualfriends/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/web/__init__.py -------------------------------------------------------------------------------- /mutualfriends/web/app_params.json: -------------------------------------------------------------------------------- 1 | { 2 | "status_params": { 3 | "waiting": { 4 | "num_seconds": 180 5 | }, 6 | "chat": { 7 | "num_seconds": 300 8 | }, 9 | "finished": { 10 | "num_seconds": 15 11 | }, 12 | "redirected": { 13 | "num_seconds": -1 14 | }, 15 | "incomplete": { 16 | "num_seconds": -1 17 | }, 18 | "reporting": { 19 | "num_seconds": -1 20 | }, 21 | "survey": { 22 | "num_seconds": -1 23 | } 24 | }, 25 | 26 | "scenarios_json_file": "data/test-scenarios.json", 27 | "idle_timeout_num_seconds": 210, 28 | "connection_timeout_num_seconds": 3, 29 | "templates_dir": "web/templates", 30 | "images_base": "web/static/images", 31 | "end_survey": 1, 32 | "instructions": "web/templates/instructions.html", 33 | "task_title": "Who's Our Mutual Friend?", 34 | "icon": "handshake.jpg", 35 | "num_chats_per_scenario": {"rulebased": 1, "human": 1}, 36 | "debug": false, 37 | "models": { 38 | "rulebased": { 39 | "active": true, 40 | "type": "rulebased" 41 | } 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /mutualfriends/web/main/__init__.py: -------------------------------------------------------------------------------- 
class DatabaseReader(BaseDatabaseReader):
    """Database reader with task-specific event decoding and survey export."""

    @classmethod
    def process_event_data(cls, action, data):
        """Return ``data`` JSON-decoded for 'select' events, unchanged otherwise."""
        return json.loads(data) if action == 'select' else data

    # TODO: move this to cocoa. factor survey questions
    @classmethod
    def dump_surveys(cls, cursor, json_path):
        """Write every logged survey, grouped by chat, to ``json_path``.

        The output is the two-element list ``[agent_types, survey_data]``:
        ``agent_types`` maps a chat id to the decoded ``agent_types`` column
        of that chat, and ``survey_data`` maps a chat id to ``{0: ..., 1: ...}``
        holding the responses stored under the partner index.

        Args:
            cursor: DB cursor used for the ``survey`` and ``chat`` queries.
            json_path: destination handed to ``write_json``.
        """
        cursor.execute('''SELECT * FROM survey''')
        survey_rows = cursor.fetchall()

        responses_by_chat = {}
        agent_types_by_chat = {}
        # todo this is pretty lazy - support variable # of questions per task eventually..
        for (userid, cid, _, q1, q2, comments) in survey_rows:
            answers = {'humanlike': q1, 'cooperative': q2, 'comments': comments}

            cursor.execute('''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''', (cid,))
            chat_row = cursor.fetchone()
            agent_types_by_chat[cid] = json.loads(chat_row[0])
            agent_ids = json.loads(chat_row[1])

            per_agent = responses_by_chat.setdefault(cid, {0: {}, 1: {}})
            # The survey is filed under index 0 when this user is agent '1',
            # and under index 1 otherwise.
            if agent_ids['1'] == userid:
                per_agent[0] = answers
            else:
                per_agent[1] = answers

        write_json([agent_types_by_chat, responses_by_chat], json_path)
20 |

Who's Our Mutual Friend?

21 |

{{ finished_message }}

22 |
23 |
24 | {% if mturk_code is not none %} 25 |
26 |

Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}

27 | {% endif %} 28 |
29 | {% if visualize %} 30 |

Click here to visualize the dialogue you just completed

31 | {% endif %} 32 |
33 | 34 | 35 | -------------------------------------------------------------------------------- /mutualfriends/web/templates/instructions.html: -------------------------------------------------------------------------------- 1 |

2 | You and another user online have exactly one mutual friend in common. You know some attributes for each of your friends (like name, school, etc.). Your goal is to find the mutual friend using those attributes! 3 |

4 |

Instructions

5 |
    6 |
  • Please use natural sentences as much as possible. 7 |
      8 |
    • Do: three of my friends went to ut austin
    • 9 |
    • Don't do: 3 ut austin
    • 10 |
    11 |
  • 12 |
  • Avoid simply listing out any attributes (name, company, etc.) of your friends.
  • 13 |
  • Look at your list of friends at right.
  • 14 |
  • Use the chat box below to find out more about your partner's friends.
  • 15 |
  • Once you've found out who it is, select them by clicking Select in the first column.
  • 16 |
  • If you run out of time while doing the chat, we will still award you money for the HIT if you made a good effort to complete the task.
  • 17 |
  • Please select carefully. If you select a friend once, the website will not allow you to select another friend for 10 seconds.
  • 18 | 19 | 20 |
-------------------------------------------------------------------------------- /mutualfriends/web/templates/third_party_eval_finished.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Thank You! 4 | 9 | 10 | 11 |
12 |

Evaluating Dialogues

13 | {{ finished_message }} 14 |
15 |
16 |
17 | {% if mturk_code is not none %} 18 |
19 |

Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}

20 | {% endif %} 21 |
22 |
def aeq(*args):
    """
    Assert all arguments have the same value.

    Every argument is compared against the first one with ``==``. Calling
    with no arguments (or a single one) is a no-op: there is nothing to
    compare, so the check holds trivially instead of leaking StopIteration.

    Raises:
        AssertionError: if any argument differs from the first.
    """
    if not args:
        return
    first = args[0]
    assert all(arg == first for arg in args[1:]), \
        "Not all arguments have the same value: " + str(args)
def use_gpu(opt):
    """Tell whether the options request a GPU.

    A GPU is requested when ``opt.gpuid`` exists and is non-empty, or when
    ``opt.gpu`` exists and is greater than -1.
    """
    if hasattr(opt, 'gpuid') and len(opt.gpuid) > 0:
        return True
    if not hasattr(opt, 'gpu'):
        return False
    return opt.gpu > -1
class AudioEncoder(nn.Module):
    """
    Audio encoder: two convolution + batch-norm stages followed by an LSTM.

    Args:
        num_layers (int): number of LSTM layers.
        bidirectional (bool): whether the LSTM is bidirectional.
        rnn_size (int): size of the LSTM hidden states.
        dropout (float): dropout probability passed to the LSTM.
        sample_rate (float): input spec
        window_size (int): input spec

    """
    def __init__(self, num_layers, bidirectional, rnn_size, dropout,
                 sample_rate, window_size):
        super(AudioEncoder, self).__init__()
        self.num_layers = num_layers
        self.num_directions = 2 if bidirectional else 1
        self.hidden_size = rnn_size

        self.layer1 = nn.Conv2d(1, 32, kernel_size=(41, 11),
                                padding=(0, 10), stride=(2, 2))
        self.batch_norm1 = nn.BatchNorm2d(32)
        self.layer2 = nn.Conv2d(32, 32, kernel_size=(21, 11),
                                padding=(0, 0), stride=(2, 1))
        self.batch_norm2 = nn.BatchNorm2d(32)

        # Size of the frequency axis of the input, then after each of the
        # two stride-2 convolutions (kernel heights 41 and 21).
        freq_bins = int(math.floor((sample_rate * window_size) / 2) + 1)
        for kernel_height in (41, 21):
            freq_bins = int(math.floor(freq_bins - kernel_height) / 2 + 1)
        # 32 output channels are flattened together with the frequency axis.
        rnn_input_size = freq_bins * 32
        self.rnn = nn.LSTM(rnn_input_size, rnn_size,
                           num_layers=num_layers,
                           dropout=dropout,
                           bidirectional=bidirectional)

    def load_pretrained_vectors(self, opt):
        # Intentionally a no-op for this encoder.
        pass

    def forward(self, input, lengths=None):
        "See :obj:`onmt.modules.EncoderBase.forward()`"
        # input: (batch_size, 1, nfft, t); `lengths` is not used.
        # Stage 1: conv + batch norm, clamped to [0, 20].
        feats = self.batch_norm1(self.layer1(input))
        feats = F.hardtanh(feats, 0, 20, inplace=True)

        # Stage 2: conv + batch norm, clamped to [0, 20].
        feats = self.batch_norm2(self.layer2(feats))
        feats = F.hardtanh(feats, 0, 20, inplace=True)

        # Merge channels with the frequency axis, then put time first:
        # (batch, channels * freq, time) -> (time, batch, channels * freq).
        n_batch, n_steps = feats.size(0), feats.size(3)
        feats = feats.view(n_batch, -1, n_steps).permute(2, 0, 1)

        output, hidden = self.rnn(feats)

        return hidden, output
class MatrixTree(nn.Module):
    """Implementation of the matrix-tree theorem for computing marginals
    of non-projective dependency parsing. This attention layer is used
    in the paper "Learning Structured Text Representations."


    :cite:`DBLP:journals/corr/LiuL17d`

    Args:
        eps (float): constant added to every exponentiated score before
            the Laplacian is assembled.
    """
    def __init__(self, eps=1e-5):
        super(MatrixTree, self).__init__()
        self.eps = eps

    def forward(self, input):
        """Compute marginals from a batch of square score matrices.

        Args:
            input: scores of shape ``(batch, n, n)``. The diagonal of each
                matrix is read as the root scores.

        Returns:
            A tensor with the same shape as ``input``. Root marginals are
            written on the diagonal of each matrix.
        """
        laplacian = input.exp() + self.eps
        output = input.clone()
        # Diagonal mask, built once and placed on the same device as the
        # input so the layer works on CPU as well as on GPU (it used to be
        # forced onto CUDA and rebuilt for every batch element).
        diag_mask = Variable(
            torch.eye(input.size(1)).type_as(input.data).ne(0))
        for b in range(input.size(0)):
            # Zero the diagonal, then form the Laplacian: negated
            # off-diagonal weights with column sums on the diagonal.
            lap = laplacian[b].masked_fill(diag_mask, 0)
            lap = -lap + torch.diag(lap.sum(0))
            # store roots on diagonal
            lap[0] = input[b].diag().exp()
            inv_laplacian = lap.inverse()

            # factor[i, j] == inv_laplacian[j, j]
            factor = inv_laplacian.diag().unsqueeze(1)\
                                         .expand_as(input[b]).transpose(0, 1)
            term1 = input[b].exp().mul(factor).clone()
            term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
            # Column 0 of the first term and row 0 of the second term are
            # excluded from the edge marginals.
            term1[:, 0] = 0
            term2[0] = 0
            output[b] = term1 - term2
            roots_output = input[b].diag().exp().mul(
                inv_laplacian.transpose(0, 1)[0])
            output[b] = output[b] + torch.diag(roots_output)
        return output
MatrixTree 10 | from onmt.modules.Transformer import \ 11 | TransformerEncoder, TransformerDecoder, PositionwiseFeedForward 12 | from onmt.modules.Conv2Conv import CNNEncoder, CNNDecoder 13 | from onmt.modules.MultiHeadedAttn import MultiHeadedAttention 14 | from onmt.modules.StackedRNN import StackedLSTM, StackedGRU 15 | from onmt.modules.Embeddings import Embeddings, PositionalEncoding 16 | from onmt.modules.WeightNorm import WeightNormConv2d 17 | 18 | from onmt.Models import EncoderBase, MeanEncoder, StdRNNDecoder, \ 19 | RNNDecoderBase, InputFeedRNNDecoder, RNNEncoder, NMTModel 20 | 21 | from onmt.modules.SRU import check_sru_requirement 22 | can_use_sru = check_sru_requirement() 23 | if can_use_sru: 24 | from onmt.modules.SRU import SRU 25 | 26 | 27 | # For flake8 compatibility. 28 | __all__ = [EncoderBase, MeanEncoder, RNNDecoderBase, InputFeedRNNDecoder, 29 | RNNEncoder, NMTModel, 30 | StdRNNDecoder, ContextGate, GlobalAttention, ImageEncoder, 31 | PositionwiseFeedForward, PositionalEncoding, 32 | CopyGenerator, MultiHeadedAttention, 33 | LayerNorm, Bottle, BottleLinear, BottleLayerNorm, BottleSoftmax, 34 | TransformerEncoder, TransformerDecoder, Embeddings, Elementwise, 35 | MatrixTree, WeightNormConv2d, ConvMultiStepAttention, 36 | CNNEncoder, CNNDecoder, StackedLSTM, StackedGRU, 37 | context_gate_factory, CopyGeneratorLossCompute, AudioEncoder] 38 | 39 | if can_use_sru: 40 | __all__.extend([SRU, check_sru_requirement]) 41 | -------------------------------------------------------------------------------- /onmt/translate/__init__.py: -------------------------------------------------------------------------------- 1 | from onmt.translate.Translator import Translator 2 | from onmt.translate.Translation import Translation, TranslationBuilder 3 | from onmt.translate.Beam import Beam, GNMTGlobalScorer 4 | 5 | __all__ = [Translator, Translation, Beam, GNMTGlobalScorer, TranslationBuilder] 6 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nltk==3.2.4 2 | whoosh==2.7.4 3 | ujson==1.35 4 | numpy==1.13.3 5 | matplotlib==2.0.2 6 | flask==0.12.2 7 | flask-socketio==2.8.5 8 | scipy==1.0.1 9 | scikit-learn==0.19.0 10 | datasketch==1.2.3 11 | langdetect==1.0.7 12 | -------------------------------------------------------------------------------- /scripts/analyze.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from cocoa.core.dataset import read_examples 4 | from cocoa.model.dialogue_parser import parse_example 5 | from cocoa.analysis.analyzer import Analyzer 6 | 7 | from core.scenario import Scenario 8 | from core.lexicon import Lexicon 9 | from model.generator import Templates 10 | from model.manager import Manager 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--lexicon', help='Path to pickled lexicon') 15 | parser.add_argument('--transcripts', nargs='*', help='JSON transcripts to extract templates') 16 | parser.add_argument('--max-examples', default=-1, type=int) 17 | parser.add_argument('--templates', help='Path to load templates') 18 | parser.add_argument('--policy', help='Path to load model') 19 | args = parser.parse_args() 20 | 21 | lexicon = Lexicon.from_pickle(args.lexicon) 22 | #templates = Templates.from_pickle(args.templates) 23 | templates = Templates() 24 | manager = Manager.from_pickle(args.policy) 25 | analyzer = Analyzer(lexicon) 26 | 27 | examples = read_examples(args.transcripts, args.max_examples, Scenario) 28 | 29 | parsed_dialogues = [] 30 | for example in examples: 31 | utterances = parse_example(example, lexicon, templates) 32 | parsed_dialogues.append(utterances) 33 | 34 | analyzer.example_stats(examples) 35 | analyzer.parser_stats(parsed_dialogues) 36 | #analyzer.manager_stats(manager) 37
'''
Combine different batches of data (transcripts.json and surveys.json).

Each directory given with --paths is expected to contain
``transcripts/transcripts.json`` and ``transcripts/surveys.json``. The
merged files are written to ``<output>/transcripts/``.
'''

import argparse
import os
from cocoa.core.util import read_json, write_json

parser = argparse.ArgumentParser()
parser.add_argument('--paths', nargs='+', help='Paths to transcripts directories')
parser.add_argument('--output', help='Output directory')
args = parser.parse_args()

all_chats = []
# survey data structure: [{}, {}]
all_surveys = [{}, {}]

for d in args.paths:
    transcript_file = os.path.join(d, 'transcripts/transcripts.json')
    survey_file = os.path.join(d, 'transcripts/surveys.json')

    # Transcripts are concatenated in the order the paths were given.
    chats = read_json(transcript_file)
    all_chats.extend(chats)

    # Each survey file is a list of dicts; the i-th dict of every batch is
    # merged into the i-th combined dict (later batches win on key clashes).
    surveys = read_json(survey_file)
    for i, s in enumerate(surveys):
        all_surveys[i].update(s)
    # Function-call form prints the same text on Python 2 and Python 3.
    print("Combined data from {}".format(d))

output_dir = os.path.join(args.output, 'transcripts')
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
write_json(all_chats, os.path.join(output_dir, 'transcripts.json'))
write_json(all_surveys, os.path.join(output_dir, 'surveys.json'))
'''
Split a transcript file with mixed partners into one file per agent pair.

Only chats whose scenario was played by four distinct agent pairs are
kept. Each pair's chats are written to
``<output>/<agent>-<agent>_transcripts.json``.
'''

import argparse
import os
from collections import defaultdict

# The helpers live in cocoa.core.util (the path the sibling scripts use);
# the previous ``src.core.util`` path does not exist in this tree.
from cocoa.core.util import read_json, write_json

parser = argparse.ArgumentParser()
parser.add_argument('--transcripts', help='Path to transcritps of mixed partners')
parser.add_argument('--output', help='Output directories')
args = parser.parse_args()

chats = read_json(args.transcripts)
chats_by_agents = defaultdict(list)
scenario_agents = defaultdict(set)
for chat in chats:
    agent_names = chat['agents']
    # Normalise the pair: keep (agent 0, agent 1) when agent 0 is the
    # human, otherwise swap the two (presumably so the human comes first).
    if agent_names['0'] == 'human':
        pair = (agent_names['0'], agent_names['1'])
    else:
        pair = (agent_names['1'], agent_names['0'])
    chats_by_agents[pair].append(chat)

    scenario_id = chat['scenario_uuid']
    scenario_agents[scenario_id].add(pair)

# Only keep scenarios with all 4 agents
scenario_subset = set(
    s for s, pairs in scenario_agents.items() if len(pairs) == 4)
print('Number of scenarios: {}'.format(len(scenario_subset)))

# The output directory does not depend on the pair, so create it once.
if not os.path.isdir(args.output):
    os.makedirs(args.output)

for pair, pair_chats in chats_by_agents.items():
    kept = [c for c in pair_chats if c['scenario_uuid'] in scenario_subset]
    print('{} {}'.format(pair, len(kept)))
    path = os.path.join(args.output, '%s_transcripts.json' % '-'.join(pair))
    write_json(kept, path)
from argparse import ArgumentParser

from cocoa.core.util import write_json
from analysis.visualizer import Visualizer
from analysis.html_visualizer import HTMLVisualizer

# Command-line entry point: build a Visualizer from dialogue and survey
# transcripts, print its effectiveness results, and optionally produce a
# histogram, worker statistics, a JSON summary and HTML output.
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--survey-transcripts', nargs='+',
        help='Path to directory containing evaluation transcripts')
    parser.add_argument('--dialogue-transcripts', nargs='+',
        help='Path to directory containing dialogue transcripts')
    parser.add_argument('--summary', default=False, action='store_true',
        help='Summarize human ratings')
    # NOTE(review): --html-visualize is parsed but never read below; the
    # HTML step is gated on args.html_output instead.
    parser.add_argument('--html-visualize', action='store_true',
        help='Output html files')
    parser.add_argument('--outdir', default='.', help='Output dir')
    parser.add_argument('--stats', default='stats.json',
        help='Path to stats file')
    parser.add_argument('--partner', default=False, action='store_true',
        help='Whether this is from partner survey')
    # NOTE(review): --task is parsed but not read anywhere in this script.
    parser.add_argument('--task', default='cl-neg',
        choices=['cl-neg','fb-neg', 'mutual', 'movies'],
        help='which task you are trying run')
    parser.add_argument('--worker-ids', nargs='+',
        help='Path to json file containing chat_id to worker_id mappings')
    parser.add_argument('--hist', default=False, action='store_true',
        help='Plot histgram of ratings')
    parser.add_argument('--survey-only', default=False, action='store_true',
        help='Only analyze dialogues with survey (completed)')
    parser.add_argument('--base-agent', default='human',
        help='Agent to compare against')

    # Presumably registers the html_output / viewer_mode / css_file /
    # img_path options read further down -- none of them is defined in
    # this script; verify against HTMLVisualizer.
    HTMLVisualizer.add_html_visualizer_arguments(parser)
    args = parser.parse_args()

    visualizer = Visualizer(args.dialogue_transcripts, args.survey_transcripts)
    results = visualizer.compute_effectiveness(with_survey=args.survey_only, base_agent=args.base_agent)
    visualizer.print_results(results)

    if args.hist:
        # NOTE(review): `question_scores` is never assigned in this script,
        # so running with --hist raises NameError on the next line. The
        # intended value is not recoverable from this file -- confirm what
        # Visualizer.hist expects and where it should come from.
        visualizer.hist(question_scores, args.outdir, partner=args.partner)
    if args.worker_ids:
        visualizer.worker_stats()

    # TODO: move summary and hist to analyzer
    if args.summary:
        # The summary is written as JSON to the --stats path.
        summary = visualizer.summarize()
        write_json(summary, args.stats)
    if args.html_output:
        visualizer.html_visualize(args.viewer_mode, args.html_output,
            css_file=args.css_file, img_path=args.img_path,
            worker_ids=visualizer.worker_ids)