├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── chat_viewer
    ├── chat.html
    ├── css
    │   └── my.css
    └── js
    │   └── my.js
├── cocoa
    ├── __init__.py
    ├── analysis
    │   ├── __init__.py
    │   ├── analyzer.py
    │   ├── html_visualizer.py
    │   ├── utils.py
    │   └── visualizer.py
    ├── core
    │   ├── __init__.py
    │   ├── controller.py
    │   ├── dataset.py
    │   ├── entity.py
    │   ├── event.py
    │   ├── kb.py
    │   ├── sample_utils.py
    │   ├── scenario_db.py
    │   ├── schema.py
    │   ├── tokenizer.py
    │   └── util.py
    ├── io
    │   ├── __init__.py
    │   └── utils.py
    ├── lib
    │   ├── __init__.py
    │   ├── bleu.py
    │   ├── logstats.py
    │   └── multi_bleu.py
    ├── model
    │   ├── __init__.py
    │   ├── counter.py
    │   ├── dialogue_parser.py
    │   ├── dialogue_state.py
    │   ├── generator.py
    │   ├── manager.py
    │   ├── ngram.py
    │   ├── parser.py
    │   ├── util.py
    │   └── vocab.py
    ├── neural
    │   ├── __init__.py
    │   ├── attention.py
    │   ├── beam.py
    │   ├── embeddings_to_torch.py
    │   ├── evaluator.py
    │   ├── generator.py
    │   ├── loss.py
    │   ├── models.py
    │   ├── rl_trainer.py
    │   ├── symbols.py
    │   ├── trainer.py
    │   ├── utterance.py
    │   └── vocab_builder.py
    ├── options.py
    ├── sessions
    │   ├── __init__.py
    │   ├── human_session.py
    │   ├── rulebased_session.py
    │   ├── session.py
    │   └── timed_session.py
    ├── systems
    │   ├── __init__.py
    │   ├── human_system.py
    │   ├── rulebased_system.py
    │   └── system.py
    ├── turk
    │   ├── __init__.py
    │   ├── eval_data.py
    │   ├── task.py
    │   ├── templates
    │   │   ├── compare_question.html
    │   │   ├── frame.html
    │   │   ├── multi_question.html
    │   │   └── question.html
    │   └── utils.py
    └── web
    │   ├── README.md
    │   ├── __init__.py
    │   ├── main
    │       ├── __init__.py
    │       ├── backend.py
    │       ├── db_reader.py
    │       ├── logger.py
    │       ├── states.py
    │       └── utils.py
    │   └── views
    │       ├── __init__.py
    │       ├── chat.py
    │       └── utils.py
├── craigslistbargain
    ├── README.md
    ├── analysis
    │   ├── __init__.py
    │   ├── analyze_strategy.py
    │   ├── dialogue.py
    │   ├── get_data_statistics.py
    │   ├── html_visualizer.py
    │   ├── speech_acts.py
    │   ├── utils.py
    │   └── visualizer.py
    ├── core
    │   ├── __init__.py
    │   ├── controller.py
    │   ├── event.py
    │   ├── kb.py
    │   ├── price_tracker.py
    │   ├── scenario.py
    │   └── tokenizer.py
    ├── data
    │   └── craigslist-schema.json
    ├── evaluate.py
    ├── main.py
    ├── model
    │   ├── __init__.py
    │   ├── dialogue_state.py
    │   ├── generator.py
    │   ├── manager.py
    │   ├── parser.py
    │   └── templates.py
    ├── neural
    │   ├── __init__.py
    │   ├── batcher.py
    │   ├── evaluator.py
    │   ├── generator.py
    │   ├── model_builder.py
    │   ├── models.py
    │   ├── preprocess.py
    │   ├── rl_trainer.py
    │   ├── symbols.py
    │   ├── trainer.py
    │   ├── utterance.py
    │   └── vocab_builder.py
    ├── onmt
    ├── options.py
    ├── parse_dialogue.py
    ├── reinforce.py
    ├── results.txt
    ├── scraper
    │   ├── data
    │   │   └── negotiation
    │   │   │   ├── craigslist_bike.json
    │   │   │   ├── craigslist_car.json
    │   │   │   ├── craigslist_electronics.json
    │   │   │   ├── craigslist_furniture.json
    │   │   │   ├── craigslist_housing.json
    │   │   │   └── craigslist_phone.json
    │   ├── scraper
    │   │   ├── __init__.py
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │   │   ├── __init__.py
    │   │   │   └── craigslist_spider.py
    │   └── scrapy.cfg
    ├── scripts
    │   ├── generate_scenarios.py
    │   ├── turk_eval.py
    │   └── visualize_transcripts.py
    ├── sessions
    │   ├── __init__.py
    │   ├── cmd_session.py
    │   ├── hybrid_session.py
    │   ├── neural_session.py
    │   ├── rulebased_session.py
    │   └── session.py
    ├── systems
    │   ├── __init__.py
    │   ├── cmd_system.py
    │   ├── hybrid_system.py
    │   ├── neural_system.py
    │   └── rulebased_system.py
    └── web
    │   ├── __init__.py
    │   ├── app_params.json
    │   ├── app_params_allsys.json
    │   ├── chat_app.py
    │   ├── main
    │       ├── __init__.py
    │       ├── backend.py
    │       ├── db_reader.py
    │       └── utils.py
    │   ├── static
    │       ├── css
    │       │   └── survey.css
    │       └── img
    │       │   └── handshake.jpg
    │   ├── templates
    │       ├── chat.html
    │       ├── craigslist-instructions.html
    │       ├── finished.html
    │       ├── report.html
    │       ├── task_survey.html
    │       ├── third_party_eval_finished.html
    │       ├── visualize.html
    │       └── waiting.html
    │   └── views
    │       ├── __init__.py
    │       └── action.py
├── data
    ├── turk
    │   └── sample_aws_config.json
    └── web
    │   ├── matchmaking-instructions.html
    │   ├── matchmaking_params.json
    │   └── negotiation
    │       ├── app_params.json
    │       ├── craiglist-instructions.html
    │       └── rent-instructions.html
├── dealornodeal
    ├── README.md
    ├── analysis
    │   ├── __init__.py
    │   ├── html_visualizer.py
    │   └── visualizer.py
    ├── core
    │   ├── __init__.py
    │   ├── controller.py
    │   ├── event.py
    │   ├── kb.py
    │   ├── lexicon.py
    │   ├── scenario.py
    │   └── tokenizer.py
    ├── data
    │   ├── bookhatball-schema.json
    │   ├── test.json
    │   ├── test.txt
    │   ├── train.json
    │   ├── train.txt
    │   ├── val.json
    │   └── val.txt
    ├── evaluate.py
    ├── fb_model
    │   ├── __init__.py
    │   ├── agent.py
    │   ├── avg_rank.py
    │   ├── chat.py
    │   ├── config.py
    │   ├── data.py
    │   ├── dialog.py
    │   ├── domain.py
    │   ├── engine.py
    │   ├── eval_selfplay.py
    │   ├── metric.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── dialog_model.py
    │   │   └── modules.py
    │   ├── reinforce.py
    │   ├── selfplay.py
    │   ├── split.py
    │   ├── test.py
    │   ├── train.py
    │   ├── utils.py
    │   └── vis.py
    ├── main.py
    ├── model
    │   ├── __init__.py
    │   ├── dialogue_state.py
    │   ├── generator.py
    │   ├── manager.py
    │   └── parser.py
    ├── neural
    │   ├── __init__.py
    │   ├── batcher.py
    │   ├── evaluator.py
    │   ├── generator.py
    │   ├── model_builder.py
    │   ├── models.py
    │   ├── preprocess.py
    │   ├── rl_trainer.py
    │   ├── symbols.py
    │   ├── trainer.py
    │   ├── utterance.py
    │   └── vocab_builder.py
    ├── onmt
    ├── options.py
    ├── parse_dialogue.py
    ├── reinforce.py
    ├── scripts
    │   ├── analyze.py
    │   ├── create_scenarios.py
    │   ├── fb_data_to_int_scenario.py
    │   └── fb_data_to_json.py
    ├── sessions
    │   ├── __init__.py
    │   ├── cmd_session.py
    │   ├── hybrid_session.py
    │   ├── neural_session.py
    │   ├── rulebased_session.py
    │   └── session.py
    ├── systems
    │   ├── __init__.py
    │   ├── cmd_system.py
    │   ├── hybrid_system.py
    │   ├── neural_system.py
    │   └── rulebased_system.py
    ├── turk
    │   ├── __init__.py
    │   └── eval_data.py
    └── web
    │   ├── __init__.py
    │   ├── app_params.json
    │   ├── chat_app.py
    │   ├── main
    │       ├── __init__.py
    │       ├── backend.py
    │       ├── db_reader.py
    │       └── utils.py
    │   ├── static
    │       ├── css
    │       │   ├── chat.css
    │       │   └── survey.css
    │       └── img
    │       │   ├── ball.png
    │       │   ├── book.png
    │       │   ├── handshake.jpg
    │       │   └── hat.png
    │   ├── templates
    │       ├── chat.html
    │       ├── finished.html
    │       ├── instructions.html
    │       ├── report.html
    │       ├── task_survey.html
    │       ├── visualize.html
    │       └── waiting.html
    │   └── views
    │       ├── __init__.py
    │       └── action.py
├── environment.yml
├── mutualfriends
    ├── analysis
    │   ├── __init__.py
    │   ├── html_visualizer.py
    │   └── visualizer.py
    ├── core
    │   ├── __init__.py
    │   ├── controller.py
    │   ├── entity_ranker.py
    │   ├── event.py
    │   ├── inverse_lexicon.py
    │   ├── kb.py
    │   ├── lexicon.py
    │   ├── lexicon_utils.py
    │   ├── scenario.py
    │   └── tokenizer.py
    ├── data
    │   ├── common_words.txt
    │   ├── friends-schema-old.json
    │   ├── friends-schema.json
    │   ├── inverse_lexicon_data.txt
    │   └── schema.json
    ├── model
    │   ├── __init__.py
    │   ├── dialogue_state.py
    │   ├── encdec.py
    │   ├── evaluate.py
    │   ├── generator.py
    │   ├── graph.py
    │   ├── graph_embedder.py
    │   ├── graph_embedder_config.py
    │   ├── learner.py
    │   ├── manager.py
    │   ├── parser.py
    │   ├── preprocess.py
    │   └── rnn_cell.py
    ├── parse_dialogue.py
    ├── results.txt
    ├── scripts
    │   ├── analyze.py
    │   ├── compile_eval_results.py
    │   ├── dataset_statistics.py
    │   ├── eval_lexicon.py
    │   ├── generate_entity_ranker_data.py
    │   ├── generate_inverse_lexicon_data.py
    │   ├── generate_scenarios.py
    │   ├── generate_schema.py
    │   ├── get_data_statistics.py
    │   ├── get_eval_statistics.py
    │   ├── make_eval_result_plots.py
    │   ├── plot.py
    │   └── summarize_eval.py
    ├── sessions
    │   ├── __init__.py
    │   ├── cmd_session.py
    │   ├── heuristic_session.py
    │   ├── neural_session.py
    │   ├── rulebased_session.py
    │   └── session.py
    ├── systems
    │   ├── __init__.py
    │   ├── cmd_system.py
    │   ├── heuristic_system.py
    │   ├── neural_system.py
    │   └── rulebased_system.py
    └── web
    │   ├── __init__.py
    │   ├── app_params.json
    │   ├── chat_app.py
    │   ├── lexicon_annotator_app.py
    │   ├── main
    │       ├── __init__.py
    │       ├── backend.py
    │       ├── db_reader.py
    │       └── utils.py
    │   ├── static
    │       ├── css
    │       │   ├── bootstrap.min.css
    │       │   ├── single_task_lexicon.css
    │       │   ├── survey.css
    │       │   └── third_party_eval.css
    │       └── img
    │       │   └── handshake.jpg
    │   ├── templates
    │       ├── chat.html
    │       ├── finished.html
    │       ├── instructions.html
    │       ├── single_task_lexicon.html
    │       ├── survey.html
    │       ├── task_survey.html
    │       ├── third_party_eval.html
    │       ├── third_party_eval_finished.html
    │       ├── visualize.html
    │       └── waiting.html
    │   ├── third_party_backend.py
    │   ├── third_party_eval_app.py
    │   └── views
    │       ├── __init__.py
    │       └── action.py
├── onmt
    ├── Loss.py
    ├── ModelConstructor.py
    ├── Models.py
    ├── Optim.py
    ├── Trainer.py
    ├── Utils.py
    ├── __init__.py
    ├── io
    │   ├── AudioDataset.py
    │   ├── DatasetBase.py
    │   ├── IO.py
    │   ├── ImageDataset.py
    │   ├── TextDataset.py
    │   └── __init__.py
    ├── modules
    │   ├── AudioEncoder.py
    │   ├── Conv2Conv.py
    │   ├── ConvMultiStepAttention.py
    │   ├── CopyGenerator.py
    │   ├── Embeddings.py
    │   ├── Gate.py
    │   ├── GlobalAttention.py
    │   ├── ImageEncoder.py
    │   ├── MultiHeadedAttn.py
    │   ├── SRU.py
    │   ├── StackedRNN.py
    │   ├── StructuredAttention.py
    │   ├── Transformer.py
    │   ├── UtilClass.py
    │   ├── WeightNorm.py
    │   └── __init__.py
    └── translate
    │   ├── Beam.py
    │   ├── Translation.py
    │   ├── Translator.py
    │   └── __init__.py
├── requirements.txt
├── scripts
    ├── analyze.py
    ├── bot_bot_chat.py
    ├── chat_to_scenarios.py
    ├── combine_json.py
    ├── generate_dataset.py
    ├── split_dataset.py
    ├── split_transcripts.py
    ├── turk
    │   ├── accept_negotiation_hits.py
    │   ├── assign_qualification.py
    │   ├── award_time_bonuses.py
    │   └── eval_dialogue.py
    ├── visualize_transcripts.py
    └── web
    │   ├── cleanup_db.py
    │   └── dump_db.py
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | bin
 2 | web_output
 3 | output
 4 | scr
 5 | data
 6 | log
 7 | .ipynb_checkpoints
 8 | Makefile
 9 | aws_config.json
10 | cocoa.egg-info
11 | cocoa/tags
12 | *.pyc
13 | *.log
14 | *.pkl
15 | *.json
16 | *.png
17 | *.pdf
18 | cl*.sh
19 | aws_config.json
20 | *.DS_Store
21 | *.txt
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 2 | 
 3 | RUN apt-get update && apt-get install -y --no-install-recommends \
 4 |          build-essential \
 5 |          locales \
 6 |          cmake \
 7 |          git \
 8 |          curl \
 9 |          vim \
10 |          unzip \
11 |          ca-certificates \
12 |          libjpeg-dev \
13 |          libpng-dev \
14 |          libfreetype6-dev \
15 |          libxft-dev &&\
16 |      rm -rf /var/lib/apt/lists/*
17 | 
18 | 
19 | RUN curl -o ~/miniconda.sh -O  https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
20 |      chmod +x ~/miniconda.sh && \
21 |      ~/miniconda.sh -b -p /opt/conda && \
22 |      rm ~/miniconda.sh && \
23 |      /opt/conda/bin/conda install -y python=2.7 numpy pyyaml scipy ipython mkl mkl-include cython typing && \
24 |      /opt/conda/bin/conda install -y -c pytorch magma-cuda90 && \
25 |      /opt/conda/bin/conda clean -ya
26 | ENV PATH /opt/conda/bin:$PATH
27 | 
28 | RUN conda install -c pytorch pytorch=0.4.1 cuda90
29 | 
30 | RUN conda install flask=0.12.2=py27_0 && \
31 |     conda install flask-socketio=2.8.5=py27_0 && \
32 |     conda install nltk=3.2.4=py27_0 && \
33 |     conda install numpy=1.13.3=py27hdbf6ddf_4 && \
34 |     conda install pandas=0.20.3=py27_0 && \
35 |     conda install ujson=1.35=py27_0 && \
36 |     conda install decorator=4.1.2=py27_0 && \
37 |     conda install matplotlib=2.0.2=np113py27_0
38 | 
39 | RUN pip install future==0.16.0 && \
40 |     pip install nose==1.3.7 && \
41 |     pip install scikit-learn==0.19.0 && \
42 |     pip install sklearn==0.0 && \
43 |     pip install torchtext==0.2.1 && \
44 |     pip install visdom==0.1.6.1
45 | 
46 | RUN python -m nltk.downloader punkt && \
47 |     python -m nltk.downloader stopwords
48 | 
49 | RUN DUMMY3=${DUMMY3} git clone https://github.com/stanfordnlp/cocoa.git && \
50 |     cd cocoa && \
51 |     python setup.py develop
52 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Stanford NLP
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/chat_viewer/css/my.css:
--------------------------------------------------------------------------------
 1 | * {
 2 |     font-size: 1em;
 3 |     font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
 4 | }
 5 | #filter td {
 6 |     padding: 0 20px 5px 0;
 7 | }
 8 | .scenario {
 9 |     width: 100%;
10 |     display: inline-block;
11 |     text-align: left;
12 | }
13 | .scenario table {
14 |     border-collapse: collapse;
15 |     text-align: left;
16 | }
17 | .survey {
18 |     width: 100%;
19 | }
20 | .survey table {
21 |     border-collapse: collapse;
22 | }
23 | .survey table td, .scenario table td {
24 |     padding: 0 5px;
25 | }
26 | .survey .response {
27 |     padding: 0 0 20px 0;
28 | }
29 | .agentLabel {
30 |     padding: 10px;
31 | }
32 | .scenario td, .scenario th, .scenario tr, .survey td, .survey th, .survey tr {
33 |     border-bottom: 1px solid #ddd;
34 |     padding: 0 5px 0 0;
35 | }
36 | .divTitle {
37 |     padding: 20px 0px 10px 0px;
38 |     font-weight: bold;
39 |     margin-bottom: 10px;
40 | }
41 | .count-title {
42 |     margin: 5px 20px;
43 |     width: 30px;
44 |     display: inline;
45 |     font-weight: bold;
46 |     text-align: center;
47 | }
48 | .count {
49 |     margin: 5px 20px;
50 |     padding: 38px;
51 |     display: inline;
52 |     text-align: center;
53 | }
54 | .kb0 {
55 |     width: 48%;
56 |     float: left;
57 | }
58 | .kb1 {
59 |     width: 48%;
60 |     margin-left: 4%;
61 |     float: left;
62 | }
63 | .chatLog .chat {
64 |     width: auto;
65 |     border-collapse: collapse;
66 | }
67 | .chat td {
68 |     padding: 2px 15px 2px 15px;
69 | }
70 | .kb0 th {
71 |     background-color: #80ced6;
72 | }
73 | .chatLog .agent0 {
74 |     background-color: #80ced6;
75 | }
76 | .survey .response0 th {
77 |     background-color: #80ced6;
78 | }
79 | .kb1 th {
80 |     background-color: #ffef96;
81 | }
82 | .chatLog .agent1 {
83 |     background-color: #ffef96;
84 | }
85 | .survey .response1 th {
86 |     background-color: #ffef96;
87 | }
88 | 


--------------------------------------------------------------------------------
/cocoa/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'anushabala'
2 | 


--------------------------------------------------------------------------------
/cocoa/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/analysis/__init__.py


--------------------------------------------------------------------------------
/cocoa/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/core/__init__.py


--------------------------------------------------------------------------------
/cocoa/core/entity.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | class CanonicalEntity(namedtuple('CanonicalEntity', ['value', 'type'])):
 4 |     __slots__ = ()
 5 | 
 6 |     def __str__(self):
 7 |         return '[%s]' % str(self.value)
 8 | 
 9 | class Entity(namedtuple('Entity', ['surface', 'canonical'])):
10 |     __slots__ = ()
11 | 
12 |     @classmethod
13 |     def from_elements(cls, surface=None, value=None, type=None):
14 |         if value is None:
15 |             value = surface
16 |         return super(cls, Entity).__new__(cls, surface, CanonicalEntity(value, type))
17 | 
18 |     def __str__(self):
19 |         return '[%s|%s]' % (str(self.surface), str(self.canonical.value))
20 | 
def is_entity(x):
    """Return True when x is an Entity or a CanonicalEntity."""
    return isinstance(x, (Entity, CanonicalEntity))
23 | 


--------------------------------------------------------------------------------
/cocoa/core/event.py:
--------------------------------------------------------------------------------
 1 | class Event(object):
 2 |     """
 3 |     An atomic event of a dialogue, which could be someone talking or making a selection.
 4 | 
 5 |     Params:
 6 |     agent: The index of the agent triggering the event
 7 |     time: Time at which event occurred
 8 |     action: The action this event corresponds to ('select', 'message', ..)
 9 |     data: Any data that is part of the event
10 |     start_time: The time at which the event action was started (e.g. the time at which an agent starting typing a
11 |     message to send)
12 |     """
13 | 
14 |     decorative_events = ('join', 'leave', 'typing', 'eval')
15 | 
16 |     def __init__(self, agent, time, action, data, start_time=None, metadata=None):
17 |         self.agent = agent
18 |         self.time = time
19 |         self.action = action
20 |         self.data = data
21 |         self.start_time = start_time
22 |         self.metadata = metadata
23 | 
24 |     @staticmethod
25 |     def from_dict(raw):
26 |         return Event(raw['agent'], raw['time'], raw['action'], raw['data'], start_time=raw.get('start_time'), metadata=raw.get('metadata'))
27 | 
28 |     def to_dict(self):
29 |         return {'agent': self.agent, 'time': self.time, 'action': self.action, 'data': self.data,
30 |                 'start_time': self.start_time, 'metadata': self.metadata}
31 | 
32 |     @classmethod
33 |     def MessageEvent(cls, agent, data, time=None, start_time=None, metadata=None):
34 |         return cls(agent, time, 'message', data, start_time=start_time, metadata=metadata)
35 | 
36 |     @classmethod
37 |     def JoinEvent(cls, agent, userid=None, time=None):
38 |         return cls(agent, time, 'join', userid)
39 | 
40 |     @classmethod
41 |     def LeaveEvent(cls, agent, userid=None, time=None):
42 |         return cls(agent, time, 'leave', userid)
43 | 
44 |     @classmethod
45 |     def TypingEvent(cls, agent, data, time=None):
46 |         return cls(agent, time, 'typing', data)
47 | 
48 |     @classmethod
49 |     def EvalEvent(cls, agent, data, time):
50 |         return cls(agent, time, 'eval', data)
51 | 
52 |     @staticmethod
53 |     def gather_eval(events):
54 |         event_dict = {e.time: e for e in events if e.action != 'eval'}
55 |         for e in events:
56 |             if e.action == 'eval':
57 |                 event_dict[e.time].tags = [k for k, v in e.data['labels'].iteritems() if v != 0]
58 |             else:
59 |                 event_dict[e.time].tags = []
60 |         events_with_eval = [v for k, v in sorted(event_dict.iteritems(), key=lambda x: x[0])]
61 |         return events_with_eval
62 | 


--------------------------------------------------------------------------------
/cocoa/core/kb.py:
--------------------------------------------------------------------------------
 1 | class KB(object):
 2 |     '''
 3 |     Represents an agent's knowledge.
 4 |     '''
 5 |     def __init__(self, attributes):
 6 |         self.attributes = attributes
 7 | 
 8 |     def dump(self):
 9 |         raise NotImplementedError
10 | 


--------------------------------------------------------------------------------
/cocoa/core/sample_utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | import math
 4 | 
 5 | def normalize_weights(weights):
 6 |     '''
 7 |     [3, 2] => [0.6, 0.4]
 8 |     '''
 9 |     if len(weights) == 0:
10 |         return []
11 |     s = sum(weights)
12 |     if s == 0:
13 |         print 'WARNING: zero normalization'
14 |         return weights
15 |     return [1.0 * weight / s for weight in weights]
16 | 
def exp_normalize_weights(weights):
    '''
    Exponentiate the weights and normalize them to sum to one.

    An empty input gives [], matching normalize_weights.
    '''
    if len(weights) == 0:
        return []
    m = max(weights)
    # Shifting by the maximum keeps every exponent <= 0, so exp cannot overflow.
    shifted = [math.exp(w - m) for w in weights]
    return normalize_weights(shifted)
21 | 
def normalize_candidates(candidates):
    '''
    Normalize the weights of (token, weight) pairs so that they sum to one.

    [('a', 2), ('b', 8)] => [('a', 0.2), ('b', 0.8)]

    Raises ZeroDivisionError when the weights sum to zero.
    '''
    total = sum([weight for token, weight in candidates])
    # 1.0 * forces true division; with integer weights Python 2 would
    # otherwise floor every result to 0.
    return [(k, 1.0 * weight / total) for k, weight in candidates]
28 | 
29 | #def sample_candidates(candidates):
30 | #    '''
31 | #    [('a', 2), ('b', 8)] => 'a' or 'b'
32 | #    '''
33 | #    weights = [weight for token, weight in candidates]
34 | #    sums = numpy.array(weights).cumsum()
35 | #    i = sums.searchsorted(random.random() * sums[-1])
36 | #    return candidates[i]
37 | 
def sorted_candidates(candidates):
    '''
    Return the (token, weight) pairs ordered by decreasing weight.

    [('a', 2), ('b', 8)] => [('b', 8), ('a', 2)]
    '''
    # Index into the pair rather than unpacking it in the lambda signature:
    # tuple parameters are a syntax error on Python 3.
    return sorted(candidates, key=lambda candidate: candidate[1], reverse=True)
43 | 
def softmax(x):
    """Compute softmax values for each set of scores in x (along axis 0).

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the result mathematically but keeps np.exp from overflowing
    to inf (and the ratio from becoming NaN) on large scores.
    """
    x = np.asarray(x)
    if x.size == 0 or x.ndim == 0:
        # Degenerate input: nothing to shift, keep the plain formula.
        return np.exp(x) / np.sum(np.exp(x), axis=0)
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)
47 | 
def sample_candidates(candidates, n=1):
    """Draw up to n distinct values from (value, weight) pairs.

    The weights are turned into probabilities with softmax and the draw is
    made without replacement; n is capped at the number of candidates.
    """
    n = min(n, len(candidates))
    values = []
    scores = []
    for value, weight in candidates:
        values.append(value)
        scores.append(weight)
    probs = softmax(scores)
    picked = np.random.choice(range(len(values)), n, replace=False, p=probs)
    return [values[index] for index in picked]
54 | 


--------------------------------------------------------------------------------
/cocoa/core/scenario_db.py:
--------------------------------------------------------------------------------
 1 | class Scenario(object):
 2 |     '''
 3 |     A scenario represents a situation to be played out where each agent has a KB.
 4 |     '''
 5 |     def __init__(self, uuid, attributes, kbs):
 6 |         self.uuid = uuid
 7 |         self.attributes = attributes
 8 |         self.kbs = kbs
 9 | 
10 |     @staticmethod
11 |     def from_dict(schema, raw):
12 |         raise NotImplementedError
13 | 
14 |     def to_dict(self):
15 |         return {'uuid': self.uuid,
16 |                 'attributes': [attr.to_json() for attr in self.attributes],
17 |                 'kbs': [kb.to_dict() for kb in self.kbs]
18 |                 }
19 | 
20 |     def get_kb(self, agent):
21 |         return self.kbs[agent]
22 | 
23 | 
class ScenarioDB(object):
    '''
    Consists of a list of scenarios (each one specifies the pair of KBs).
    '''
    def __init__(self, scenarios_list):
        self.scenarios_list = scenarios_list  # Keep things in order
        self.scenarios_map = {}  # Map from uuid to scenario
        self.selected_scenarios = set()  # uuids already returned by select_random
        for scenario in scenarios_list:
            self.scenarios_map[scenario.uuid] = scenario
        self.size = len(self.scenarios_map)

    def get(self, uuid):
        '''
        Return the scenario with the given uuid (KeyError if unknown).
        '''
        return self.scenarios_map[uuid]

    def select_random(self, exclude_seen=True):
        '''
        Return a randomly chosen scenario.

        exclude_seen: when True, scenarios already returned by an earlier
        call with exclude_seen=True are skipped; once every scenario has
        been returned the history is cleared and all become eligible again.
        '''
        # This module has no top-level numpy import, so bring it in here.
        import numpy as np

        scenarios = set(self.scenarios_map.keys())

        if exclude_seen:
            scenarios = scenarios - self.selected_scenarios
            if len(scenarios) == 0:
                scenarios = set(self.scenarios_map.keys())
                self.selected_scenarios = set()

        # Draw an index rather than an element so that the key keeps its
        # original type instead of being converted to a numpy scalar.
        candidates = list(scenarios)
        uuid = candidates[np.random.choice(len(candidates))]

        if exclude_seen:
            # Without this the exclusion above never has anything to exclude.
            self.selected_scenarios.add(uuid)

        return self.scenarios_map[uuid]

    @staticmethod
    def from_dict(schema, raw, scenario_class):
        '''
        Build a ScenarioDB by calling scenario_class.from_dict(schema, s) on
        every element s of raw.
        '''
        return ScenarioDB([scenario_class.from_dict(schema, s) for s in raw])

    def to_dict(self):
        return [s.to_dict() for s in self.scenarios_list]
57 | 


--------------------------------------------------------------------------------
/cocoa/core/schema.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | A schema specifies information about a domain (types, entities, relations).
 3 | '''
 4 | 
 5 | import json
 6 | from itertools import izip
 7 | 
 8 | 
 9 | class Attribute(object):
10 |     def __init__(self, name, value_type, unique=False, multivalued=False, entity=True):
11 |         self.name = name
12 |         self.value_type = value_type
13 |         self.unique = unique
14 |         self.multivalued = multivalued
15 |         # Whether the value of this attribute is an entity
16 |         self.entity = entity
17 | 
18 |     @staticmethod
19 |     def from_json(raw):
20 |         return Attribute(raw['name'], raw['value_type'], raw.get('unique', False), raw.get('multivalued', False), raw.get('entity', True))
21 | 
22 |     def to_json(self):
23 |         return {'name': self.name, 'value_type': self.value_type, 'unique': self.unique, 'multivalued': self.multivalued, 'entity': self.entity}
24 | 
25 | 
class Schema(object):
    '''
    A schema contains information about possible entities and relations.
    '''
    def __init__(self, path, domain=None):
        '''
        Load the schema from the JSON file at path.

        The file must contain a 'values' entry and an 'attributes' entry.
        '''
        # Close the file as soon as it is parsed instead of leaking the handle.
        with open(path) as fin:
            raw = json.load(fin)
        # Mapping from type (e.g., hobby) to list of values (e.g., hiking)
        values = raw['values']
        # List of attributes (e.g., place_of_birth)
        attributes = [Attribute.from_json(a) for a in raw['attributes']]
        self.attr_names = [attr.name for attr in attributes]

        self.values = values
        self.attributes = attributes
        self.domain = domain

    def get_attributes(self):
        '''
        Return a dict {name: value_type} of all attributes.
        '''
        return {attr.name: attr.value_type for attr in self.attributes}

    def get_ordered_attribute_subset(self, attribute_subset):
        """
        Order a subset of this schema's attributes using the original order of attributes in the schema.
        attribute_subset: A list containing the names of the attributes present in the subset
        :return The same elements, sorted by the position of the attribute in this schema
        Raises ValueError for an element that is not part of the schema.
        """
        def position(attr):
            # Names are looked up among the attribute names. (The previous
            # lookup in self.attributes compared a name with Attribute
            # objects and could never match.) Attribute objects themselves
            # are still accepted through the fallback.
            try:
                return self.attr_names.index(attr)
            except ValueError:
                return self.attributes.index(attr)

        return sorted(attribute_subset, key=position)

    def get_ordered_item(self, item):
        '''
        Return the (name, value) pairs of item as a list, in the order of
        this schema's attributes. Attributes missing from item are skipped;
        keys of item that are not schema attributes are ignored.
        '''
        ordered_item = []
        for name in self.attr_names:
            try:
                ordered_item.append((name, item[name]))
            except KeyError:
                continue
        return ordered_item
70 | 


--------------------------------------------------------------------------------
/cocoa/core/tokenizer.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | nltk.download('punkt')
 3 | from nltk.tokenize import word_tokenize
 4 | from nltk.tokenize.moses import MosesDetokenizer
 5 | 
 6 | detokenizer = MosesDetokenizer()
 7 | 
 8 | def detokenize(tokens):
 9 |     return detokenizer.detokenize(tokens, return_str=True)
10 | 
def tokenize(utterance, lowercase=True):
    """Split an utterance into tokens with word_tokenize.

    The utterance is lowercased first unless lowercase is False.
    """
    text = utterance.lower() if lowercase else utterance
    return word_tokenize(text)
16 | 


--------------------------------------------------------------------------------
/cocoa/core/util.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import ujson as json
 3 | import string
 4 | import cPickle as pickle
 5 | import numpy as np
 6 | 
 7 | def random_multinomial(probs):
 8 |     target = random.random()
 9 |     i = 0
10 |     accum = 0
11 |     while True:
12 |         accum += probs[i]
13 |         if accum >= target:
14 |             return i
15 |         i += 1
16 | 
def generate_uuid(prefix):
    """Return prefix + '_' + 16 random ASCII letters/digits.

    Uses string.ascii_letters rather than the locale-dependent string.letters,
    so the alphabet is fixed and the function also works on Python 3.
    The `random` module is used, so the id is not cryptographically secure.
    """
    alphabet = string.digits + string.ascii_letters
    suffix = ''.join([random.choice(alphabet) for _ in range(16)])
    return prefix + '_' + suffix
19 | 
def read_json(path):
    """Load and return the JSON document stored at path.

    The file is opened in a `with` block so the handle is closed even when
    parsing fails.
    """
    with open(path) as fin:
        return json.load(fin)
22 | 
def write_json(raw, path):
    """Serialize raw as JSON into path, followed by a newline.

    Writes through file.write instead of the Python-2-only `print >>out`
    statement; the bytes written are the same.
    """
    with open(path, 'w') as out:
        out.write(json.dumps(raw) + '\n')
26 | 
def read_pickle(path):
    """Unpickle and return the object stored in the binary file at path.

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
30 | 
def write_pickle(obj, path):
    """Pickle obj into the binary file at path, replacing any existing file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)
34 | 
def normalize(a):
    """Min-max scale a so its smallest value maps to 0 and its largest to 1.

    Args:
        a: array-like of numbers with at least two distinct values.

    Returns:
        A new floating-point numpy array with values in [0, 1]. Floating
        inputs keep their dtype.

    Raises:
        AssertionError: if all values of a are equal (the range is zero).
    """
    a = np.asarray(a)
    # Integer input would floor-divide under Python 2 and collapse to 0/1.
    if not np.issubdtype(a.dtype, np.floating):
        a = a.astype(float)
    ma = np.max(a)
    mi = np.min(a)
    assert ma > mi, 'normalize() needs at least two distinct values'
    return (a - mi) / (ma - mi)
41 | 


--------------------------------------------------------------------------------
/cocoa/io/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/io/__init__.py


--------------------------------------------------------------------------------
/cocoa/io/utils.py:
--------------------------------------------------------------------------------
 1 | """Basic IO utils.
 2 | """
 3 | 
 4 | import os
 5 | import ujson as json
 6 | import cPickle as pickle
 7 | 
 8 | def create_path(path):
 9 |     dirname = os.path.dirname(path)
10 |     if not os.path.isdir(dirname):
11 |         os.makedirs(dirname)
12 | 
def read_json(path):
    """Load and return the JSON document stored at path.

    Raises:
        Exception: on any read or parse failure. The message starts with
            'Error reading JSON from <path>' and now also carries the
            underlying error text.
    """
    try:
        # `with` closes the handle even when parsing fails.
        with open(path) as fin:
            return json.load(fin)
    except Exception as e:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate untouched.
        raise Exception('Error reading JSON from %s: %s' % (path, e))
18 | 
def write_json(raw, path, ensure_path=False):
    """Serialize raw as JSON into path, followed by a newline.

    Args:
        raw: JSON-serializable object.
        path: destination file path.
        ensure_path: when true, create the parent directory first via
            create_path().

    Writes through file.write instead of the Python-2-only `print >>out`
    statement; the bytes written are the same.
    """
    if ensure_path:
        create_path(path)
    with open(path, 'w') as out:
        out.write(json.dumps(raw) + '\n')
24 | 
def read_pickle(path):
    """Unpickle and return the object stored in the binary file at path.

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
28 | 
def write_pickle(obj, path, ensure_path=False):
    """Pickle obj into the binary file at path.

    When ensure_path is true the parent directory is created first via
    create_path().
    """
    if ensure_path:
        create_path(path)
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)
34 | 
35 | 


--------------------------------------------------------------------------------
/cocoa/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/lib/__init__.py


--------------------------------------------------------------------------------
/cocoa/lib/bleu.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Copied from https://github.com/anoopsarkar/nlp-class-hw/blob/master/reranker/bleu.py
 3 | 
 4 | '''
 5 | 
 6 | import math
 7 | from collections import Counter
 8 | 
 9 | 
def bleu_stats(sentence, reference):
    """Yield BLEU sufficient statistics for one sentence/reference pair.

    Yields ten integers, in order:
        (c, r, numerator1, denominator1, ... numerator4, denominator4)
    where c and r are the candidate and reference lengths, numerator_n is the
    clipped count of matching n-grams and denominator_n the number of
    candidate n-grams. Summing the columns over a corpus gives the vector
    consumed by bleu().

    Args:
        sentence: candidate token sequence.
        reference: reference token sequence.
    """
    yield len(sentence)
    yield len(reference)
    for n in range(1, 5):
        s_ngrams = Counter(tuple(sentence[i:i + n])
                           for i in range(len(sentence) + 1 - n))
        r_ngrams = Counter(tuple(reference[i:i + n])
                           for i in range(len(reference) + 1 - n))
        # Counter intersection keeps the minimum count per n-gram (clipping).
        yield max(sum((s_ngrams & r_ngrams).values()), 0)
        yield max(len(sentence) + 1 - n, 0)
23 | 
24 | 
def bleu(stats):
    """Compute BLEU from statistics collected by bleu_stats().

    Args:
        stats: sequence (c, r, num1, den1, num2, den2, ...).

    Returns:
        0 if any entry of stats is zero, otherwise
        exp(min(0, 1 - r/c) + 0.25 * sum(log(num_n / den_n))).
    """
    # any() replaces len(filter(...)), which only works on Python 2.
    if any(x == 0 for x in stats):
        return 0
    (c, r) = stats[:2]
    bleu_prec = sum(math.log(float(x) / y)
                    for x, y in zip(stats[2::2], stats[3::2]))
    return math.exp(min(0, 1 - float(r) / c) + 0.25 * bleu_prec)
32 | 
33 | 
# A modification of BLEU that returns a positive value even when some
# higher-order precisions are zero. From Liang et al. 2006 (Footnote 5):
# http://aclweb.org/anthology-new/P/P06/P06-1096.pdf
def smoothed_bleu(stats):
    """Return the weighted sum of BLEU computed on 1..4-gram prefixes of stats.

    The i-gram prefix (first 2 + 2*i entries) is weighted by 1 / 2**(5 - i),
    so the full 4-gram score carries weight 1/2.
    """
    # range() instead of the Python-2-only xrange().
    return sum(bleu(stats[:2 + 2 * i]) / math.pow(2, 4 - i + 1)
               for i in range(1, 5))
39 | 
40 | 
def compute_bleu(candidate, reference, smoothed=True):
    """Return sentence-level BLEU of candidate against reference.

    Args:
        candidate: candidate token sequence.
        reference: reference token sequence.
        smoothed: use smoothed_bleu() when true (default), raw bleu() otherwise.
    """
    # For a single pair the statistics are simply the values bleu_stats
    # yields; the original summed them with a zero vector built via xrange.
    stats = list(bleu_stats(candidate, reference))
    if smoothed:
        return smoothed_bleu(stats)
    return bleu(stats)
49 | 
50 | 
def test():
    """Smoke test: print the smoothed BLEU of a sentence against itself."""
    candidate = 'a b c'.split()
    reference = 'a b c'.split()

    # Single-argument print() behaves the same on Python 2 and 3.
    print(compute_bleu(candidate, reference))

if __name__ == "__main__":
    test()
61 | 


--------------------------------------------------------------------------------
/cocoa/lib/logstats.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from cocoa.core.util import read_json
 3 | 
# Global statistics that we can output to monitor the run.

# Path of the JSON file that flush() rewrites; flush() does nothing while
# this is None. Set by init().
stats_path = None
# Nested dict of statistics; filled by add()/update() and serialized by flush().
STATS = {}
 8 | 
 9 | def init(path, verbose=False):
10 |     global stats_path, STATS
11 |     stats_path = path
12 |     try:
13 |         STATS = read_json(stats_path)
14 |         if verbose:
15 |             print("Stats file loaded from {}".format(stats_path))
16 |     except Exception:
17 |         STATS = {}
18 |         if verbose:
19 |             print("New stats file created, will be stored in {}".format(stats_path))
20 | 
def add(*args):
    """Store a value under a nested key path in STATS and flush to disk.

    The last argument is the value, the one before it the final key, and any
    earlier arguments name nested dicts that are created on demand.
    Example: add('data', 'num_examples', 3)
    """
    node = STATS
    for key in args[:-2]:
        # descend, creating the intermediate dict when it is missing
        node = node.setdefault(key, {})
    node[args[-2]] = args[-1]
    flush()
31 | 
def add_args(key, args):
    """Record every attribute listed by vars(args) as a dict under STATS[key]."""
    snapshot = {}
    for name in vars(args):
        snapshot[name] = getattr(args, name)
    add(key, snapshot)
34 | 
def update(stats):
    """Copy every top-level entry of the stats mapping into STATS, then flush."""
    STATS.update(stats)
    flush()
39 | 
def flush():
    """Rewrite the stats file with the current contents of STATS.

    Does nothing until init() has set stats_path. The file is written inside
    a `with` block so it is closed even if serialization fails, and through
    file.write instead of the Python-2-only `print >>out` statement.
    """
    if stats_path:
        with open(stats_path, 'w') as out:
            out.write(json.dumps(STATS) + '\n')
45 | 
46 | ############################################################
47 | 
48 | # summary: {'mean': ...}
49 | # summary_map: {key: summary}
50 | 
def summary_to_str(s):
    """Format a summary dict as 'min / mean / max (count)'."""
    fields = tuple(s[key] for key in ('min', 'mean', 'max', 'count'))
    return '%g / %g / %g (%g)' % fields
53 | 
def summary_map_to_str(m):
    """Render a summary map as space-separated 'key=value' pairs, sorted by key.

    Dict entries contribute their 'mean'; any other entry is printed as is.
    """
    parts = []
    for key, summary in sorted(m.items()):
        value = summary['mean'] if isinstance(summary, dict) else summary
        parts.append('%s=%g' % (key, value))
    return ' '.join(parts)
56 | 
def update_summary_map(m1, m2):
    """Merge every entry of m2 into the summary map m1, in place.

    Keys missing from m1 start from an empty summary.
    """
    for key, incoming in m2.items():
        target = m1.setdefault(key, {})
        update_summary(target, incoming)
62 | 
def update_summary(s1, s2):
    """Fold s2 into the running summary s1 (min / max / sum / count / mean).

    Args:
        s1: summary dict, updated in place; may start out empty.
        s2: either another summary dict (with 'min', 'max', 'sum', 'count')
            or a single numeric observation.
    """
    is_summary = isinstance(s2, dict)

    low = s2['min'] if is_summary else s2
    s1['min'] = min(s1.get('min', low), low)

    high = s2['max'] if is_summary else s2
    s1['max'] = max(s1.get('max', high), high)

    s1['sum'] = s1.get('sum', 0) + (s2['sum'] if is_summary else s2)
    s1['count'] = s1.get('count', 0) + (s2['count'] if is_summary else 1)
    s1['mean'] = 1.0 * s1['sum'] / s1['count']
75 | 
def dump_summary_map(m):
    """Print one 'key = min / mean / max (count)' line per entry of m."""
    for k, s in m.items():
        # Single-argument print() gives the same output on Python 2 and 3.
        print('%s = %s' % (k, summary_to_str(s)))
79 | 


--------------------------------------------------------------------------------
/cocoa/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/model/__init__.py


--------------------------------------------------------------------------------
/cocoa/model/dialogue_parser.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | from model.parser import Parser
 4 | from model.dialogue_state import DialogueState
 5 | 
 6 | def parse_example(example, lexicon, templates=None):
 7 |     """Parse example and collect templates.
 8 |     """
 9 |     kbs = example.scenario.kbs
10 |     parsers = [Parser(agent, kbs[agent], lexicon) for agent in (0, 1)]
11 |     states = [DialogueState(agent, kbs[agent]) for agent in (0, 1)]
12 |     # Add init utterance <start>
13 |     parsed_utterances = [states[0].utterance[0], states[1].utterance[1]]
14 |     for event in example.events:
15 |         writing_agent = event.agent  # Speaking agent
16 |         reading_agent = 1 - writing_agent
17 | 
18 |         received_utterance = parsers[reading_agent].parse(event, states[reading_agent])
19 |         if received_utterance:
20 |             sent_utterance = copy.deepcopy(received_utterance)
21 |             if sent_utterance.tokens:
22 |                 sent_utterance.template = parsers[writing_agent].extract_template(sent_utterance.tokens, states[writing_agent])
23 | 
24 |             if templates is not None:
25 |                 templates.add_template(sent_utterance, states[writing_agent])
26 |             received_utterance.agent = example.agents[writing_agent]
27 |             parsed_utterances.append(received_utterance)
28 | 
29 |             # Update states
30 |             states[reading_agent].update(writing_agent, received_utterance)
31 |             states[writing_agent].update(writing_agent, sent_utterance)
32 |     return parsed_utterances
33 | 


--------------------------------------------------------------------------------
/cocoa/model/dialogue_state.py:
--------------------------------------------------------------------------------
 1 | from parser import LogicalForm as LF, Utterance
 2 | 
 3 | class DialogueState(object):
 4 |     def __init__(self, agent, kb):
 5 |         self.agent = agent
 6 |         self.partner = 1 - agent
 7 |         self.kb = kb
 8 |         self.time = 0
 9 |         init_utterance = Utterance(logical_form=LF('<start>'), template=['<start>'])
10 |         self.utterance = [init_utterance, init_utterance]
11 |         self.done = set()
12 | 
13 |     @property
14 |     def my_act(self):
15 |         return self.utterance[self.agent].lf.intent
16 | 
17 |     @property
18 |     def partner_act(self):
19 |         return self.utterance[self.partner].lf.intent
20 | 
21 |     @property
22 |     def partner_utterance(self):
23 |         return self.utterance[self.partner]
24 | 
25 |     @property
26 |     def partner_template(self):
27 |         try:
28 |             return self.utterance[self.partner].template
29 |         except:
30 |             return None
31 | 
32 |     def update(self, agent, utterance):
33 |         if not utterance:
34 |             return
35 |         self.time += 1
36 |         self.utterance[agent] = utterance
37 |         if agent == self.agent:
38 |             self.done.add(utterance.lf.intent)
39 | 
40 | 


--------------------------------------------------------------------------------
/cocoa/model/manager.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.util import read_pickle, write_pickle
 2 | from cocoa.model.counter import build_vocabulary, count_ngrams
 3 | from cocoa.model.ngram import MLENgramModel
 4 | from cocoa.model.util import entropy
 5 | 
 6 | class Manager(object):
 7 |     def __init__(self, model, actions):
 8 |         self.model = model
 9 |         self.actions = actions
10 | 
11 |     @classmethod
12 |     def from_train(cls, sequences, n=3):
13 |         vocab = build_vocabulary(1, *sequences)
14 |         counter = count_ngrams(n, vocab, sequences, pad_left=True, pad_right=False)
15 |         model = MLENgramModel(counter)
16 |         actions = vocab.keys()
17 |         #print model.score('init-price', ('<start>',))
18 |         #print model.ngrams.most_common(10)
19 |         return cls(model, actions)
20 | 
21 |     def available_actions(self, state):
22 |         actions = [a for a in self.actions if a != 'unknown']
23 |         return actions
24 | 
25 |     def most_likely_action(self, context, freqdist):
26 |         best_action = max(freqdist, key=lambda x: x[1])[0]
27 |         return best_action
28 | 
29 |     def min_entropy_action(self, context, freqdist):
30 |         ent = []
31 |         for a, _ in freqdist:
32 |             c = (context[-1], a)
33 |             f = self.model.freqdist(c)
34 |             e = entropy([x[1] for x in f], normalized=False)
35 |             ent.append((a, e))
36 |         best_action = min(ent, key=lambda x: x[1])[0]
37 |         return best_action
38 | 
39 |     def choose_action(self, state, context=None):
40 |         if not context:
41 |             context = (state.my_act, state.partner_act)
42 |         freqdist = self.model.freqdist(context)
43 |         actions = self.available_actions(state)
44 |         freqdist = [x for x in freqdist if x[0] in actions]
45 |         # TODO: backoff
46 |         if len(freqdist) == 0:
47 |             return None
48 |         best_action = max(freqdist, key=lambda x: x[1])[0]
49 |         print 'context:', context
50 |         #print 'dist:', freqdist
51 |         print 'available actions:', actions
52 |         print 'action:', best_action
53 |         return best_action
54 | 
55 |     def save(self, output):
56 |         data = {'model': self.model, 'actions': self.actions}
57 |         write_pickle(data, output)
58 | 
59 |     @classmethod
60 |     def from_pickle(cls, path):
61 |         data = read_pickle(path)
62 |         return cls(data['model'], data['actions'])
63 | 


--------------------------------------------------------------------------------
/cocoa/model/util.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | EPS = 1e-12
 4 | 
 5 | def safe_div(numerator, denominator):
 6 |     return numerator / (denominator + EPS)
 7 | 
 8 | def entropy(p, normalized=True):
 9 |     p = np.array(p, dtype=np.float32)
10 |     if not normalized:
11 |         p /= np.sum(p)
12 |     ent = -1. * np.sum(p * np.log(p))
13 |     return ent
14 | 


--------------------------------------------------------------------------------
/cocoa/neural/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/neural/__init__.py


--------------------------------------------------------------------------------
/cocoa/neural/beam.py:
--------------------------------------------------------------------------------
 1 | from onmt.translate.Beam import Beam
 2 | 
 3 | 
 4 | class Scorer(object):
 5 |     """
 6 |     Re-ranking score.
 7 |     """
 8 |     def __init__(self, length_alpha):
 9 |         self.alpha = length_alpha
10 | 
11 |     def score(self, beam, logprobs):
12 |         """
13 |         Additional term add to log probability
14 |         See https://arxiv.org/pdf/1609.08144.pdf.
15 |         """
16 |         l_term = (((5 + len(beam.next_ys)) ** self.alpha) /
17 |                   ((5 + 1) ** self.alpha))
18 |         return (logprobs / l_term)
19 | 
20 |     def update_global_state(self, beam):
21 |         return
22 | 
23 | 


--------------------------------------------------------------------------------
/cocoa/neural/evaluator.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from itertools import count
 4 | 
 5 | from onmt.Utils import use_gpu
 6 | 
 7 | from utterance import UtteranceBuilder
 8 | from symbols import markers
 9 | 
10 | 
class Evaluator(object):
    """Runs a text generator over a data split and optionally prints the results."""

    def __init__(self, model, mappings, generator, builder, gt_prefix=1):
        """
        Args:
            model: model object; only its `stateful` flag is read here.
            mappings: stored for subclasses/callers; not used in this class.
            generator: object exposing generate_batch(batch, gt_prefix=, enc_state=).
            builder: object exposing from_batch(batch_data) -> list of responses.
            gt_prefix: forwarded to generate_batch.
        """
        self.model = model
        self.gt_prefix = gt_prefix
        self.mappings = mappings
        self.generator = generator
        self.builder = builder

    def evaluate(self, opt, model_opt, data, split='test'):
        """Generate responses for every batch of the split.

        Args:
            opt: options object; `opt.verbose` turns on printing.
            model_opt: forwarded to print_results.
            data: object whose generator(split, shuffle=False) first yields
                the number of batches, then batches (None entries are
                skipped and reset the decoder state).
            split: name of the data split.
        """
        text_generator = self.generator

        # Statistics (accumulated but not reported or returned at present).
        counter = count(1)
        pred_score_total, pred_words_total = 0, 0
        gold_score_total, gold_words_total = 0, 0

        data_iter = data.generator(split, shuffle=False)
        # The first item is the batch count, not a batch; consume it.
        # next() works on Python 2 and 3, unlike data_iter.next().
        next(data_iter)
        dec_state = None
        for batch in data_iter:
            if batch is None:
                dec_state = None
                continue
            elif not self.model.stateful:
                dec_state = None
            # TODO: this is not really stateful! dec_state is never set to
            # anything but None, so enc_state is always None.
            enc_state = dec_state.hidden if dec_state is not None else None
            batch_data = text_generator.generate_batch(batch,
                        gt_prefix=self.gt_prefix, enc_state=enc_state)
            utterances = self.builder.from_batch(batch_data)

            for response in utterances:
                pred_score_total += response.pred_scores[0]
                pred_words_total += len(response.pred_sents[0])
                gold_score_total += response.gold_score
                gold_words_total += len(response.gold_sent)

            if opt.verbose:
                # NOTE(review): the split name is used as the printed title;
                # the original referenced an undefined `title` — confirm.
                counter = self.print_results(model_opt, batch, utterances,
                                             counter=counter, title=split)

    def print_results(self, model_opt, batch, utterances, counter=None, title=''):
        """Print each response under a numbered header.

        Args:
            model_opt: unused here.
            batch: unused here.
            utterances: responses exposing log(sent_number) -> text.
            counter: iterator supplying sentence numbers; a fresh count(1)
                is used when omitted.
            title: label shown in each header line.

        Returns:
            The counter, so numbering can continue across batches.
        """
        if counter is None:
            counter = count(1)
        for response in utterances:
            sent_number = next(counter)
            print("--------- {0}: {1} -----------".format(sent_number, title))
            output = response.log(sent_number)
            os.write(1, output.encode('utf-8'))
        return counter
57 | 


--------------------------------------------------------------------------------
/cocoa/neural/loss.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | from onmt.Loss import LossComputeBase
 6 | from onmt.Utils import aeq
 7 | 
 8 | from symbols import markers
 9 | #from utterance import UtteranceBuilder
10 | 
class SimpleLossCompute(LossComputeBase):
    """
    Summed negative log-likelihood loss that ignores padding positions.

    Simpler than the base loss computation: no truncated BPTT, no label
    smoothing, no confidence scores and no sharding.
    """
    def __init__(self, generator, tgt_vocab):
        # NOTE(review): super(LossComputeBase, self) skips LossComputeBase's
        # own __init__ and runs the next class in the MRO — looks deliberate,
        # confirm.
        super(LossComputeBase, self).__init__()
        self.generator = generator
        pad_index = tgt_vocab.to_ind(markers.PAD)
        self.padding_idx = pad_index
        # Per-class weights: every target counts except the padding symbol.
        class_weights = torch.ones(tgt_vocab.size)
        class_weights[pad_index] = 0
        self.criterion = nn.NLLLoss(class_weights, size_average=False)

    def compute_loss(self, target, output):
        """Return (loss, stats) for one batch.

        generator: RNN outputs to vocab_size scores/logprobs
        output: (seq_len, batch_size, rnn_size)
        """
        flat_output = self._bottle(output)
        scores = self.generator(flat_output)
        gold = target.contiguous().view(-1)
        loss = self.criterion(scores, gold)
        stats = self._stats(loss.data.clone(), scores.data, target.view(-1).data)
        return loss, stats
33 | 
class ReinforceLossCompute(SimpleLossCompute):
    """Per-token loss for REINFORCE.

    Same setup as SimpleLossCompute, but the criterion is built with
    reduce=False so the loss is kept per element instead of being summed.
    """
    def __init__(self, generator, tgt_vocab):
        # NOTE(review): like the parent, this skips LossComputeBase.__init__
        # (and SimpleLossCompute.__init__) — confirm that is intended.
        super(LossComputeBase, self).__init__()
        self.generator = generator
        pad_index = tgt_vocab.to_ind(markers.PAD)
        self.padding_idx = pad_index
        # Zero weight on the padding symbol so it adds nothing to the loss.
        class_weights = torch.ones(tgt_vocab.size)
        class_weights[pad_index] = 0
        self.criterion = nn.NLLLoss(class_weights, size_average=False, reduce=False)
        #self.builder = UtteranceBuilder(tgt_vocab)

    def compute_loss(self, target, output):
        """Return (loss, None) with loss reshaped to (seq_len, batch_size).

        output: (seq_len, batch_size, rnn_size)
        """
        n_batch = output.size(1)
        #aeq(batch_size, reward.size(0))
        scores = self.generator(self._bottle(output))
        gold = target.contiguous().view(-1)
        per_token = self.criterion(scores, gold)
        return per_token.view(-1, n_batch), None
55 | 


--------------------------------------------------------------------------------
/cocoa/neural/rl_trainer.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | 
 3 | import argparse
 4 | import random
 5 | import json
 6 | import numpy as np
 7 | import copy
 8 | import sys
 9 | 
10 | import torch
11 | import torch.nn as nn
12 | from torch.autograd import Variable
13 | 
14 | from onmt.Trainer import Statistics as BaseStatistics
15 | 
16 | from core.controller import Controller
17 | from utterance import UtteranceBuilder
18 | from trainer import Trainer
19 | 
20 | 
class Statistics(BaseStatistics):
    """Running loss/reward totals over RL episodes.

    NOTE(review): the base class __init__ is not called — confirm none of its
    attributes are needed.
    """
    def __init__(self, episode=0, loss=0, reward=0):
        self.episode, self.loss, self.reward = episode, loss, reward
        self.total_rewards = []

    def update(self, stat):
        """Add another Statistics' loss and reward and count one more episode."""
        self.episode += 1
        self.loss += stat.loss
        self.reward += stat.reward

    def mean_loss(self):
        """Average loss per episode (raises ZeroDivisionError when episode is 0)."""
        return self.loss / self.episode

    def mean_reward(self):
        """Average reward per episode (raises ZeroDivisionError when episode is 0)."""
        return self.reward / self.episode

    def output(self, episode):
        """Print the episode number with the mean loss and reward, then flush stdout."""
        summary = "Episode %2d; loss: %6.2f; reward: %6.2f;" % (
            episode, self.mean_loss(), self.mean_reward())
        print(summary)
        sys.stdout.flush()
45 | 
# TODO: refactor
class RLTrainer(Trainer):
    """Placeholder RL trainer; currently adds nothing on top of Trainer."""
49 | 


--------------------------------------------------------------------------------
/cocoa/neural/symbols.py:
--------------------------------------------------------------------------------
 1 | #from collections import namedtuple
 2 | #
 3 | #SpecialSymbols = namedtuple('SpecialSymbols',
 4 | #        ['EOS', 'END_SUM', 'GO_S', 'GO_B', 'OFFER', 'QUIT', 'ACCEPT', 'REJECT', 'PAD', 'C_car', 'C_phone', 'C_housing', 'C_electronics', 'C_furniture', 'C_bike'])
 5 | #
 6 | #markers = SpecialSymbols(EOS='</s>', END_SUM='</sum>', GO_S='<go-s>', GO_B='<go-b>', OFFER='<offer>', QUIT='<quit>', ACCEPT='<accept>', REJECT='<reject>', PAD='<pad>', C_car='<car>', C_phone='<phone>', C_housing='<housing>', C_electronics='<electronics>', C_furniture='<furniture>', C_bike='<bike>')
 7 | #
 8 | #category_markers = [markers.C_car, markers.C_phone, markers.C_housing, markers.C_electronics, markers.C_furniture, markers.C_bike]
 9 | #
10 | #action_markers = [markers.ACCEPT, markers.REJECT, markers.OFFER, markers.QUIT]
11 | #
12 | #sequence_markers = [markers.EOS, markers.GO_S, markers.GO_B, markers.PAD]
13 | 
class Marker(object):
    # Special vocabulary symbols, kept as plain class attributes.
    # EOS/GO presumably mark sequence end and decoder start — TODO confirm.
    EOS = '</s>'
    PAD = '<pad>'
    GO = '<go>'

# Alias so callers can write `from symbols import markers` and read markers.PAD etc.
markers = Marker
20 | 


--------------------------------------------------------------------------------
/cocoa/neural/vocab_builder.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.vocab import Vocabulary
 2 | from cocoa.core.entity import is_entity
 3 | 
 4 | def get_entity_form(entity, form):
 5 |     assert len(entity) == 2
 6 |     if form == 'surface':
 7 |         return entity.surface
 8 |     elif form == 'type':
 9 |         return '<%s>' % entity.canonical.type
10 |     elif form == 'canonical':
11 |         return entity._replace(surface='')
12 |     else:
13 |         raise ValueError('Unknown entity form %s' % form)
14 | 
def build_utterance_vocab(dialogues, special_symbols=None, entity_forms=None):
    """Build the utterance vocabulary from tokenized dialogues.

    Args:
        dialogues: dialogues whose `token_turns` hold tokens; each must not
            yet be converted to ints (`is_int` is False).
        special_symbols: symbols added as special words; defaults to none.
        entity_forms: forms (see get_entity_form) under which each entity
            token is added; defaults to none, in which case entity tokens
            are skipped.

    Returns:
        The finished Vocabulary (size threshold 10000).
    """
    # None sentinels replace the shared mutable [] defaults.
    special_symbols = [] if special_symbols is None else special_symbols
    entity_forms = [] if entity_forms is None else entity_forms

    vocab = Vocabulary(offset=0, unk=True)

    def _add_entity(entity):
        for entity_form in entity_forms:
            word = get_entity_form(entity, entity_form)
            vocab.add_word(word)

    # Add words
    for dialogue in dialogues:
        assert dialogue.is_int is False
        for turn in dialogue.token_turns:
            for token in turn:
                if is_entity(token):
                    _add_entity(token)
                else:
                    vocab.add_word(token)

    # Add special symbols
    vocab.add_words(special_symbols, special=True)
    vocab.finish(size_threshold=10000)
    # print() call instead of the Python-2 print statement; same output.
    print('Utterance vocab size: %s' % (vocab.size,))
    return vocab
38 | 


--------------------------------------------------------------------------------
/cocoa/sessions/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'anushabala'
2 | 
3 | 


--------------------------------------------------------------------------------
/cocoa/sessions/human_session.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | from session import Session
 3 | 
 4 | 
 5 | class HumanSession(Session):
 6 |     """
 7 |     HumanSession represents a single human agent in a dialogue. This class can be used to enqueue messages sent by the
 8 |     agent and retrieve messages received from the other agent in the dialogue.
 9 |     """
10 |     def __init__(self, agent):
11 |         super(HumanSession, self).__init__(agent)
12 |         self.outbox = []
13 |         self.inbox = []
14 |         self.cached_messages = []
15 |         # todo implement caching to store message history
16 | 
17 |     def send(self):
18 |         if len(self.outbox) > 0:
19 |             return self.outbox.pop(0)
20 |         return None
21 | 
22 |     def poll_inbox(self):
23 |         if len(self.inbox) > 0:
24 |             return self.inbox.pop(0)
25 |         return None
26 | 
27 |     def receive(self, event):
28 |         self.inbox.append(event)
29 | 
30 |     def enqueue(self, event):
31 |         self.outbox.append(event)
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/cocoa/sessions/session.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import string
 3 | from cocoa.core.event import Event
 4 | 
 5 | 
 6 | class Session(object):
 7 |     """An abstarct class for instantiating an agent.
 8 | 
 9 |     A session maintains the dialogue state and receive/send dialogue events.
10 | 
11 |     """
12 |     def __init__(self, agent, config=None):
13 |         """Construct a session for an agent.
14 | 
15 |         Args:
16 |             agent (int): agent id (0 or 1).
17 | 
18 |         """
19 |         self.agent = agent  # 0 or 1 (which player are we?)
20 |         self.partner = 1 - agent
21 |         self.config = config
22 | 
23 |     def receive(self, event):
24 |         """Parse the received event and update the dialogue state.
25 | 
26 |         Args:
27 |             event (Event)
28 | 
29 |         """
30 |         raise NotImplementedError
31 | 
32 |     def send(self):
33 |         """Send an event.
34 | 
35 |         Returns:
36 |             event (Event)
37 | 
38 |         """
39 |         raise NotImplementedError
40 | 
41 |     @staticmethod
42 |     def remove_nonprintable(raw_tokens):
43 |         tokens = []
44 |         for token in raw_tokens:
45 |             all_valid_characters = True
46 |             for char in token:
47 |                 if not char in string.printable:
48 |                     all_valid_characters = False
49 |             if all_valid_characters:
50 |                 tokens.append(token)
51 |         return tokens
52 | 
53 |     @staticmethod
54 |     def timestamp():
55 |         return str(time.time())
56 | 
57 |     def message(self, text, metadata=None):
58 |         return Event.MessageEvent(self.agent, text, time=self.timestamp(), metadata=metadata)
59 | 
60 |     def wait(self):
61 |         return None
62 | 


--------------------------------------------------------------------------------
/cocoa/systems/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/systems/__init__.py


--------------------------------------------------------------------------------
/cocoa/systems/human_system.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | from system import System
 3 | from cocoa.sessions.human_session import HumanSession
 4 | 
 5 | 
 6 | class HumanSystem(System):
 7 |     def __init__(self):
 8 |         super(HumanSystem, self).__init__()
 9 | 
10 |     @classmethod
11 |     def name(cls):
12 |         return 'human'
13 | 
14 |     def new_session(self, agent, kb):
15 |         return HumanSession(agent)
16 | 


--------------------------------------------------------------------------------
/cocoa/systems/rulebased_system.py:
--------------------------------------------------------------------------------
 1 | from system import System
 2 | from cocoa.sessions.timed_session import TimedSessionWrapper
 3 | 
 4 | class RulebasedSystem(System):
 5 |     def __init__(self, lexicon, generator, manager, timed_session):
 6 |         super(RulebasedSystem, self).__init__()
 7 |         self.timed_session = timed_session
 8 |         self.lexicon = lexicon
 9 |         self.generator = generator
10 |         self.manager = manager
11 | 
12 |     @classmethod
13 |     def name(cls):
14 |         return 'rulebased'
15 | 
16 |     def new_session(self, agent, kb, config=None):
17 |         session = self._new_session(agent, kb, config)
18 |         if self.timed_session:
19 |             session = TimedSessionWrapper(session)
20 |         return session
21 | 
22 |     def _new_session(self, agent, kb, config=None):
23 |         raise NotImplementedError
24 | 


--------------------------------------------------------------------------------
/cocoa/systems/system.py:
--------------------------------------------------------------------------------
 1 | class System(object):
 2 |     """An abstract class for building a Session object.
 3 |     """
 4 |     def new_session(self, agent, kb):
 5 |         raise NotImplementedError
 6 | 
 7 |     @classmethod
 8 |     def name(cls):
 9 |         return 'base'
10 | 


--------------------------------------------------------------------------------
/cocoa/turk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/turk/__init__.py


--------------------------------------------------------------------------------
/cocoa/turk/templates/compare_question.html:
--------------------------------------------------------------------------------
<!-- Pairwise comparison question. Placeholders named context, qid, response0
     and response1 are filled in by the caller (presumably via Python
     str.format; keep literal braces out of this file). Radio values:
     1 = first response, -1 = second response, 0 = both appropriate,
     -2 = both not appropriate. -->
<div class="question">

<div class="panel panel-default context">
  <div class="panel-body">
      <p><b>Context:</b></p>
      <p>{context}</p>
  </div>
</div>

<div class="form-group">
  <p>Please select the more appropriate response given the above context. If they are equally good or bad, select "Both are appropriate" or "Both are NOT appropriate".</p>
  <label class="radio"><input type="radio" name="{qid}" value="1">{response0}</label>
  <label class="radio"><input type="radio" name="{qid}" value="-1">{response1}</label>
  <label class="radio"><input type="radio" name="{qid}" value="0"><i class="em em-smile"></i>Both are appropriate</label>
  <label class="radio"><input type="radio" name="{qid}" value="-2"><i class="em em-persevere"></i>Both are NOT appropriate</label>
</div>

</div>
<hr />
20 | 


--------------------------------------------------------------------------------
/cocoa/turk/templates/frame.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>
 4 | <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">
 5 | <link href="https://afeld.github.io/emoji-css/emoji.css" rel="stylesheet">
 6 | <script type='text/javascript' src='https://s3.amazonaws.com/mturk-public/externalHIT_v1.js'></script>
 7 | <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
 8 | <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script> 
 9 | <script src="//cdnjs.cloudflare.com/ajax/libs/jquery-form-validator/2.3.26/jquery.form-validator.min.js"></script>
10 | 
11 | <script>
    // Refuse submission while any div.question has no checked radio button;
    // otherwise call submit() and return true.
    // NOTE(review): the form in this template is named "mturk_form", so
    // "document.form" looks undefined here -- confirm whether the explicit
    // submit() call is needed at all, since onsubmit already returns true.
    function validateForm() {{
        if ($('div.question:not(:has(:radio:checked))').length) {{
                alert("At least one question is blank");
                return false;
        }}
        document.form.submit();
        return true;
    }}
20 | </script>
21 | </head>
22 | 
23 | <body>
24 | <div class="container">
25 |   <h2>{title}</h2>
26 |   <div class="panel panel-default">
27 |     <div class="panel-heading">Instructions</div>
28 |     <div class="panel-body">{instructions}</div>
29 |   </div>
30 | 
31 |   <form name='mturk_form' method='post' id='mturk_form' action='https://www.mturk.com/mturk/externalSubmit' onsubmit="return validateForm()">
32 |   <input type='hidden' value='' name='assignmentId' id='assignmentId'/>
33 |  
34 |   {questions}
35 | 
36 |   <div class="form-group">
37 |     <label for="comment">Comment:</label>
38 |     <textarea class="form-control" rows="2" name="comment" id="comment"></textarea>
39 |   </div> 
40 | 
41 |   <button type='submit' class="btn btn-default" id='submitButton'>Submit</button>
42 |   </form>
43 | 
44 | <script language='Javascript'>turkSetAssignmentID();</script>
45 | 
46 | {script}
47 | 
48 | </div>
49 | 
50 | </body></html>
51 | 


--------------------------------------------------------------------------------
/cocoa/turk/templates/multi_question.html:
--------------------------------------------------------------------------------
 1 | <div class="question">
 2 | 
 3 | <div class="panel panel-default context">
 4 |   <div class="panel-body">
 5 |       <p><b>Context:</b></p>
 6 |       <p>{context}</p>
 7 |   </div>
 8 | </div>
 9 | 
10 | <p><b>{response}</b></p>
11 | 
12 | <div class="form-group">
13 |     <p>Please select all <b>problems</b> presented in the response given the above context.</p>
14 |   <div class="checkbox">
15 |   <label><input type="checkbox" name="{qid}" value="grammar">NOT grammatical</label>
16 |   </div>
17 |   <div class="checkbox">
18 |   <label><input type="checkbox" name="{qid}" value="relatedness">NOT related (off-topic)</label>
19 |   </div>
20 |   <div class="checkbox">
21 |   <label><input type="checkbox" name="{qid}" value="informativeness">NOT informative (common, generic)</label>
22 |   </div>
23 |   <div class="checkbox">
24 |   <label><input type="checkbox" name="{qid}" value="coherence">NOT coherent (illogical or irrational)</label>
25 |   </div>
26 |   <div class="checkbox">
27 |   <label><input type="checkbox" name="{qid}" value="none">None of the above</label>
28 |   </div>
29 | </div>
30 | 
31 | <div class="form-group">
32 |   <p>Please rate how appropriate the response is given the conversation above.</p>
33 |   <label class="radio-inline"><input type="radio" name="{qid},all" value="-2"><i class="em em-persevere"></i>NOT appropriate at all</label>
34 |   <label class="radio-inline"><input type="radio" name="{qid},all" value="-1"><i class="em em-worried"></i>NOT appropriate</label>
35 |   <label class="radio-inline"><input type="radio" name="{qid},all" value="0"><i class="em em-neutral_face"></i>Neutral</label>
36 |   <label class="radio-inline"><input type="radio" name="{qid},all" value="1"><i class="em em-blush"></i>Appropriate</label>
37 |   <label class="radio-inline"><input type="radio" name="{qid},all" value="2"><i class="em em-smile"></i>Very appropriate</label>
38 | </div>
39 | 
40 | </div>
41 | <hr />
42 | 


--------------------------------------------------------------------------------
/cocoa/turk/templates/question.html:
--------------------------------------------------------------------------------
 1 | <div class="question">
 2 | 
 3 | <div class="panel panel-default context">
 4 |   <div class="panel-body">
 5 |       <p><b>Context:</b></p>
 6 |       <p>{context}</p>
 7 |   </div>
 8 | </div>
 9 | 
10 | <p><b>{response}</b></p>
11 | 
12 | <div class="form-group">
13 |   <p>Please rate how likely the response continues from the conversation above.</p>
14 |   <label class="radio-inline"><input type="radio" name="{qid}" value="-2"><i class="em em-persevere"></i>Very Unlikely</label>
15 |   <label class="radio-inline"><input type="radio" name="{qid}" value="-1"><i class="em em-worried"></i>Unlikely</label>
16 |   <label class="radio-inline"><input type="radio" name="{qid}" value="0"><i class="em em-neutral_face"></i>Neutral</label>
17 |   <label class="radio-inline"><input type="radio" name="{qid}" value="1"><i class="em em-blush"></i>Likely</label>
18 |   <label class="radio-inline"><input type="radio" name="{qid}" value="2"><i class="em em-smile"></i>Very Likely</label>
19 | </div>
20 | 
21 | </div>
22 | <hr />
23 | 


--------------------------------------------------------------------------------
/cocoa/turk/utils.py:
--------------------------------------------------------------------------------
 1 | from boto.mturk.connection import MTurkConnection
 2 | import boto.mturk.qualification as mtqual
 3 | 
 4 | def get_mturk_connection(config, debug=False):
 5 |     """Connect to MTurk account.
 6 | 
 7 |     Args:
 8 |         config (dict): {'access_key': str, 'secret_key': str}
 9 |         debug (bool): if true, use sandbox
10 | 
11 |     Returns:
12 |         MTrukConnection
13 | 
14 |     """
15 |     if debug:
16 |         host = 'mechanicalturk.sandbox.amazonaws.com'
17 |     else:
18 |         host = 'mechanicalturk.amazonaws.com'
19 | 
20 |     mturk_connection = MTurkConnection(aws_access_key_id=config["access_key"],
21 |                                        aws_secret_access_key=config["secret_key"],
22 |                                        host=host)
23 |     return mturk_connection
24 | 
def default_qualifications():
    """Return the default worker qualification set.

    The set holds three requirements, added in this order: locale equal to
    "US", percent of assignments approved greater than 95, and number of
    approved HITs greater than 10.
    """
    quals = mtqual.Qualifications()
    requirements = (
        mtqual.LocaleRequirement("EqualTo", "US"),
        mtqual.PercentAssignmentsApprovedRequirement("GreaterThan", 95),
        mtqual.NumberHitsApprovedRequirement("GreaterThan", 10),
    )
    for requirement in requirements:
        quals.add(requirement)
    return quals
31 | 
def xml_safe(string):
    """Escape ``string`` for embedding in markup.

    ``&``, ``<`` and ``>`` become the entities ``&amp;``, ``&lt;`` and
    ``&gt;``; a double quote is prefixed with a backslash (it is not turned
    into an entity). The ampersand is handled first so the entities produced
    by the later steps are not escaped again.
    """
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "\\\""),
    )
    escaped = string
    for raw, replacement in substitutions:
        escaped = escaped.replace(raw, replacement)
    return escaped
38 | 


--------------------------------------------------------------------------------
/cocoa/web/README.md:
--------------------------------------------------------------------------------
1 | ### Main classes/modules
2 | `cocoa.web` provides basic backend functions and follows the structure of a Flask application.
3 | - **Backend** (`main/backend.py`): Manage the database that records user information and the chat log.
4 | - **Routing** (`views/`): Handle requests, render templates, and interact with the backend.
5 | 
6 | To build your own chat interface, add HTML templates (based on [Jinja2](http://jinja.pocoo.org/docs/2.9/)) in `task/templates`.
7 | 


--------------------------------------------------------------------------------
/cocoa/web/__init__.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | 
 3 | from flask import Flask
 4 | from flask import g
 5 | 
 6 | from flask_socketio import SocketIO
 7 | 
 8 | 
 9 | # from multiprocessing import Process, Queue
10 | socketio = SocketIO()
11 | controller_process = None
12 | 
13 | 
def close_connection(exception):
    """Close the backend stored on Flask's ``g`` as ``_backend``, if any.

    Args:
        exception: accepted but not used by this function.
    """
    backend = getattr(g, '_backend', None)
    if backend is None:
        return
    backend.close()
18 | 
19 | 
20 | # def dump_events_to_json():
def create_app(debug=False, templates_dir='templates'):
    """Create and configure the Flask application.

    Args:
        debug (bool): value assigned to ``app.debug``.
        templates_dir (str): passed to Flask as ``template_folder``.

    Returns:
        Flask: the app with the ``main`` blueprint registered and the
        module-level ``socketio`` object initialised on it.
    """
    global controller_process

    app = Flask(__name__, template_folder=templates_dir)
    app.debug = debug
    # NOTE(review): the secret key is hard-coded in source; consider loading
    # it from configuration or the environment for real deployments.
    app.config['SECRET_KEY'] = 'gjr39dkjn344_!67#'
    app.config['PROPAGATE_EXCEPTIONS'] = True

    # Imported inside the function rather than at module top -- presumably to
    # avoid a circular import with the blueprint package; TODO confirm.
    from .main import main as main_blueprint
    app.register_blueprint(main_blueprint)

    # controller_queue = Queue()
    # app.config['controller_queue'] = controller_queue
    # controller_process = Process(target=run_controllers, args=(controller_queue,))
    # controller_process.start()
    # NOTE(review): this assignment replaces the whole teardown list rather
    # than appending to it; confirm no other teardown handlers are expected.
    app.teardown_appcontext_funcs = [close_connection]

    socketio.init_app(app)
    return app
41 | 
42 | 


--------------------------------------------------------------------------------
/cocoa/web/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/web/main/__init__.py


--------------------------------------------------------------------------------
/cocoa/web/main/logger.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | 
 3 | import logging
 4 | from logging import Logger
 5 | 
 6 | logger = None
 7 | 
 8 | 
 9 | class WebLogger(object):
10 |     @classmethod
11 |     def initialize(cls, log_file):
12 |         logging.basicConfig(filename=log_file, filemode='w', level=logging.DEBUG)
13 |         global logger
14 |         logger = logging.getLogger("web")
15 | 
16 |     @classmethod
17 |     def get_logger(cls):
18 |         global logger
19 |         if not logger:
20 |             logging.basicConfig(filename="web.log", filemode='w')
21 |             logger = logging.getLogger("web")
22 |         return logger
23 | 


--------------------------------------------------------------------------------
/cocoa/web/main/states.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | 
 3 | 
 4 | class FinishedState(object):
 5 |     def __init__(self, message, num_seconds, mturk_code=None):
 6 |         self.message = message
 7 |         self.num_seconds = num_seconds
 8 |         self.mturk_code = mturk_code
 9 | 
10 | 
class WaitingState(object):
    """Record holding a waiting message and a duration in seconds.

    A falsy or empty ``message`` is replaced by a default "please wait" text.
    """

    def __init__(self, message, num_seconds):
        self.num_seconds = num_seconds
        has_text = message and len(message) > 0
        self.message = (
            message if has_text
            else "Please wait while we try to find someone to pair you up with.."
        )
18 | 
19 | 
class SurveyState(object):
    """Plain record of the values passed in for a survey view."""

    def __init__(self, message, agent_idx, scenario_id, kb, partner_kb, attributes, result):
        self.message = message
        self.agent_idx = agent_idx
        self.scenario_id = scenario_id
        self.kb, self.partner_kb = kb, partner_kb
        self.attributes = attributes
        self.result = result
29 | 
30 | 
class UserChatState(object):
    """Record describing one user's view of an ongoing chat.

    ``kb`` (and ``partner_kb`` when given) must provide a ``to_dict()``
    method, which ``to_dict`` below relies on.
    """

    def __init__(self, agent_index, scenario_id, chat_id, kb, attributes, num_seconds, partner_kb=None):
        self.agent_index = agent_index
        self.scenario_id = scenario_id
        self.chat_id = chat_id
        self.kb = kb
        self.attributes = attributes
        self.num_seconds = num_seconds
        self.partner_kb = partner_kb

    def to_dict(self):
        """Return a dict form of this state.

        ``attributes`` is not included. ``partner_kb`` is optional in the
        constructor, so it is serialized as ``None`` when absent instead of
        raising AttributeError.
        """
        partner_kb = self.partner_kb.to_dict() if self.partner_kb is not None else None
        return {"agent_index": self.agent_index,
                "scenario_id": self.scenario_id,
                "chat_id": self.chat_id,
                "kb": self.kb.to_dict(),
                "num_seconds": self.num_seconds,
                "partner_kb": partner_kb}


--------------------------------------------------------------------------------
/cocoa/web/main/utils.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | import time
 3 | import datetime
 4 | #import src.config as config
 5 | 
 6 | 
 7 | class Status(object):
 8 |     Waiting = "waiting"
 9 |     Chat = "chat"
10 |     Finished = "finished"
11 |     Survey = "survey"
12 |     Redirected = "redirected"
13 |     Incomplete = "incomplete"
14 |     Reporting = "reporting"
15 | 
16 | 
class UnexpectedStatusException(Exception):
    """Exception carrying a status that was found and the one expected.

    Attributes:
        found_status: the status value that was encountered.
        expected_status: the status value that was required.
    """

    def __init__(self, found_status, expected_status):
        # Forward both values so ``args`` keeps its (found, expected) shape,
        # which copying and pickling rely on to rebuild the exception.
        super(UnexpectedStatusException, self).__init__(found_status, expected_status)
        self.expected_status = expected_status
        self.found_status = found_status

    def __str__(self):
        # Without this the message is just the raw args tuple, which does not
        # say which value was found and which was expected.
        return "Unexpected status: found {!r}, expected {!r}".format(
            self.found_status, self.expected_status)
21 | 
22 | 
class ConnectionTimeoutException(Exception):
    """Exception type for connection timeouts."""


class InvalidStatusException(Exception):
    """Exception type for invalid statuses."""


class StatusTimeoutException(Exception):
    """Exception type for status timeouts."""


class NoSuchUserException(Exception):
    """Exception type for a user that does not exist."""
37 | 
38 | 
class Messages(object):
    """User-facing message strings."""

    ChatExpired = 'You ran out of time!'
    PartnerConnectionTimeout = "Your partner's connection has timed out! Waiting for a new chat..."
    ConnectionTimeout = (
        "Your connection has timed out. Please reenter this website using the original URL provided to "
        "you to start a new chat."
    )
    YouLeftRoom = 'You skipped the chat. '
    PartnerLeftRoom = 'Your partner has left the chat!'
    WaitingTimeExpired = "Sorry, no other users appear to be active at the moment. Please come back later!"
    ChatCompleted = "Great, you've completed the chat!"
    # An incomplete chat reuses the connection-timeout text.
    ChatIncomplete = ConnectionTimeout
    HITCompletionWarning = "Please note that you will only get credit for this HIT if you made a good attempt to complete the chat."
    Waiting = 'Waiting for a new chat...'
51 | 
52 | 
def current_timestamp_in_seconds():
    """Return the current local time as whole seconds since the epoch."""
    local_now = datetime.datetime.now()
    seconds = time.mktime(local_now.timetuple())
    return int(seconds)
55 | 
56 | 
class User(object):
    """Attribute view over a 12-element row, assigned by position."""

    def __init__(self, row):
        # Attribute names in the same order as the positions they are read from.
        columns = (
            'name',
            'status',
            'status_timestamp',
            'connected_status',
            'connected_timestamp',
            'message',
            'partner_type',
            'partner_id',
            'scenario_id',
            'agent_index',
            'selected_index',
            'chat_id',
        )
        for position, column in enumerate(columns):
            setattr(self, column, row[position])
71 | 


--------------------------------------------------------------------------------
/cocoa/web/views/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/cocoa/web/views/__init__.py


--------------------------------------------------------------------------------
/cocoa/web/views/utils.py:
--------------------------------------------------------------------------------
 1 | import uuid
 2 | from datetime import datetime
 3 | from flask import request, g
 4 | 
 5 | def generate_userid(prefix="U_"):
 6 |     return prefix + uuid.uuid4().hex
 7 | 
def userid():
    """Return the ``uid`` query-string argument of the current Flask request.

    Returns whatever ``request.args.get`` yields, i.e. ``None`` when the
    argument is absent.
    """
    return request.args.get('uid')
10 | 
def format_message(message, status_message):
    """Prefix ``message`` with a bracketed local timestamp.

    Args:
        message (str)
        status_message (bool): Whether the message is an action (e.g. select)
            or an utterance; actions are wrapped in angle brackets.

    Returns:
        The formatted string, ``[<timestamp>] <message>``.

    """
    if status_message:
        opening, closing = u"<", u">"
    else:
        opening, closing = u"", u""
    stamp = datetime.now().strftime(u'%x %X')
    return u"[{}] {}{}{}".format(stamp, opening, message, closing)
23 | 


--------------------------------------------------------------------------------
/craigslistbargain/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/analysis/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/analysis/analyze_strategy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import utils
 4 | 
 5 | MAX_MARGIN = 2.4
 6 | MIN_MARGIN = -2.0
 7 | 
 8 | class StrategyAnalyzer(object):
 9 |     @classmethod
10 |     def valid_margin(cls, margin):
11 |         return margin <= MAX_MARGIN and margin >= MIN_MARGIN
12 | 
13 |     @classmethod
14 |     def get_margin(cls, ex, price, agent, role, remove_outlier=True):
15 |         agent_target = ex.scenario.kbs[agent].facts["personal"]["Target"]
16 |         partner_target = ex.scenario.kbs[1 - agent].facts["personal"]["Target"]
17 |         midpoint = (agent_target + partner_target) / 2.
18 |         norm_factor = np.abs(midpoint - agent_target)
19 |         if role == utils.SELLER:
20 |             margin = (price - midpoint) / norm_factor
21 |         else:
22 |             margin = (midpoint - price) / norm_factor
23 |         if remove_outlier and not cls.valid_margin(margin):
24 |             return None
25 |         return margin
26 | 
27 |     @classmethod
28 |     def has_deal(cls, ex):
29 |         if ex.outcome is None or ex.outcome['reward'] == 0 or ex.outcome.get('offer', None) is None or ex.outcome['offer']['price'] is None:
30 |             return False
31 |         return True
32 | 
33 | 


--------------------------------------------------------------------------------
/craigslistbargain/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/core/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/core/controller.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.controller import Controller as BaseController
 2 | 
 3 | class Controller(BaseController):
 4 |     def __init__(self, scenario, sessions, chat_id=None, session_names=(None, None)):
 5 |         super(Controller, self).__init__(scenario, sessions, chat_id, session_names=session_names)
 6 |         # self.prices = [None, None]
 7 |         self.offers = [None, None]
 8 |         # self.sides = [None, None]
 9 |         self.outcomes = [None, None]
10 |         self.quit = False
11 | 
12 |     def event_callback(self, event):
13 |         if event.action == 'offer':
14 |             self.offers[event.agent] = event.data
15 |         elif event.action == 'accept':
16 |             self.outcomes[event.agent] = True
17 |         elif event.action == 'reject':
18 |             self.outcomes[event.agent] = False
19 |         elif event.action == 'quit':
20 |             self.quit = True
21 |             self.outcomes[event.agent] = False
22 | 
23 |     def get_outcome(self):
24 |         offer = None
25 |         reward = 0
26 |         if self.offers[0] is not None and self.outcomes[1] is True:
27 |             reward = 1
28 |             offer = self.offers[0]
29 |         elif self.offers[1] is not None and self.outcomes[0] is True:
30 |             reward = 1
31 |             offer = self.offers[1]
32 |         else:
33 |             if (self.offers[0] is not None or self.offers[1] is not None) and False in self.outcomes:
34 |                 reward = 0
35 |                 offer = self.offers[0] if self.offers[1] is None else self.offers[1]
36 | 
37 |         # possible outcomes:
38 |         # reward is 1 and offer is not null: complete dialogue
39 |         # reward is 0 and offer is not null: incomplete dialogue (disagreement): offer was made and not accepted
40 |         # reweard is 0 and offer is null: incomplete dialogue: no offer was made
41 |         return {'reward': reward, 'offer': offer}
42 | 
43 |     def game_over(self):
44 |         return not self.inactive() and \
45 |                ((self.offers[0] is not None and self.outcomes[1] is not None) or
46 |                 (self.offers[1] is not None and self.outcomes[0] is not None) or
47 |                  self.quit)
48 | 
49 |     def get_result(self, agent_idx):
50 |         # todo fix this if we ever want to display results in the survey
51 |         return None
52 | 
53 |     def complete(self):
54 |         return (self.offers[0] is not None and self.outcomes[1] is True) or (self.offers[1] is not None and self.outcomes[0] is True)
55 | 
56 |     def get_winner(self):
57 |         # todo fix this if we ever want to calculate who the winner is
58 |         return -1
59 | 


--------------------------------------------------------------------------------
/craigslistbargain/core/event.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.event import Event as BaseEvent
 2 | 
class Event(BaseEvent):
    """Event subclass with factory helpers for the four actions used here.

    Each factory forwards ``agent`` and ``time`` positionally, followed by
    the action name and the event data, with ``metadata`` as a keyword.
    """

    @staticmethod
    def OfferEvent(agent, data, time=None, metadata=None):
        """Build an 'offer' event carrying ``data``."""
        return Event(agent, time, 'offer', data, metadata=metadata)

    @staticmethod
    def QuitEvent(agent, time=None, metadata=None):
        """Build a 'quit' event; its data is None."""
        return Event(agent, time, 'quit', None, metadata=metadata)

    @staticmethod
    def AcceptEvent(agent, time=None, metadata=None):
        """Build an 'accept' event; its data is None."""
        return Event(agent, time, 'accept', None, metadata=metadata)

    @staticmethod
    def RejectEvent(agent, time=None, metadata=None):
        """Build a 'reject' event; its data is None."""
        return Event(agent, time, 'reject', None, metadata=metadata)
19 | 


--------------------------------------------------------------------------------
/craigslistbargain/core/kb.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.kb import KB as BaseKB
 2 | 
 3 | class KB(BaseKB):
 4 |     def __init__(self, attributes, facts):
 5 |         super(KB, self).__init__(attributes)
 6 |         self.facts = facts
 7 | 
 8 |     @property
 9 |     def listing_price(self):
10 |         return self.facts['item']['Price']
11 | 
12 |     @property
13 |     def target(self):
14 |         return self.facts['personal']['Target']
15 | 
16 |     @property
17 |     def category(self):
18 |         return self.facts['item']['Category']
19 | 
20 |     @property
21 |     def title(self):
22 |         return self.facts['item']['Title']
23 | 
24 |     @property
25 |     def role(self):
26 |         return self.facts['personal']['Role']
27 | 
28 |     def to_dict(self):
29 |         return self.facts
30 | 
31 |     @classmethod
32 |     def from_dict(cls, attributes, raw):
33 |         return cls(attributes, raw)
34 | 
35 |     def dump(self):
36 |         # NOTE: We no longer have a bottomline price
37 |         price_range = (None, self.target)
38 |         print('----------------')
39 |         print('Role: {}'.format(self.role))
40 |         print('Price range: {}'.format(str(price_range)))
41 |         if self.role == 'seller':
42 |             width = max([len(str(attr.name)) for attr in self.attributes])
43 |             for attr in self.attributes:
44 |                 if attr.name not in ('Role', 'Bottomline', 'Target'):
45 |                     if attr.name == 'Description':
46 |                         value = '\n' + '\n'.join(self.facts['item'][attr.name]).encode('utf8')
47 |                     elif attr.name == 'Price':
48 |                         value = self.facts['item'][attr.name]
49 |                     elif attr.name == 'Images':
50 |                         value = ' '.join(self.facts['item'][attr.name])
51 |                     else:
52 |                         value = self.facts['item'][attr.name].encode('utf8')
53 |                     print('{empty:4}{name:<{width}s} {value}'.format(empty='', width=width, name=attr.name, value=value))
54 | 


--------------------------------------------------------------------------------
/craigslistbargain/core/scenario.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.scenario_db import Scenario as BaseScenario
 2 | from cocoa.core.schema import Attribute
 3 | from kb import KB
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     # Agent ids
 7 |     BUYER = 0
 8 |     SELLER = 1
 9 | 
10 |     def __init__(self, uuid, post_id, category, images, attributes, kbs):
11 |         super(Scenario, self).__init__(uuid, attributes, kbs)
12 |         self.post_id = post_id
13 |         self.category = category
14 |         self.images = images
15 | 
16 |     def to_dict(self):
17 |         d = super(Scenario, self).to_dict()
18 |         d['post_id'] = self.post_id
19 |         d['category'] = self.category
20 |         return d
21 | 
22 |     @staticmethod
23 |     def from_dict(schema, raw):
24 |         scenario_attributes = None
25 |         if schema is not None:
26 |             scenario_attributes = schema.attributes
27 |         if 'attributes' in raw.keys():
28 |             scenario_attributes = [Attribute.from_json(a) for a in raw['attributes']]
29 | 
30 |         if scenario_attributes is None:
31 |             raise ValueError("No scenario attributes found. "
32 |                              "Either schema must not be None (and have valid attributes) or "
33 |                              "scenario dict must have valid attributes field.")
34 |         return Scenario(raw['uuid'], raw['post_id'], raw['category'], None, scenario_attributes, [KB.from_dict(scenario_attributes, kb) for kb in raw['kbs']])
35 | 


--------------------------------------------------------------------------------
/craigslistbargain/core/tokenizer.py:
--------------------------------------------------------------------------------
 1 | import nltk
 2 | nltk.download('punkt')
 3 | from nltk.tokenize import word_tokenize
 4 | import re
 5 | import string
 6 | 
 7 | def is_number(s):
 8 |     if re.match(r'[.,0-9]+', s):
 9 |         return True
10 |     else:
11 |         return False
12 | 
def stick_dollar_sign(tokens):
    '''
    Glue a standalone '$' token onto a neighbouring number token.
    '$', '1000' -> '$1000'
    '1000', '$' -> '1000$'
    A following number takes precedence over a preceding one.
    '''
    merged = []
    total = len(tokens)
    pos = 0
    while pos < total:
        current = tokens[pos]
        step = 1
        if current != '$':
            merged.append(current)
        elif pos + 1 < total and is_number(tokens[pos + 1]):
            # $100: consume the number together with the sign.
            merged.append(current + tokens[pos + 1])
            step = 2
        elif pos > 0 and is_number(tokens[pos - 1]):
            # 100$: attach the sign to the token already emitted.
            merged[-1] = merged[-1] + current
        else:
            merged.append(current)
        pos += step
    return merged
37 | 
def stick_marker_sign(tokens):
    '''
    Don't split on markers <>
    '<', 'x', '>' -> '<x>'
    Every token after a '<' is appended to it until a '>' has been appended.
    '''
    joined = []
    inside = False
    for piece in tokens:
        if inside:
            joined[-1] = joined[-1] + piece
        else:
            joined.append(piece)
        if piece == '<':
            inside = True
        elif piece == '>':
            inside = False
    return joined
55 | 
def tokenize(utterance, lowercase=True):
    '''
    Split an utterance into tokens with NLTK's word_tokenize.
    'hi there!' => ['hi', 'there', '!']
    '''
    text = utterance.lower() if lowercase else utterance
    # NLTK would not tokenize "xx..", so normalize dots to "...".
    text = re.sub(r'\.{2,}', '...', text)
    # Replace backslashes, '>' and '/' with spaces.
    text = re.sub(r'\\|>|/', ' ', text)
    return stick_dollar_sign(word_tokenize(text))
71 | 
def detokenize(tokens):
    '''
    Join tokens with spaces, attaching some tokens to the previous one:
    punctuation, tokens containing an apostrophe, and 'na' after
    'gon' / 'wan'.
    '''
    pieces = []
    for tok in tokens:
        has_prev = len(pieces) > 0
        attaches = (tok in string.punctuation or "'" in tok) and has_prev
        contraction = tok == 'na' and has_prev and pieces[-1] in ('gon', 'wan')
        if attaches or contraction:
            pieces[-1] += tok
        else:
            pieces.append(tok)
    return ' '.join(pieces)
82 | 
83 | # ========= TEST ===========
if __name__ == '__main__':
    # Quick manual check of the tokenizer. A parenthesised single-argument
    # print behaves the same on Python 2 and Python 3, whereas the print
    # statement form is a SyntaxError on Python 3.
    print(tokenize("i have 10,000$!.."))
    print(tokenize("i haven't $10,000"))
87 | 
88 | 


--------------------------------------------------------------------------------
/craigslistbargain/data/craigslist-schema.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "values": {
 3 |     },
 4 |     "attributes": [
 5 |       {"name": "Role", "value_type": "role", "multivalued": false, "entity": false},
 6 |       {"name": "Target", "value_type": "price", "multivalued": false, "entity": false},
 7 |       {"name": "Bottomline", "value_type": "price", "multivalued": false, "entity": false},
 8 |       {"name": "Title", "value_type": "text", "multivalued": false, "entity": false},
 9 |       {"name": "Category", "value_type": "text", "multivalued": false, "entity": false},
10 |       {"name": "Price", "value_type": "price", "multivalued": false, "entity": false},
11 |       {"name": "Images", "value_type": "text", "multivalued": false, "entity": false},
12 |       {"name": "Description", "value_type": "text", "multivalued": false, "entity": false}
13 |     ]
14 | }
15 | 


--------------------------------------------------------------------------------
/craigslistbargain/evaluate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch.nn as nn
 3 | from torch import cuda
 4 | from onmt.Utils import use_gpu
 5 | 
 6 | from cocoa.io.utils import read_json, write_json, read_pickle, write_pickle, create_path
 7 | from cocoa.core.schema import Schema
 8 | 
 9 | from cocoa.neural.trainer import Trainer, Statistics
10 | from cocoa.neural.loss import SimpleLossCompute
11 | from cocoa.neural.beam import Scorer
12 | 
13 | from neural.utterance import UtteranceBuilder
14 | from neural import get_data_generator, make_model_mappings
15 | from neural import model_builder
16 | from neural.evaluator import Evaluator
17 | from neural.generator import get_generator
18 | import options
19 | 
20 | 
if __name__ == '__main__':
    # Command-line entry point: parse options, load a checkpointed model and
    # run the evaluator over the test data generator.
    parser = argparse.ArgumentParser()
    # NOTE(review): --random-seed is parsed but not read anywhere in this
    # script; confirm whether a downstream component consumes it.
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    options.add_data_generator_arguments(parser)
    options.add_generator_arguments(parser)
    args = parser.parse_args()

    # Know which arguments are for the models thus should not be
    # overwritten during test
    # (parsed from an empty argv, so dummy_args holds only default values)
    dummy_parser = argparse.ArgumentParser(description='duh')
    options.add_model_arguments(dummy_parser)
    options.add_data_generator_arguments(dummy_parser)
    dummy_args = dummy_parser.parse_known_args([])[0]

    if cuda.is_available() and not args.gpuid:
        print("WARNING: You have a CUDA device, should run with --gpuid 0")

    # Only the first listed GPU id is selected.
    if args.gpuid:
        cuda.set_device(args.gpuid[0])

    # Load the model.
    mappings, model, model_args = \
        model_builder.load_test_model(args.checkpoint, args, dummy_args.__dict__)

    # Figure out src and tgt vocab
    make_model_mappings(model_args.model, mappings)

    schema = Schema(model_args.schema_path, None)
    data_generator = get_data_generator(args, model_args, schema, test=True)

    # Prefix: [GO, CATEGORY]
    # Just giving it GO seems okay as it can learn to copy the CATEGORY from the input
    scorer = Scorer(args.alpha)
    generator = get_generator(model, mappings['tgt_vocab'], scorer, args, model_args)
    builder = UtteranceBuilder(mappings['tgt_vocab'], args.n_best, has_tgt=True)
    evaluator = Evaluator(model, mappings, generator, builder, gt_prefix=1)
    evaluator.evaluate(args, model_args, data_generator)
58 | 


--------------------------------------------------------------------------------
/craigslistbargain/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/model/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/model/dialogue_state.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.dialogue_state import DialogueState as State
 2 | 
 3 | class DialogueState(State):
 4 |     def __init__(self, agent, kb):
 5 |         super(DialogueState, self).__init__(agent, kb)
 6 |         self.price = [None, None]
 7 |         self.curr_price = None
 8 |         self.num_inquiry = 0
 9 | 
10 |     @property
11 |     def listing_price(self):
12 |         return self.kb.listing_price
13 | 
14 |     @property
15 |     def my_price(self):
16 |         return self.price[self.agent]
17 | 
18 |     @my_price.setter
19 |     def my_price(self, price):
20 |         self.price[self.agent] = price
21 | 
22 |     @property
23 |     def partner_price(self):
24 |         return self.price[self.partner]
25 | 
26 |     def update(self, agent, utterance):
27 |         super(DialogueState, self).update(agent, utterance)
28 |         if not utterance:
29 |             return
30 |         lf = utterance.lf
31 |         if hasattr(lf, 'price') and lf.price is not None:
32 |             self.price[agent] = lf.price
33 |             self.curr_price = lf.price
34 |         if agent == self.agent and lf.intent == 'inquiry':
35 |             self.num_inquiry += 1
36 | 


--------------------------------------------------------------------------------
/craigslistbargain/model/manager.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.manager import Manager as BaseManager
 2 | 
 3 | class Manager(BaseManager):
 4 |     def available_actions(self, state):
 5 |         actions = super(Manager, self).available_actions(state)
 6 |         # These actions should be decided by the bot's strategy
 7 |         masked_actions = ['reject', 'accept', 'quit', 'agree']
 8 |         if state.num_inquiry > 1:
 9 |             masked_actions.append('inquiry')
10 |             if state.curr_price is None:
11 |                 actions = ['init-price']
12 |         if state.partner_price is None or state.curr_price is None:
13 |             masked_actions.append('offer')
14 |         actions = [a for a in actions if not a in masked_actions]
15 |         return actions
16 | 
17 |     def choose_action(self, state, context=None):
18 |         action = super(Manager, self).choose_action(state, context)
19 |         if action == 'offer' and state.partner_act == 'unknown':
20 |             return 'agree'
21 |         return action
22 | 
23 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/__init__.py:
--------------------------------------------------------------------------------
 1 | import onmt
 2 | 
 3 | def get_data_generator(args, model_args, schema, test=False):
 4 |     from cocoa.core.scenario_db import ScenarioDB
 5 |     from cocoa.core.dataset import read_dataset
 6 |     from cocoa.core.util import read_json
 7 | 
 8 |     from core.scenario import Scenario
 9 |     from core.price_tracker import PriceTracker
10 |     from preprocess import DataGenerator, Preprocessor
11 |     import os.path
12 | 
13 |     # TODO: move this to dataset
14 |     dataset = read_dataset(args, Scenario)
15 | 
16 |     mappings_path = model_args.mappings
17 | 
18 |     lexicon = PriceTracker(model_args.price_tracker_model)
19 | 
20 |     preprocessor = Preprocessor(schema, lexicon, model_args.entity_encoding_form,
21 |         model_args.entity_decoding_form, model_args.entity_target_form,
22 |         model=model_args.model)
23 | 
24 |     if test:
25 |         model_args.dropout = 0
26 |         train, dev, test = None, None, dataset.test_examples
27 |     else:
28 |         train, dev, test = dataset.train_examples, dataset.test_examples, None
29 |     data_generator = DataGenerator(train, dev, test, preprocessor, schema, mappings_path,
30 |         cache=args.cache, ignore_cache=args.ignore_cache,
31 |         num_context=model_args.num_context,
32 |         batch_size=args.batch_size,
33 |         model=model_args.model)
34 | 
35 |     return data_generator
36 | 
def check_model_args(args):
    """Sanity-check model options; raises ``AssertionError`` on a conflict.

    Checks performed:
      * if pretrained word vectors are given, the vector size read from the
        first line of the (first) file equals ``word_embed_size`` and, for a
        'bow' context encoder, ``context_size``;
      * an 'rnn-attn' decoder has ``attention_memory`` set;
      * ``num_context > 0`` is not combined with ``stateful``;
      * ``temperature`` is non-negative.
    """
    wordvec = args.pretrained_wordvec
    if wordvec:
        path = wordvec[0] if isinstance(wordvec, list) else wordvec
        with open(path, 'r') as fin:
            first_line = fin.readline()
        # First field on the line is the word itself; the rest is the vector.
        embed_size = len(first_line.strip().split()) - 1
        assert embed_size == args.word_embed_size

        if args.context and args.context_encoder == 'bow':
            assert embed_size == args.context_size

    if args.decoder == 'rnn-attn':
        assert args.attention_memory is not None

    if args.num_context > 0:
        assert not args.stateful

    assert args.temperature >= 0
57 | 
def make_model_mappings(model, mappings):
    """Point both 'src_vocab' and 'tgt_vocab' at the utterance vocabulary.

    ``mappings`` is updated in place and also returned. ``model`` is accepted
    but not used.
    """
    for side in ('src_vocab', 'tgt_vocab'):
        mappings[side] = mappings['utterance_vocab']
    return mappings
62 | 
def build_optim(opt, model, checkpoint):
    """Create an ``onmt.Optim`` optimizer bound to ``model``'s parameters.

    Reads ``optim``, ``learning_rate``, ``max_grad_norm`` and ``rnn_size``
    from ``opt``. ``checkpoint`` is accepted but not used.
    """
    print('Making optimizer for training.')
    optimizer = onmt.Optim(opt.optim,
                           opt.learning_rate,
                           opt.max_grad_norm,
                           model_size=opt.rnn_size)
    optimizer.set_parameters(model.parameters())
    return optimizer
72 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/evaluator.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from itertools import count
 4 | 
 5 | from onmt.Utils import use_gpu
 6 | 
 7 | from cocoa.neural.evaluator import Evaluator as BaseEvaluator
 8 | 
 9 | from neural.generator import get_generator
10 | 
class Evaluator(BaseEvaluator):
    """Evaluator that prints each generated response with its listing title."""

    def print_results(self, model_opt, batch, utterances):
        """Write one log entry per generated utterance in ``batch`` to stdout.

        Args:
            model_opt: model options; ``model_opt.model`` decides whether the
                encoder input is printed as a summary.
            batch: batch exposing ``title_inputs`` and ``encoder_inputs``.
            utterances: iterable of response objects exposing ``log(n)``.
        """
        # The original body called next(counter), but no `counter` exists in
        # this module (only itertools.count is imported), so it raised
        # NameError. Keep a per-evaluator running sentence number instead,
        # created lazily so __init__ of the base class is left untouched.
        counter = getattr(self, '_sent_counter', None)
        if counter is None:
            counter = self._sent_counter = count(1)

        # Swap the first two axes so that indexing by i selects one example
        # (presumably the tensors are stored sequence-first -- TODO confirm).
        titles = batch.title_inputs.transpose(0, 1)
        enc_inputs = batch.encoder_inputs.transpose(0, 1)
        for i, response in enumerate(utterances):
            sent_number = next(counter)
            title = self.builder.var_to_sent(titles[i], self.mappings['kb_vocab'])
            summary = self.builder.var_to_sent(enc_inputs[i])
            print("--------- {0}: {1} -----------".format(sent_number, title))
            if model_opt.model in ["sum2sum", "sum2seq"]:
                print("SUMMARY: {}".format(summary) )
            output = response.log(sent_number)
            # Write encoded bytes straight to file descriptor 1 (stdout).
            os.write(1, output.encode('utf-8'))
24 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/models.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from cocoa.neural.models import NMTModel
 4 | 
 5 | class NegotiationModel(NMTModel):
 6 | 
 7 |     def __init__(self, encoder, decoder, context_embedder, kb_embedder, stateful=False):
 8 |         super(NegotiationModel, self).__init__(encoder, decoder, stateful=stateful)
 9 |         self.context_embedder = context_embedder
10 |         self.kb_embedder = kb_embedder
11 | 
12 |     def forward(self, src, tgt, context, title, desc, lengths, dec_state=None, enc_state=None, tgt_lengths=None):
13 |         enc_final, enc_memory_bank = self.encoder(src, lengths, enc_state)
14 |         _, context_memory_bank = self.context_embedder(context)
15 |         if self.kb_embedder:
16 |             _, title_memory_bank = self.kb_embedder(title)
17 |             _, desc_memory_bank = self.kb_embedder(desc)
18 |             memory_banks = [enc_memory_bank, context_memory_bank, title_memory_bank, desc_memory_bank]
19 |         else:
20 |             memory_banks = [enc_memory_bank, context_memory_bank]
21 | 
22 |         enc_state = self.decoder.init_decoder_state(src, enc_memory_bank, enc_final)
23 |         dec_state = enc_state if dec_state is None else dec_state
24 |         decoder_outputs, dec_state, attns = self.decoder(tgt, memory_banks,
25 |                 dec_state, memory_lengths=lengths, lengths=tgt_lengths)
26 | 
27 |         return decoder_outputs, attns, dec_state
28 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/symbols.py:
--------------------------------------------------------------------------------
 1 | #from collections import namedtuple
 2 | #
 3 | #SpecialSymbols = namedtuple('SpecialSymbols',
 4 | #        ['EOS', 'END_SUM', 'GO_S', 'GO_B', 'OFFER', 'QUIT', 'ACCEPT', 'REJECT', 'PAD', 'C_car', 'C_phone', 'C_housing', 'C_electronics', 'C_furniture', 'C_bike'])
 5 | #
 6 | #markers = SpecialSymbols(EOS='</s>', END_SUM='</sum>', GO_S='<go-s>', GO_B='<go-b>', OFFER='<offer>', QUIT='<quit>', ACCEPT='<accept>', REJECT='<reject>', PAD='<pad>', C_car='<car>', C_phone='<phone>', C_housing='<housing>', C_electronics='<electronics>', C_furniture='<furniture>', C_bike='<bike>')
 7 | 
 8 | from cocoa.neural.symbols import Marker as BaseMarker
 9 | 
class Marker(BaseMarker):
    """Special tokens for this task, added on top of the base markers."""

    # Sequence delimiters
    END_SUM = '</sum>'
    GO_S = '<go-s>'
    GO_B = '<go-b>'

    # Action tokens
    OFFER = '<offer>'
    QUIT = '<quit>'
    ACCEPT = '<accept>'
    REJECT = '<reject>'

    # Listing-category tokens
    C_car = '<car>'
    C_phone = '<phone>'
    C_housing = '<housing>'
    C_electronics = '<electronics>'
    C_furniture = '<furniture>'
    C_bike = '<bike>'
29 | 
# Module-level alias: the marker class itself is used as the namespace.
markers = Marker

# One token per listing category.
category_markers = [
    markers.C_car,
    markers.C_phone,
    markers.C_housing,
    markers.C_electronics,
    markers.C_furniture,
    markers.C_bike,
]

# Tokens standing for dialogue actions.
action_markers = [
    markers.ACCEPT,
    markers.REJECT,
    markers.OFFER,
    markers.QUIT,
]

# Sequence-level tokens (EOS and PAD are inherited from the base marker class).
sequence_markers = [
    markers.EOS,
    markers.GO_S,
    markers.GO_B,
    markers.PAD,
]
37 | 
38 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/trainer.py:
--------------------------------------------------------------------------------
 1 | from cocoa.neural.trainer import Trainer as BaseTrainer
 2 | 
 3 | class Trainer(BaseTrainer):
 4 |     ''' Class that controls the training process which inherits from Cocoa '''
 5 | 
 6 |     def _run_batch(self, batch, dec_state=None, enc_state=None):
 7 |         encoder_inputs = batch.encoder_inputs
 8 |         decoder_inputs = batch.decoder_inputs
 9 |         targets = batch.targets
10 |         lengths = batch.lengths
11 |         #tgt_lengths = batch.tgt_lengths
12 | 
13 |         # running forward() method in the NegotiationModel
14 |         if hasattr(self.model, 'context_embedder'):
15 |             context_inputs = batch.context_inputs
16 |             title_inputs = batch.title_inputs
17 |             desc_inputs = batch.desc_inputs
18 | 
19 |             outputs, attns, dec_state = self.model(encoder_inputs,
20 |                     decoder_inputs, context_inputs, title_inputs,
21 |                     desc_inputs, lengths, dec_state, enc_state)
22 |         # running forward() method in NMT Model
23 |         else:
24 |             outputs, attns, dec_state = self.model(encoder_inputs,
25 |                   decoder_inputs, lengths, dec_state, enc_state)
26 | 
27 |         return outputs, attns, dec_state
28 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/utterance.py:
--------------------------------------------------------------------------------
 1 | from cocoa.neural.utterance import Utterance
 2 | from cocoa.neural.utterance import UtteranceBuilder as BaseUtteranceBuilder
 3 | 
 4 | from symbols import markers, category_markers
 5 | from core.price_tracker import PriceScaler
 6 | from cocoa.core.entity import is_entity
 7 | 
 8 | class UtteranceBuilder(BaseUtteranceBuilder):
 9 |     """
10 |     Build a word-based utterance from the batch output
11 |     of generator and the underlying dictionaries.
12 |     """
13 |     def build_target_tokens(self, predictions, kb=None):
14 |         tokens = super(UtteranceBuilder, self).build_target_tokens(predictions, kb)
15 |         tokens = [x for x in tokens if not x in category_markers]
16 |         return tokens
17 | 
18 |     def _entity_to_str(self, entity_token, kb):
19 |         raw_price = PriceScaler.unscale_price(kb, entity_token)
20 |         human_readable_price = "${}".format(raw_price.canonical.value)
21 |         return human_readable_price
22 | 
23 |     def get_price_number(self, entity, kb):
24 |         raw_price = PriceScaler.unscale_price(kb, entity)
25 |         return raw_price.canonical.value
26 | 


--------------------------------------------------------------------------------
/craigslistbargain/neural/vocab_builder.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.vocab import Vocabulary
 2 | from cocoa.neural.vocab_builder import build_utterance_vocab
 3 | 
 4 | from symbols import markers, sequence_markers
 5 | 
 6 | def build_kb_vocab(dialogues, special_symbols=[]):
 7 |     kb_vocab = Vocabulary(offset=0, unk=True)
 8 |     cat_vocab = Vocabulary(offset=0, unk=False)
 9 | 
10 |     for dialogue in dialogues:
11 |         assert dialogue.is_int is False
12 |         kb_vocab.add_words(dialogue.title)
13 |         kb_vocab.add_words(dialogue.description)
14 |         cat_vocab.add_word(dialogue.category)
15 | 
16 |     kb_vocab.add_words(special_symbols, special=True)
17 |     kb_vocab.finish(freq_threshold=5)
18 |     cat_vocab.add_words(['bike', 'car', 'electronics', 'furniture', 'housing', 'phone'], special=True)
19 |     cat_vocab.finish()
20 | 
21 |     print 'KB vocab size:', kb_vocab.size
22 |     print 'Category vocab size:', cat_vocab.size
23 |     return kb_vocab, cat_vocab
24 | 
def build_lf_vocab(dialogues):
    """Build the vocabulary over logical-form tokens.

    Every entry of ``dialogue.lfs`` is added for each dialogue (which must
    not yet be integer-converted), followed by the sequence markers as
    special tokens.

    Returns:
        Vocabulary
    """
    vocab = Vocabulary(offset=0, unk=True)
    for dialogue in dialogues:
        assert dialogue.is_int is False
        for lf in dialogue.lfs:
            vocab.add_words(lf)
    vocab.add_words(sequence_markers, special=True)
    vocab.finish()
    # Single-argument print(...) yields the same text under Python 2 and 3.
    print('LF vocabulary size: {}'.format(vocab.size))
    return vocab
35 | 
def create_mappings(dialogues, schema, entity_forms):
    """Build all vocabularies and return them keyed by name.

    ``schema`` is accepted but not used. The result has the keys
    'utterance_vocab', 'kb_vocab', 'cat_vocab' and 'lf_vocab'.
    """
    mappings = {}
    mappings['utterance_vocab'] = build_utterance_vocab(
        dialogues, sequence_markers, entity_forms)
    # PAD is the only special symbol added to the KB vocabulary.
    mappings['kb_vocab'], mappings['cat_vocab'] = build_kb_vocab(
        dialogues, [markers.PAD])
    mappings['lf_vocab'] = build_lf_vocab(dialogues)
    return mappings
45 | 


--------------------------------------------------------------------------------
/craigslistbargain/onmt:
--------------------------------------------------------------------------------
1 | ../onmt


--------------------------------------------------------------------------------
/craigslistbargain/reinforce.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Takes two agent (Session) implementations, generates dialogues between them,
 3 | and runs REINFORCE.
 4 | """
 5 | 
 6 | import argparse
 7 | import random
 8 | import json
 9 | import numpy as np
10 | 
11 | from onmt.Utils import use_gpu
12 | 
13 | from cocoa.core.util import read_json
14 | from cocoa.core.schema import Schema
15 | from cocoa.core.scenario_db import ScenarioDB
16 | from cocoa.neural.loss import ReinforceLossCompute
17 | import cocoa.options
18 | 
19 | from core.scenario import Scenario
20 | from core.controller import Controller
21 | from systems import get_system
22 | from neural.rl_trainer import RLTrainer
23 | from neural import build_optim
24 | import options
25 | 
def make_loss(opt, model, tgt_vocab):
    """Create the REINFORCE loss for ``model``, moved to GPU when ``opt`` asks for it."""
    criterion = ReinforceLossCompute(model.generator, tgt_vocab)
    if not use_gpu(opt):
        return criterion
    criterion.cuda()
    return criterion
31 | 
32 | 
if __name__ == '__main__':
    parser = argparse.ArgumentParser(conflict_handler='resolve')
    parser.add_argument('--agents', help='What kind of agent to use. The first agent is always going to be updated and the second is fixed.', nargs='*', required=True)
    parser.add_argument('--agent-checkpoints', nargs='+', help='Directory to learned models')
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    parser.add_argument('--verbose', default=False, action='store_true', help='Whether or not to have verbose prints')
    # This file is read below (read_json), so it is an input path.
    parser.add_argument('--valid-scenarios-path', help='Path to the validation scenarios')
    cocoa.options.add_scenario_arguments(parser)
    options.add_system_arguments(parser)
    options.add_rl_arguments(parser)
    options.add_model_arguments(parser)
    args = parser.parse_args()

    if args.random_seed:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
    valid_scenario_db = ScenarioDB.from_dict(schema, read_json(args.valid_scenarios_path), Scenario)

    # Fewer checkpoints than agents is allowed (and the flag may be omitted
    # entirely), so pad with None rather than indexing past the end of the
    # list; None is get_system's default for model_path.
    checkpoints = list(args.agent_checkpoints or [])
    assert len(checkpoints) <= len(args.agents)
    checkpoints.extend([None] * (len(args.agents) - len(checkpoints)))
    systems = [get_system(name, args, schema, False, checkpoint)
               for name, checkpoint in zip(args.agents, checkpoints)]

    # The first agent is the one being trained.
    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = make_loss(args, model, system.mappings['tgt_vocab'])
    optim = build_optim(args, model, None)

    scenarios = {'train': scenario_db.scenarios_list, 'dev': valid_scenario_db.scenarios_list}
    trainer = RLTrainer(systems, scenarios, loss, optim, rl_agent, reward_func=args.reward)
    trainer.learn(args)
66 | 


--------------------------------------------------------------------------------
/craigslistbargain/results.txt:
--------------------------------------------------------------------------------
 1 | make dump outdir=2017-12-08-test
 2 | 
 3 | ct(<type 'int'>, {(u'human', u'neural-gen'): 272, (u'human', u'rulebased'): 252, (u'human', u'human'): 168})
 4 | neural-gen {'success rate': 0.7316176470588235, 'average margin': -0.13129977119911687}
 5 | rulebased {'success rate': 0.7341269841269841, 'average margin': 0.37129803906567777}
 6 | =========== mean ===========
 7 | neural-gen [72 46 30 73 51]
 8 | rulebased [44 37  9 78 84]
 9 | human [  4  10  22  71 170]
10 | ============= Humanlikeness ===============
11 | agent        avg_score  error      #score     win
12 | ---------------------------------------
13 | Neural       2.9        0.09       272
14 | Rule-based   3.5        0.09       252        n
15 | Human        4.4        0.05       277        nr
16 | 


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scraper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/scraper/scraper/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scraper/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class ScraperItem(scrapy.Item):
    """Placeholder item with no fields declared.

    Fields would be declared here as, e.g., ``name = scrapy.Field()``.
    """
15 | 


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scraper/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class ScraperSpiderMiddleware(object):
    """Pass-through spider middleware.

    Not all methods need to be defined. If a method is not defined,
    scrapy acts as if the spider middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to the spider_opened signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    # The hook methods below are invoked on the instance created in
    # from_crawler, so each needs an explicit `self`; without it, calling
    # them on an instance raises TypeError.

    def process_spider_input(self, response, spider):
        """Called for each response that goes through the spider
        middleware and into the spider.

        Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with the results returned from the Spider, after
        it has processed the response.

        Must return an iterable of Request, dict or Item objects.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or process_spider_input() method
        (from other spider middleware) raises an exception.

        Should return either None or an iterable of Response, dict
        or Item objects.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Called with the start requests of the spider, and works
        similarly to the process_spider_output() method, except
        that it does not have a response associated.

        Must return only requests (not items).
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log that ``spider`` has been opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scraper/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | from scrapy.pipelines.images import ImagesPipeline
 9 | from scrapy.exceptions import DropItem
10 | import os.path
11 | import logging
12 | import scrapy
13 | from scraper.settings import IMAGES_STORE
14 | 
class ScraperPipeline(object):
    """Pipeline stage that passes every item through unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as received; ``spider`` is not used."""
        return item
18 | 
class CraigslistValidationPipeline(object):
    """Pipeline stage that drops items which compare equal to an empty dict."""

    def process_item(self, item, spider):
        """Return ``item``, or raise ``DropItem('parse error')`` if it is empty."""
        if item == {}:
            raise DropItem('parse error')
        return item
25 | 
class CraigslistImagesPipeline(ImagesPipeline):
    """Image pipeline that stores downloads as '<category>/<post_id>_<index>.jpg'."""

    def get_media_requests(self, item, info):
        """Yield one request per image URL, carrying its target filename in ``meta``."""
        for index, url in enumerate(item['image_urls']):
            target = '%s/%s_%d.jpg' % (item['category'], item['post_id'], index)
            yield scrapy.Request(url, meta={'filename': target})

    def file_path(self, request, response=None, info=None):
        """Return the filename that get_media_requests attached to the request."""
        return request.meta['filename']

    def item_completed(self, results, item, info):
        """Append the stored path of every successful download to ``item['images']``."""
        succeeded = [outcome for ok, outcome in results if ok]
        for outcome in succeeded:
            # path is relative to IMAGES_STORE
            item['images'].append(outcome['path'])
        return item
41 | 
42 | 


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scraper/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/craigslistbargain/scraper/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = scraper.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = scraper
12 | 


--------------------------------------------------------------------------------
/craigslistbargain/scripts/visualize_transcripts.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from cocoa.core.util import write_json
 4 | 
 5 | from analysis.visualizer import Visualizer
 6 | from analysis.html_visualizer import HTMLVisualizer
 7 | 
 8 | if __name__ == '__main__':
 9 |     parser = ArgumentParser()
10 |     parser.add_argument('--survey-transcripts', nargs='+',
11 |             help='Path to directory containing evaluation transcripts')
12 |     parser.add_argument('--dialogue-transcripts', nargs='+',
13 |             help='Path to directory containing dialogue transcripts')
14 |     parser.add_argument('--worker-ids', nargs='+',
15 |             help='Path to json file containing chat_id to worker_id mappings')
16 |     parser.add_argument('--summary', default=False, action='store_true',
17 |             help='Summarize human ratings')
18 |     parser.add_argument('--hist', default=False, action='store_true',
19 |             help='Plot histgram of ratings')
20 |     parser.add_argument('--html-visualize', action='store_true',
21 |             help='Output html files')
22 |     parser.add_argument('--outdir', default='.', help='Output dir')
23 |     parser.add_argument('--stats', default='stats.json',
24 |             help='Path to stats file')
25 |     parser.add_argument('--partner', default=False, action='store_true',
26 |             help='Whether this is from partner survey')
27 |     HTMLVisualizer.add_html_visualizer_arguments(parser)
28 |     args = parser.parse_args()
29 | 
30 |     visualizer = Visualizer(args.dialogue_transcripts,
31 |             args.survey_transcripts, args.worker_ids)
32 | 
33 |     visualizer.compute_effectiveness()
34 | 
35 |     # TODO: move summary and hist to analyzer
36 |     if args.hist:
37 |         visualizer.hist(question_scores, args.outdir, partner=args.partner)
38 | 
39 |     if args.summary:
40 |         summary = visualizer.summarize()
41 |         write_json(summary, args.stats)
42 | 
43 |     if args.worker_ids:
44 |         visualizer.worker_stats()
45 | 
46 |     if args.html_output:
47 |         visualizer.html_visualize(args.viewer_mode, args.html_output,
48 |             css_file=args.css_file, img_path=args.img_path,
49 |             worker_ids=visualizer.worker_ids)
50 | 


--------------------------------------------------------------------------------
/craigslistbargain/sessions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/sessions/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/sessions/cmd_session.py:
--------------------------------------------------------------------------------
 1 | from session import Session
 2 | 
 3 | 
 4 | class CmdSession(Session):
 5 |     def __init__(self, agent, kb):
 6 |         super(CmdSession, self).__init__(agent)
 7 |         self.kb = kb
 8 | 
 9 |     def send(self):
10 |         message = raw_input()
11 |         event = self.parse_input(message)
12 |         return event
13 | 
14 |     def parse_input(self, message):
15 |         """Parse user input from the command line.
16 |         Args:  message (str)
17 |         Returns: Event
18 |         """
19 |         raw_tokens = message.split()
20 |         tokens = self.remove_nonprintable(raw_tokens)
21 | 
22 |         if len(tokens) >= 2 and tokens[0] == '<offer>':
23 |             return self.offer({'price': int(tokens[1]), 'sides': ''})
24 |         elif tokens[0] == '<accept>':
25 |             return self.accept()
26 |         elif tokens[0] == '<reject>':
27 |             return self.reject()
28 |         elif tokens[0] == '<quit>':
29 |             return self.quit()
30 |         else:
31 |             return self.message(message)
32 | 
33 |     def receive(self, event):
34 |         print event.data
35 | 


--------------------------------------------------------------------------------
/craigslistbargain/sessions/session.py:
--------------------------------------------------------------------------------
 1 | from cocoa.sessions.session import Session as BaseSession
 2 | from core.event import Event
 3 | 
 4 | class Session(BaseSession):
 5 |     def offer(self, offer, metadata=None):
 6 |         """Send an offer event.
 7 | 
 8 |         Args:
 9 |             offer ({'price': float, 'sides', str})
10 | 
11 |         Returns:
12 |             OfferEvent
13 | 
14 |         """
15 |         return Event.OfferEvent(self.agent, offer, time=self.timestamp(), metadata=None)
16 | 
17 |     def accept(self, metadata=None):
18 |         return Event.AcceptEvent(self.agent, time=self.timestamp(), metadata=None)
19 | 
20 |     def reject(self, metadata=None):
21 |         return Event.RejectEvent(self.agent, time=self.timestamp(), metadata=None)
22 | 
23 |     def quit(self, metadata=None):
24 |         return Event.QuitEvent(self.agent, time=self.timestamp(), metadata=None)
25 | 


--------------------------------------------------------------------------------
/craigslistbargain/systems/__init__.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.util import read_json, read_pickle
 2 | 
 3 | import options
 4 | 
 5 | 
 6 | def get_system(name, args, schema=None, timed=False, model_path=None):
 7 |     from core.price_tracker import PriceTracker
 8 |     lexicon = PriceTracker(args.price_tracker_model)
 9 | 
10 |     if name == 'rulebased':
11 |         from rulebased_system import RulebasedSystem
12 |         from model.generator import Templates, Generator
13 |         from model.manager import Manager
14 |         templates = Templates.from_pickle(args.templates)
15 |         generator = Generator(templates)
16 |         manager = Manager.from_pickle(args.policy)
17 |         return RulebasedSystem(lexicon, generator, manager, timed)
18 |     elif name == 'hybrid':
19 |         from hybrid_system import HybridSystem
20 |         templates = Templates.from_pickle(args.templates)
21 |         manager = PytorchNeuralSystem(args, schema, lexicon, model_path, timed)
22 |         generator = Generator(templates)
23 |         return HybridSystem(lexicon, generator, manager, timed)
24 |     elif name == 'cmd':
25 |         from cmd_system import CmdSystem
26 |         return CmdSystem()
27 |     elif name == 'pt-neural':
28 |         from neural_system import PytorchNeuralSystem
29 |         assert model_path
30 |         return PytorchNeuralSystem(args, schema, lexicon, model_path, timed)
31 |     else:
32 |         raise ValueError('Unknown system %s' % name)
33 | 


--------------------------------------------------------------------------------
/craigslistbargain/systems/cmd_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.system import System as BaseSystem
 2 | from sessions.cmd_session import CmdSession
 3 | 
 4 | class CmdSystem(BaseSystem):
 5 |     def __init__(self):
 6 |         super(CmdSystem, self).__init__()
 7 | 
 8 |     @classmethod
 9 |     def name(cls):
10 |         return 'cmd'
11 | 
12 |     def new_session(self, agent, kb):
13 |         return CmdSession(agent, kb)
14 | 


--------------------------------------------------------------------------------
/craigslistbargain/systems/hybrid_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem
 2 | from sessions.hybrid_session import HybridSession
 3 | 
 4 | 
 5 | class HybridSystem(BaseRulebasedSystem):
 6 | 
 7 |     def _new_session(self, agent, kb, config=None):
 8 |         self.manager.timed_session = False
 9 |         manager_session = self.manager.new_session(agent, kb)
10 |         return HybridSession.get_session(agent, kb, self.lexicon,
11 |                 self.generator, manager_session)
12 | 
13 |     @classmethod
14 |     def name(cls):
15 |         return 'hybrid'
16 | 
17 | 


--------------------------------------------------------------------------------
/craigslistbargain/systems/rulebased_system.py:
--------------------------------------------------------------------------------
1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem
2 | from sessions.rulebased_session import RulebasedSession
3 | 
class RulebasedSystem(BaseRulebasedSystem):
    """Rule-based system that creates RulebasedSession objects."""

    def _new_session(self, agent, kb, config=None):
        """Create a session from this system's lexicon, generator and manager."""
        session_args = (agent, kb, self.lexicon, config, self.generator, self.manager)
        return RulebasedSession.get_session(*session_args)
7 | 
8 | 


--------------------------------------------------------------------------------
/craigslistbargain/web/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/web/app_params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "status_params": {
 3 | 	"waiting": {
 4 | 	    "num_seconds": 180
 5 | 	},
 6 | 
 7 | 	"chat": {
 8 | 	    "num_seconds": 1200
 9 | 	},
10 | 
11 | 	"finished": {
12 | 	    "num_seconds": -1
13 | 	},
14 | 
15 | 	"survey": {
16 | 	    "num_seconds": -1
17 | 	},
18 | 	"redirected": {
19 | 	    "num_seconds": -1
20 | 	},
21 | 	"incomplete": {
22 | 	    "num_seconds": -1
23 | 	},
24 | 	"reporting": {
25 | 	    "num_seconds": -1
26 | 	}
27 |     },
28 |     "connection_timeout_num_seconds": 3,
29 |     "idle_timeout_num_seconds": 210,
30 |     "templates_dir": "web/templates",
31 |     "images_base": "web/static/images",
32 |     "instructions": "web/templates/craigslist-instructions.html",
33 |     "task_title": "Let's Negotiate!",
34 |     "icon": "handshake.jpg",
35 |     "end_survey": 1,
36 |     "skip_chat_enabled": false,
37 |     "num_chats_per_scenario": {"human": 1},
38 |     "debug": false
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/craigslistbargain/web/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/main/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/web/main/db_reader.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import json
 3 | 
 4 | from cocoa.web.main.db_reader import DatabaseReader as BaseDatabaseReader
 5 | from cocoa.core.util import write_json
 6 | 
 7 | class DatabaseReader(BaseDatabaseReader):
 8 |     @classmethod
 9 |     def get_chat_outcome(cls, cursor, chat_id):
10 |         outcome = super(DatabaseReader, cls).get_chat_outcome(cursor, chat_id)
11 |         try:
12 |             if math.isnan(outcome['offer']['price']):
13 |                 outcome['offer']['price'] = None
14 |         except (ValueError, TypeError, KeyError) as e:
15 |             pass
16 |         return outcome
17 | 
18 |     @classmethod
19 |     def get_chat_example(cls, cursor, chat_id, scenario_db):
20 |         ex = super(DatabaseReader, cls).get_chat_example(cursor, chat_id, scenario_db)
21 |         if not ex is None:
22 |             cursor.execute('SELECT config FROM bot where chat_id=?', (chat_id,))
23 |             result = cursor.fetchone()
24 |             if result:
25 |                 ex.agents_info = {'config': result[0]}
26 |         return ex
27 | 
28 |     @classmethod
29 |     def process_event_data(cls, action, data):
30 |         if action == 'offer':
31 |             data = json.loads(data)
32 |             try:
33 |                 if math.isnan(data['price']):
34 |                     data['price'] = None
35 |             except (ValueError, TypeError) as e:
36 |                 pass
37 |         return data
38 | 
39 |     @classmethod
40 |     def dump_surveys(cls, cursor, json_path):
41 |         questions = ['fluent', 'honest', 'persuasive', 'fair', 'negotiator', 'coherent', 'comments']
42 | 
43 |         cursor.execute('''SELECT * FROM survey''')
44 |         logged_surveys = cursor.fetchall()
45 |         survey_data = {}
46 |         agent_types = {}
47 | 
48 |         for survey in logged_surveys:
49 |             # todo this is pretty lazy - support variable # of questions per task eventually..
50 |             (userid, cid, _, q1, q2, q3, q4, q5, q6, comments) = survey
51 |             responses = dict(zip(questions, [q1, q2, q3, q4, q5, q6, comments]))
52 |             cursor.execute('''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''', (cid,))
53 |             chat_result = cursor.fetchone()
54 |             agents = json.loads(chat_result[0])
55 |             agent_ids = json.loads(chat_result[1])
56 |             agent_types[cid] = agents
57 |             if cid not in survey_data.keys():
58 |                 survey_data[cid] = {0: {}, 1: {}}
59 |             partner_idx = 0 if agent_ids['1'] == userid else 1
60 |             survey_data[cid][partner_idx] = responses
61 | 
62 |         write_json([agent_types, survey_data], json_path)
63 | 


--------------------------------------------------------------------------------
/craigslistbargain/web/main/utils.py:
--------------------------------------------------------------------------------
 1 | from cocoa.web.main.utils import Messages as BaseMessages
 2 | 
 3 | class Messages(BaseMessages):
 4 |     ChatCompleted = "Great, you reached a final offer!"
 5 |     ChatIncomplete = "Sorry, you weren't able to reach a deal. :("
 6 |     Redirect = "Sorry, that chat did not meet our acceptance criteria."
 7 |     #BetterDeal = "Congratulations, you got the better deal! We'll award you a bonus on Mechanical Turk."
 8 |     #WorseDeal = "Sorry, your partner got the better deal. :("
 9 | 
10 | 


--------------------------------------------------------------------------------
/craigslistbargain/web/static/img/handshake.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/static/img/handshake.jpg


--------------------------------------------------------------------------------
/craigslistbargain/web/templates/finished.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>Thank You!</title>
 4 |         <!--<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/bootstrap.min.css')}}">-->
 5 |         <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">
 6 |         <style>
 7 |             #content > p {
 8 |                 font-size:18px;
 9 |             }
10 |         </style>
11 |         <script type="text/javascript" src="//code.jquery.com/jquery-1.4.2.min.js"></script>
12 |         <script type="text/javascript" charset="utf-8">
13 |         	var BASE_URL = 'http://' + document.domain + ':' + location.port;
14 |         	function visualize() {
15 |         		window.location.href = BASE_URL +'/visualize?uid={{uid}}';
16 |         	}
17 |         </script>
18 |     </head>
19 |     <body>
20 |     <div id="content">
21 |         <h2>Let's Negotiate!</h2>
22 |         <h4>{{ finished_message }}</h4>
23 | 	</div>
24 | 	<div id="mturk">
25 | 		{% if mturk_code is not none %}
26 | 		<hr>	
27 | 		<h3>Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}</h3>
28 | 		{% endif %}
29 | 		<hr>
30 | 		{% if visualize %}
31 | 		<h4><a href="javascript:visualize();">Click here</a> to visualize the dialogue you just completed</h4>
32 | 		{% endif %}
33 | 	</div>
34 | 
35 |     </body>
36 | </html>
37 | 


--------------------------------------------------------------------------------
/craigslistbargain/web/templates/third_party_eval_finished.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>Thank You!</title>
 4 |         <style>
 5 |         #content > p{
 6 |         font-size:24px;
 7 |         }
 8 |         </style>
 9 |     </head>
10 |     <body>
11 |     <center><div id="content">
12 |         <h2>Evaluating Dialogues</h2>
13 |         {{ finished_message }}
14 |     </div></center>
15 |     <center>
16 |     <div id="mturk">
17 |         {% if mturk_code is not none %}
18 |         <hr>
19 |         <h3>Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}</h3>
20 |         {% endif %}
21 |     </div>
22 |     </center>
23 | 
24 |     </body>
25 | </html>


--------------------------------------------------------------------------------
/craigslistbargain/web/templates/visualize.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | {{ dialogue }}
4 | </html>


--------------------------------------------------------------------------------
/craigslistbargain/web/views/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/craigslistbargain/web/views/__init__.py


--------------------------------------------------------------------------------
/craigslistbargain/web/views/action.py:
--------------------------------------------------------------------------------
 1 | from flask import Blueprint, jsonify, request
 2 | from cocoa.web.views.utils import userid, format_message
 3 | 
 4 | from web.main.backend import Backend
 5 | get_backend = Backend.get_backend
 6 | 
 7 | action = Blueprint('action', __name__)
 8 | 
 9 | @action.route('/_offer/', methods=['GET'])
10 | def offer():
11 |     backend = get_backend()
12 |     price = float(request.args.get('price'))
13 |     sides = request.args.get('sides')
14 | 
15 |     offer = {'price': price,
16 |              'sides': sides}
17 | 
18 |     if offer is None or price == -1:
19 |         return jsonify(message=format_message("You made an invalid offer. Please try again.", True))
20 |     backend.make_offer(userid(), offer)
21 | 
22 |     displayed_message = format_message("You made an offer!", True)
23 |     return jsonify(message=displayed_message)
24 | 
25 | 
@action.route('/_accept_offer/', methods=['GET'])
def accept_offer():
    """Tell the backend the current user accepted, and return a JSON message."""
    get_backend().accept_offer(userid())
    return jsonify(message=format_message("You accepted the offer!", True))
33 | 
34 | 
@action.route('/_reject_offer/', methods=['GET'])
def reject_offer():
    """Tell the backend the current user rejected, and return a JSON message."""
    get_backend().reject_offer(userid())
    return jsonify(message=format_message("You rejected the offer.", True))
42 | 
43 | 
@action.route('/_quit/', methods=['GET'])
def quit():
    """Tell the backend the current user quit, and return a JSON message."""
    get_backend().quit(userid())
    return jsonify(message=format_message("You chose to quit this task.", True))
50 | 


--------------------------------------------------------------------------------
/data/turk/sample_aws_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "secret_key": "secretkey",
3 |   "access_key": "accesskey",
4 |   "username": "user"
5 | }


--------------------------------------------------------------------------------
/data/web/matchmaking-instructions.html:
--------------------------------------------------------------------------------
 1 | <p>
 2 |     You and another user online want to set two of your friends up on a date. For each of your friends, you know their <b>favorite hobby and their location/time preferences</b>. Find the best match by chatting with your partner and finding the pair of friends who have the most in common! <b>(Hint: There's exactly one pair that has all three in common.)</b>
 3 | </p>
 4 | <h2>Instructions</h2>
 5 | <ul>
 6 |     <li style="color:#FF0000">Please use <b> natural sentences </b> as much as possible.
 7 |         <ul>
 8 |             <li style="color:#009933"> <b>Do:</b> i have 3 friends who like graffiti and being outdoors </li>
 9 |             <li style="color:#FF0000"> <b>Don't do:</b> 3 graffiti outdoors</li>
10 |         </ul>
11 |      </li>
12 |     <li style="color:#FF0000">Avoid simply listing out the hobbies (or preferences) of your friends.</li>
13 |     <li> Look at your <b>list of friends</b> at right. </li>
14 |     <li> Use the <b>chat box below</b> to find out more about your partner's friends. </li>
    <li> Once you've found the pair, select your friend by clicking <b>Select</b> in the first column.</li>
16 |     <li style="color:#FF0000"><b>Please select carefully</b>. If you select a friend once, the website will not allow you to select another friend for 10 seconds.</li>
17 | 
18 | </ul>


--------------------------------------------------------------------------------
/data/web/matchmaking_params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "status_params": {
 3 | 	"waiting": {
 4 | 	    "num_seconds": 180
 5 | 	},
 6 | 
 7 | 	"chat": {
 8 | 	    "num_seconds": 300
 9 | 	},
10 | 
11 | 	"finished": {
12 | 	    "num_seconds": 15
13 | 	},
14 | 
15 |     "survey": {
16 | 	    "num_seconds": -1
17 | 	}
18 |     },
19 | 
20 |     "scenarios_json_file": "/Users/anushabala/projects/game-dialogue/data/matchmaking-scenarios-large.json",
21 |     "connection_timeout_num_seconds": 5,
22 |     "templates_dir": "/Users/anushabala/projects/game-dialogue/src/web/templates",
23 |     "end_survey": 1,
24 |     "instructions": "/Users/anushabala/projects/game-dialogue/data/web/matchmaking-instructions.html",
25 |     "task_title": "Let's Set Our Friends Up!",
26 |     "icon": "heart.png"
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/data/web/negotiation/app_params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "status_params": {
 3 | 	"waiting": {
 4 | 	    "num_seconds": 180
 5 | 	},
 6 | 
 7 | 	"chat": {
 8 | 	    "num_seconds": 1200
 9 | 	},
10 | 
11 | 	"finished": {
12 | 	    "num_seconds": -1
13 | 	},
14 | 
15 | 	"survey": {
16 | 	    "num_seconds": -1
17 | 	},
18 | 	"redirected": {
19 | 	    "num_seconds": -1
20 | 	},
21 | 	"incomplete": {
22 | 	    "num_seconds": -1
23 | 	},
24 | 	"reporting": {
25 | 	    "num_seconds": -1
26 | 	}
27 |     },
28 |     "connection_timeout_num_seconds": 3,
29 |     "idle_timeout_num_seconds": 210,
30 |     "templates_dir": "/home/hehe/game-dialogue/src/web/templates/negotiation",
31 |     "images_base": "/home/hehe/game-dialogue/images",
32 |     "instructions": "/home/hehe/game-dialogue/data/web/negotiation/craiglist-instructions.html",
33 |     "task_title": "Let's Negotiate!",
34 |     "icon": "handshake.jpg",
35 |     "end_survey": 1,
36 |     "skip_chat_enabled": false,
37 |     "num_chats_per_scenario": 1,
38 |     "debug": false,
39 |     "models": {
40 |         "rulebased": {
41 |             "active": true,
42 |             "type": "rulebased"
43 |         }
44 |     }
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/data/web/negotiation/rent-instructions.html:
--------------------------------------------------------------------------------
 1 | <h3>
 2 |     You and another user online will negotiate the monthly rent of a 1B1B apartment.
 3 | </h3>
 4 | <h3>Instructions</h3>
 5 | <ul>
 6 | 
 7 |     <li>Your <b>role</b> (tenant or landlord) is to the right.</li>
 8 |     <li>Your expected <b>price range</b> is provided as well. We will award <b><font color="green">bonuses</font></b> to those who get a better deal! </li>
 9 |     <li> Use the <b>chat box below</b> to negotiate with your partner given facts listed on the right. </li>
10 |     <li> When you and your partner have agreed on a rent, enter it in the text box and hit 'Submit'. The task ends when you both <b>offer the same price.</b></li>
11 |     <li> You <b>must</b> negotiate a final price <b>within the time limit</b> in order to receive credit for this HIT.</li>
    <li style="color:#FF0000">Please do <b>not</b> state facts about the apartment or about the terms of the lease that are not explicitly given to you. It's okay to embellish certain aspects as a negotiation tactic!</li>
    <li> <b>To the landlord</b>:
14 |         <ul>
15 |             <li>The tenant cannot see all properties of the apartment and may ask you questions about them.</li>
16 |             <li>It's okay to embellish facts, but don't make up new properties not provided to you (or contradict given properties).</li>
17 |             <li style="color:#009933"> <b>Do:</b> this neighborhood is really beautiful, the neighbors are all really friendly</li>
18 |             <li style="color:#FF0000"> <b>Don't do:</b> (if "no parking" is given) I'd be happy to give you a parking spot if you can pay $50 more.</li>
19 |         </ul>
20 |      </li>
    <li> <b>To the tenant</b>:
22 |         <ul>
23 |             <li>If a property is marked with "?", it means that its value is unknown to you, but you can ask the landlord about it.</li>
24 |             <li>It's okay to make up preferences as a negotiation tactic, but don't make up properties that are not provided (or contradict given properties).</li>
25 |             <li style="color:#009933"> <b>Do:</b> It is next to a highway and seems like it can get quite noisy. </li>
26 |             <li style="color:#FF0000"> <b>Don't do:</b> (if the facts did not mention a broken window) The window is broken. </li>
27 |         </ul>
28 |      </li>
29 | </ul>
30 | 


--------------------------------------------------------------------------------
/dealornodeal/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/analysis/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/core/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/core/event.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.event import Event as BaseEvent
 2 | 
 3 | class Event(BaseEvent):
 4 |     @staticmethod
 5 |     def SelectEvent(agent, data, time=None, metadata=None):
 6 |         return Event(agent, time, 'select', data, metadata=metadata)
 7 | 
 8 |     @staticmethod
 9 |     def QuitEvent(agent, time=None, metadata=None):
10 |         return Event(agent, time, 'quit', None, metadata=metadata)
11 | 
12 | 


--------------------------------------------------------------------------------
/dealornodeal/core/kb.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.kb import KB as BaseKB
 2 | 
 3 | class KB(BaseKB):
 4 |     def __init__(self, attributes, items):
 5 |         super(KB, self).__init__(attributes)
 6 |         self.items = items
 7 |         self.item_counts = {item['Name']: item['Count'] for item in items}
 8 |         self.item_values = {item['Name']: item['Value'] for item in items}
 9 | 
10 |     def to_dict(self):
11 |         return self.items
12 | 
13 |     @classmethod
14 |     def from_dict(cls, attributes, raw):
15 |         return cls(attributes, raw)
16 | 
17 |     @classmethod
18 |     def from_ints(cls, attributes, names, ints):
19 |         """Build KB from integers.
20 | 
21 |         Args:
22 |             names (list[str])
23 |             ints (list[int]): [count1, value1, count2, value2, ...]
24 | 
25 |         """
26 |         items = []
27 |         assert 1. * len(ints) / len(names) == 2
28 |         for i, name in enumerate(names):
29 |             item = {'Name': name, 'Count': ints[i*2], 'Value': ints[i*2+1]}
30 |             items.append(item)
31 |         return cls(attributes, items)
32 | 
33 |     def dump(self):
34 |         item_counts = ', '.join(['{count} {item}s'.format(count=c, item=n) for n, c in self.item_counts.iteritems()])
35 |         print 'Items Available: {}'.format(item_counts)
36 | 
37 |         for item, value in self.item_values.iteritems():
38 |             print 'How you value {0}: {1} points'.format(item, value)
39 |         print '----------------'
40 | 


--------------------------------------------------------------------------------
/dealornodeal/core/lexicon.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from cocoa.core.entity import Entity
 3 | 
 4 | class Lexicon(object):
 5 |     """Detect item and numbers in a list of tokens.
 6 | 
 7 |     Example:
 8 |         ['i', 'would', 'like', 'the', 'ball', 'and', '2', 'books'] ->
 9 |         ['i', 'would', 'like', 'the', ('ball', ('ball', 'item')), 'and', ('2', (2, 'number')), ('books', ('book', 'item'))]
10 | 
11 |     """
12 | 
13 |     word_to_num = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10}
14 | 
15 |     def __init__(self, items):
16 |         self.items = items
17 | 
18 |     def detect_item(self, token):
19 |         for item in self.items:
20 |             if re.match(r'{}s?'.format(item), token) or \
21 |                 (item == 'ball' and re.match(r'(basket)?balls?', token)):
22 |                     return Entity.from_elements(surface=token, value=item, type='item')
23 |         return False
24 | 
25 |     def detect_number(self, token):
26 |         try:
27 |             n = int(token)
28 |         except ValueError:
29 |             try:
30 |                 n = self.word_to_num[token]
31 |             except KeyError:
32 |                 n = None
33 |         if n is not None:
34 |             return Entity.from_elements(surface=token, value=n, type='number')
35 |         return False
36 | 
37 |     def link_entity(self, tokens):
38 |         return [(self.detect_item(token) or self.detect_number(token) or token) for token in tokens]
39 | 
############### TEST ###############
if __name__ == '__main__':
    # Smoke test: link the entities of one sample sentence and print the result.
    sample_tokens = 'i need 3 books'.split()
    lexicon = Lexicon(['ball', 'hat', 'book'])
    print(lexicon.link_entity(sample_tokens))
44 | 


--------------------------------------------------------------------------------
/dealornodeal/core/scenario.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.scenario_db import Scenario as BaseScenario
 2 | from cocoa.core.schema import Attribute
 3 | from kb import KB
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     ## Agent ids
 7 |     #FIRST = 0
 8 |     #SECOND = 1
 9 | 
10 |     #def __init__(self, uuid, attributes, kbs):
11 |     #    super(Scenario, self).__init__(uuid, attributes, kbs)
12 |     #    # self.bottom_line = 8
13 |     #    # self.post_id = post_id      // bunch of random numbers: 923461346
14 |     #    # self.category = category    // phone, housing, bike, furniture, electronics
15 |     #    # self.images = images        // link to product image: bike/6123601035_0.jpg
16 | 
17 |     #def to_dict(self):
18 |     #    d = super(Scenario, self).to_dict()
19 |     #    return d
20 | 
21 |     @classmethod
22 |     def from_dict(cls, schema, raw):
23 |         scenario_attributes = None
24 |         if schema is not None:
25 |             scenario_attributes = schema.attributes
26 |         if 'attributes' in raw.keys():
27 |             scenario_attributes = [Attribute.from_json(a) for a in raw['attributes']]
28 | 
29 |         if scenario_attributes is None:
30 |             raise ValueError("No scenario attributes found. "
31 |                              "Either schema must not be None (and have valid attributes) or "
32 |                              "scenario dict must have valid attributes field.")
33 |         kb_list = [KB.from_dict(scenario_attributes, kb) for kb in raw['kbs']]
34 |         return cls(raw['uuid'], scenario_attributes, kb_list)
35 | 


--------------------------------------------------------------------------------
/dealornodeal/core/tokenizer.py:
--------------------------------------------------------------------------------
1 | from cocoa.core.tokenizer import tokenize, detokenize
2 | 


--------------------------------------------------------------------------------
/dealornodeal/data/bookhatball-schema.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "values": {
 3 |         "item": ["book", "hat", "ball"]
 4 |     },
 5 |     "attributes": [
 6 |       {"name": "Name", "value_type": "item", "entity": true},
 7 |       {"name": "Count", "value_type": "integer", "entity": true},
 8 |       {"name": "Value", "value_type": "integer", "entity": true}
 9 |     ]
10 | }
11 | 


--------------------------------------------------------------------------------
/dealornodeal/evaluate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch.nn as nn
 3 | from torch import cuda
 4 | from onmt.Utils import use_gpu
 5 | 
 6 | from cocoa.io.utils import read_json, write_json, read_pickle, write_pickle, create_path
 7 | from cocoa.core.schema import Schema
 8 | from cocoa.lib import logstats
 9 | 
10 | from cocoa.neural.trainer import add_trainer_arguments, Trainer, Statistics
11 | from cocoa.neural.loss import SimpleLossCompute
12 | from cocoa.neural.beam import Scorer
13 | 
14 | from neural.utterance import UtteranceBuilder
15 | from neural.model_builder import add_model_arguments
16 | from neural import add_data_generator_arguments, get_data_generator, make_model_mappings
17 | from neural import model_builder
18 | from neural.evaluator import Evaluator, add_evaluator_arguments
19 | 
if __name__ == '__main__':
    # `get_generator` is called below but is not among this script's
    # top-level imports, so that call raised NameError.
    # NOTE(review): assumed to live in neural/generator.py alongside the other
    # `neural.*` helpers this script imports -- confirm the module path.
    from neural.generator import get_generator

    parser = argparse.ArgumentParser()
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    parser.add_argument('--stats-file', help='Path to save json statistics (dataset, training etc.) file')
    add_data_generator_arguments(parser)
    add_evaluator_arguments(parser)
    args = parser.parse_args()

    # Know which arguments are for the models thus should not be
    # overwritten during test
    dummy_parser = argparse.ArgumentParser(description='duh')
    add_model_arguments(dummy_parser)
    add_data_generator_arguments(dummy_parser)
    # Parsing an empty argv yields the default value of every model/data option.
    dummy_args = dummy_parser.parse_known_args([])[0]

    if cuda.is_available() and not args.gpuid:
        print("WARNING: You have a CUDA device, should run with --gpuid 0")

    if args.gpuid:
        cuda.set_device(args.gpuid[0])

    # Load the model.
    mappings, model, model_args = \
        model_builder.load_test_model(args.checkpoint, args, dummy_args.__dict__)

    # Figure out src and tgt vocab
    make_model_mappings(model_args.model, mappings)

    schema = Schema(model_args.schema_path, None)
    data_generator = get_data_generator(args, model_args, schema, test=True)

    # Prefix: [GO]
    scorer = Scorer(args.alpha)
    generator = get_generator(model, mappings['tgt_vocab'], scorer, args, model_args)
    builder = UtteranceBuilder(mappings['tgt_vocab'], args.n_best, has_tgt=True)
    evaluator = Evaluator(model, mappings, generator, builder, gt_prefix=1)
    evaluator.evaluate(args, model_args, data_generator)
57 | 


--------------------------------------------------------------------------------
/dealornodeal/fb_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/fb_model/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/fb_model/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017-present, Facebook, Inc.
 2 | # All rights reserved.
 3 | #
 4 | # This source code is licensed under the license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | """
 7 | Configuration script. Stores variables and settings used across application
 8 | """
 9 | 
10 | import logging
11 | 
# logging configuration
log_level = logging.INFO
log_format = '%(asctime)s : %(levelname)s : %(filename)s : %(message)s'

# default training settings
data_dir = 'data/negotiate' # data corpus directory
nembed_word = 256 # size of word embeddings
nembed_ctx = 64 # size of context embeddings
nhid_lang = 256 # size of the hidden state for the language model
nhid_ctx = 64 # size of the hidden state for the context model
nhid_strat = 64 # size of the hidden state for the strategy model
nhid_attn = 64 # size of the hidden state for the attention module
nhid_sel = 64 # size of the hidden state for the selection module
lr = 20.0 # initial learning rate
min_lr = 1e-5 # min threshold for learning rate annealing
decay_rate = 9.0 # decrease learning rate by this factor
decay_every = 1 # decrease learning rate after decay_every epochs
momentum = 0.0 # momentum for SGD
nesterov = False # enable Nesterov momentum
clip = 0.2 # gradient clipping
dropout = 0.5 # dropout rate in embedding layer
init_range = 0.1 # initialization range
max_epoch = 30 # max number of epochs
bsz = 25 # batch size
unk_threshold = 20 # minimum word frequency to be in dictionary
temperature = 0.1 # temperature
sel_weight = 1.0 # selection weight
seed = 1 # random seed
cuda = False # use CUDA
plot_graphs = False # use visdom
domain = "object_division" # domain for the dialogue
rnn_ctx_encoder = False # Whether to use RNN for encoding the context

# rl settings
# NOTE(review): none of the values below is described in this file; confirm
# their meaning against the code that reads them before changing any.
rl_temperature = 0.1
verbose = True
rl_score_threshold = 6
rl_gamma = 0.95
rl_eps = 0
rl_momentum = 0.1
rl_lr = 0.5
rl_reinforcement_lr = 0.1
rl_reinforcement_clip = 1
rl_clip = 1
rl_bsz = 16
rl_sv_train_freq = 4
rl_nepoch = 1


--------------------------------------------------------------------------------
/dealornodeal/fb_model/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/fb_model/models/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/fb_model/test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017-present, Facebook, Inc.
 2 | # All rights reserved.
 3 | #
 4 | # This source code is licensed under the license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | """
 7 | Performs evaluation of the model on the test dataset.
 8 | """
 9 | 
10 | import argparse
11 | 
12 | import numpy as np
13 | import torch
14 | from torch.autograd import Variable
15 | 
16 | import data
17 | import utils
18 | from engine import Engine, Criterion
19 | 
20 | 
def main():
    """Evaluate a pretrained model on the test set and print its losses.

    Parses the command line, loads the corpus and the saved model, runs
    ``Engine.forward`` on every test batch, and prints the language-model
    loss/perplexity followed by the selection loss/perplexity.
    """
    parser = argparse.ArgumentParser(description='testing script')
    parser.add_argument('--data', type=str, default='data/negotiate',
        help='location of the data corpus')
    parser.add_argument('--unk_threshold', type=int, default=20,
        help='minimum word frequency to be in dictionary')
    # Required: the value is always handed to utils.load_model below, so a
    # missing flag should be reported by argparse instead of failing later.
    parser.add_argument('--model_file', type=str, required=True,
        help='pretrained model file')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    parser.add_argument('--hierarchical', action='store_true', default=False,
        help='use hierarchical model')
    parser.add_argument('--bsz', type=int, default=16,
        help='batch size')
    parser.add_argument('--cuda', action='store_true', default=False,
        help='use CUDA')
    args = parser.parse_args()

    device_id = utils.use_cuda(args.cuda)
    utils.set_seed(args.seed)

    corpus = data.WordCorpus(args.data, freq_cutoff=args.unk_threshold, verbose=True)
    model = utils.load_model(args.model_file)

    # one criterion for the word-level loss, one for the selection loss
    crit = Criterion(model.word_dict, device_id=device_id)
    sel_crit = Criterion(model.item_dict, device_id=device_id,
        bad_toks=['<disconnect>', '<disagree>'])

    testset, testset_stats = corpus.test_dataset(args.bsz, device_id=device_id)
    test_loss, test_select_loss = 0, 0

    N = len(corpus.word_dict)
    for batch in testset:
        # run forward on the batch, produces output, hidden, target,
        # selection output and selection target
        out, hid, tgt, sel_out, sel_tgt = Engine.forward(model, batch, volatile=False)

        # compute LM and selection losses; the LM loss is weighted by
        # tgt.size(0) so the sum can be normalised by 'nonpadn' below
        test_loss += tgt.size(0) * crit(out.view(-1, N), tgt).data[0]
        test_select_loss += sel_crit(sel_out, sel_tgt).data[0]

    test_loss /= testset_stats['nonpadn']
    test_select_loss /= len(testset)
    print('testloss %.3f | testppl %.3f' % (test_loss, np.exp(test_loss)))
    print('testselectloss %.3f | testselectppl %.3f' % (test_select_loss, np.exp(test_select_loss)))


if __name__ == '__main__':
    main()
71 | 


--------------------------------------------------------------------------------
/dealornodeal/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/model/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/model/dialogue_state.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.dialogue_state import DialogueState as State
 2 | 
 3 | class DialogueState(State):
 4 |     def __init__(self, agent, kb):
 5 |         super(DialogueState, self).__init__(agent, kb)
 6 |         self.proposal = [None, None]
 7 |         self.curr_proposal = None
 8 | 
 9 |     @property
10 |     def my_proposal(self):
11 |         return self.proposal[self.agent]
12 | 
13 |     @my_proposal.setter
14 |     def my_proposal(self, proposal):
15 |         self.proposal[self.agent] = proposal
16 | 
17 |     @property
18 |     def partner_proposal(self):
19 |         return self.proposal[self.partner]
20 | 
21 |     def update(self, agent, utterance):
22 |         super(DialogueState, self).update(agent, utterance)
23 |         if not utterance:
24 |             return
25 |         lf = utterance.lf
26 |         if hasattr(lf, 'proposal') and lf.proposal is not None:
27 |             self.proposal[agent] = lf.proposal
28 |             self.curr_proposal = lf.proposal
29 | 


--------------------------------------------------------------------------------
/dealornodeal/model/manager.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.manager import Manager as BaseManager
 2 | 
 3 | class Manager(BaseManager):
 4 |     def available_actions(self, state):
 5 |         actions = super(Manager, self).available_actions(state)
 6 |         masked_actions = []
 7 |         if state.curr_proposal is None:
 8 |             masked_actions.extend(['select', 'agree'])
 9 |         actions = [a for a in actions if not a in masked_actions]
10 |         return actions
11 | 
12 |     def choose_action(self, state, context=None):
13 |         action = super(Manager, self).choose_action(state, context)
14 |         if action in ('select', 'agree') and not state.my_act in ('clarify', 'agree'):
15 |             return 'clarify'
16 |         if state.my_act == 'clarify':
17 |             if state.partner_act in ('propose', 'insist'):
18 |                 if state.partner_proposal and state.partner_proposal != state.my_proposal:
19 |                     return 'propose'
20 |             if state.partner_act == 'disagree':
21 |                 return 'propose'
22 |             return 'select'
23 |         return action
24 | 


--------------------------------------------------------------------------------
/dealornodeal/neural/__init__.py:
--------------------------------------------------------------------------------
 1 | import onmt
 2 | 
 3 | def get_data_generator(args, model_args, schema, test=False):
 4 |     from cocoa.core.scenario_db import ScenarioDB
 5 |     from cocoa.core.dataset import read_dataset
 6 |     from cocoa.core.util import read_json
 7 | 
 8 |     from core.scenario import Scenario
 9 |     from core.lexicon import Lexicon
10 |     from preprocess import DataGenerator, Preprocessor
11 |     import os.path
12 | 
13 |     # TODO: move this to dataset
14 |     dataset = read_dataset(args, Scenario)
15 | 
16 |     mappings_path = model_args.mappings
17 | 
18 |     lexicon = Lexicon(schema.values['item'])
19 |     preprocessor = Preprocessor(schema, lexicon, model_args.entity_encoding_form,
20 |         model_args.entity_decoding_form, model_args.entity_target_form,
21 |         model=model_args.model)
22 | 
23 |     if test:
24 |         model_args.dropout = 0
25 |         train, dev, test = None, None, dataset.test_examples
26 |     else:
27 |         train, dev, test = dataset.train_examples, dataset.test_examples, None
28 |     data_generator = DataGenerator(train, dev, test, preprocessor, args, schema, mappings_path,
29 |         cache=args.cache, ignore_cache=args.ignore_cache,
30 |         num_context=model_args.num_context,
31 |         batch_size=args.batch_size,
32 |         model=model_args.model)
33 | 
34 |     return data_generator
35 | 
def check_model_args(args):
    """Sanity-check model options, raising ``AssertionError`` on a conflict.

    Checks performed:
      * when ``pretrained_wordvec`` is set, the vector size read from the
        first line of the file (token count minus one) must equal
        ``word_embed_size``, and also ``context_size`` when a 'bow' context
        encoder is in use;
      * the 'rnn-attn' decoder needs ``attention_memory``;
      * ``num_context > 0`` cannot be combined with ``stateful``;
      * ``temperature`` must be non-negative.

    Args:
        args: options object exposing the attributes named above.
            ``pretrained_wordvec`` may be a path or a list/tuple whose first
            element is the path.
    """
    if args.pretrained_wordvec:
        if isinstance(args.pretrained_wordvec, (list, tuple)):
            pretrained = args.pretrained_wordvec[0]
        else:
            pretrained = args.pretrained_wordvec
        with open(pretrained, 'r') as fin:
            # first token on the line is the word, the rest are the vector
            pretrained_word_embed_size = len(fin.readline().strip().split()) - 1
        assert pretrained_word_embed_size == args.word_embed_size, \
            'pretrained embedding size %s does not match word_embed_size %s' % (
                pretrained_word_embed_size, args.word_embed_size)

        if args.context and args.context_encoder == 'bow':
            assert pretrained_word_embed_size == args.context_size, \
                'pretrained embedding size %s does not match context_size %s' % (
                    pretrained_word_embed_size, args.context_size)

    if args.decoder == 'rnn-attn':
        assert args.attention_memory is not None, \
            'decoder rnn-attn requires attention_memory'

    if args.num_context > 0:
        assert not args.stateful, \
            'num_context > 0 cannot be combined with stateful'

    assert args.temperature >= 0, \
        'temperature must be non-negative, got %s' % (args.temperature,)
56 | 
def make_model_mappings(model, mappings):
    """Add 'src_vocab' and 'tgt_vocab' entries to ``mappings`` in place.

    The source vocabulary is always the utterance vocabulary. The target is
    the LF vocabulary for the 'seq2lf' model and the utterance vocabulary for
    every other model. The same dict is returned.
    """
    target_key = 'lf_vocab' if model == 'seq2lf' else 'utterance_vocab'
    mappings['src_vocab'] = mappings['utterance_vocab']
    mappings['tgt_vocab'] = mappings[target_key]
    return mappings
65 | 
def build_optim(opt, model, checkpoint):
    """Create an ``onmt.Optim`` for ``model`` from the options in ``opt``.

    ``checkpoint`` is accepted but not used by this function.
    """
    print('Making optimizer for training.')
    optimizer = onmt.Optim(
        opt.optim,
        opt.learning_rate,
        opt.max_grad_norm,
        model_size=opt.rnn_size,
    )
    # hand the model's parameters to the optimizer
    optimizer.set_parameters(model.parameters())
    return optimizer
75 | 


--------------------------------------------------------------------------------
/dealornodeal/neural/models.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from cocoa.neural.models import NMTModel
 4 | 
 5 | class NegotiationModel(NMTModel):
 6 | 
 7 |     def __init__(self, encoder, decoder, context_embedder, selectors, scene_settings,
 8 |             dropout, stateful=False):
 9 |         super(NegotiationModel, self).__init__(encoder, decoder, stateful=stateful)
10 |         self.context_embedder = context_embedder
11 |         self.kb_embedder = nn.Embedding(*scene_settings)
12 |         self.dropout = dropout
13 |         self.select_encoder = selectors['enc']   # 1 encoder
14 |         self.select_decoders = selectors['dec']  # 6 decoders
15 | 
16 |     def forward(self, src, tgt, context, scene, lengths, dec_state=None, enc_state=None, tgt_lengths=None):
17 |         # ---- ENCODING PROCESS -----
18 |         enc_final, enc_memory_bank = self.encoder(src, lengths, enc_state)
19 |         # the memory bas are the RNN hidden states
20 |         context_output, context_memory_bank = self.context_embedder(context)
21 |         scene_memory_bank = self.kb_embedder(scene)
22 | 
23 |         # memory_banks are each (seq_len x batch_size x hidden_size)
24 |         memory_banks = [enc_memory_bank, context_memory_bank, scene_memory_bank]
25 | 
26 |         # ---- DECODING PROCESS ----
27 |         enc_state = self.decoder.init_decoder_state(src, enc_memory_bank, enc_final)
28 |         dec_state = enc_state if dec_state is None else dec_state
29 |         outputs, dec_state, attns = self.decoder(tgt, memory_banks,
30 |                 dec_state, memory_lengths=lengths, lengths=tgt_lengths)
31 | 
32 |         return outputs, attns, dec_state
33 | 


--------------------------------------------------------------------------------
/dealornodeal/neural/symbols.py:
--------------------------------------------------------------------------------
 1 | from cocoa.neural.symbols import Marker as BaseMarker
 2 | 
 3 | # Facebook Negotiation
class Marker(BaseMarker):
    """Special tokens for this domain, added on top of ``BaseMarker``."""
    # Sequence
    GO = '<go>'

    # Actions
    SELECT = '<select>'
    # OFFER = '<offer>'
    # ACCEPT = '<accept>'
    # REJECT = '<reject>'
    QUIT = '<quit>'

# Module-level alias used by importers (e.g. ``from symbols import markers``).
markers = Marker
# EOS and PAD are not defined in this class; presumably inherited from
# BaseMarker -- confirm there.
sequence_markers = [markers.EOS, markers.GO, markers.PAD]
action_markers = [markers.SELECT, markers.QUIT]
18 | 


--------------------------------------------------------------------------------
/dealornodeal/neural/utterance.py:
--------------------------------------------------------------------------------
 1 | from cocoa.neural.utterance import Utterance
 2 | from cocoa.neural.utterance import UtteranceBuilder as BaseUtteranceBuilder
 3 | 
 4 | from symbols import markers
 5 | from cocoa.core.entity import is_entity
 6 | 
 7 | class UtteranceBuilder(BaseUtteranceBuilder):
 8 |     """
 9 |     Build a word-based utterance from the batch output
10 |     of generator and the underlying dictionaries.
11 |     """
12 |     def scene_to_sent(self, variables, vocab):
13 |         sent_ids = variables.data.cpu().numpy()
14 |         # <pad> token removed from kb_vocab, so no need to check
15 |         # pad_id = vocab.to_ind(markers.PAD)
16 |         sent_words = [vocab.to_word(x) for x in sent_ids]
17 |         title = "KB SCENARIO:"
18 |         book = "  Book count: {}, value: {}".format(sent_words[0], sent_words[1])
19 |         hat = "  Hat count: {}, value: {}".format(sent_words[2], sent_words[3])
20 |         ball = "  Ball count: {}, value: {}".format(sent_words[4], sent_words[5])
21 |         return [title, book, hat, ball]
22 | 
23 |     def selection_to_sent(self, variables, vocab):
24 |         select_ids = variables.data.cpu().numpy()
25 |         sel = [vocab.to_word(x) for x in select_ids]
26 | 
27 |         title = "OUTCOME PRED:"
28 |         mine = "  My book: {}, hat: {}, ball {}".format(sel[0], sel[1], sel[2])
29 |         theirs = "  Their book: {}, hat: {}, ball: {}".format(sel[3], sel[4], sel[5])
30 |         return [title, mine, theirs]
31 | 
32 |     def _entity_to_str(self, entity_token, kb):
33 |         # there is no price scaling here, so we can just return the entity
34 |         return str(entity_token.canonical.value)
35 | 
36 | 


--------------------------------------------------------------------------------
/dealornodeal/neural/vocab_builder.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.vocab import Vocabulary
 2 | from cocoa.neural.vocab_builder import build_utterance_vocab
 3 | 
 4 | from symbols import markers, sequence_markers
 5 | 
 6 | def build_kb_vocab(dialogues, special_symbols=[]):
 7 |     kb_vocab = Vocabulary(offset=0, unk=False)
 8 |     for dialogue in dialogues:
 9 |         assert dialogue.is_int is False
10 |         kb_vocab.add_words(dialogue.scenario)
11 | 
12 |     kb_vocab.add_words(special_symbols, special=True)
13 |     kb_vocab.finish()
14 | 
15 |     print 'KB vocab size:', kb_vocab.size
16 |     return kb_vocab
17 | 
def build_lf_vocab(dialogues):
    """Build the logical-form vocabulary from ``dialogues``.

    Every entry of each dialogue's ``lfs`` is added as words, followed by the
    sequence markers as special symbols.

    Returns:
        The finished ``Vocabulary``.
    """
    vocab = Vocabulary(offset=0, unk=True)
    for dialogue in dialogues:
        assert dialogue.is_int is False
        for lf in dialogue.lfs:
            vocab.add_words(lf)
    vocab.add_words(sequence_markers, special=True)
    vocab.finish()
    # Single-argument print call: same output under Python 2 and Python 3.
    print('LF vocabulary size: {}'.format(vocab.size))
    return vocab
28 | 
def create_mappings(dialogues, schema, entity_forms):
    """Build the vocabularies for ``dialogues`` and return them in a dict.

    ``schema`` is accepted but not used here. The result has the keys
    'utterance_vocab' and 'kb_vocab'.
    """
    mappings = {}
    mappings['utterance_vocab'] = build_utterance_vocab(
        dialogues, sequence_markers, entity_forms)
    mappings['kb_vocab'] = build_kb_vocab(dialogues)
    return mappings
35 | 


--------------------------------------------------------------------------------
/dealornodeal/onmt:
--------------------------------------------------------------------------------
1 | ../onmt/


--------------------------------------------------------------------------------
/dealornodeal/reinforce.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Takes two agent (Session) implementations, generates the dialogues,
 3 | and run REINFORCE.
 4 | """
 5 | 
 6 | import argparse
 7 | import random
 8 | import json
 9 | import numpy as np
10 | 
11 | from onmt.Utils import use_gpu
12 | 
13 | from cocoa.core.util import read_json
14 | from cocoa.core.schema import Schema
15 | from cocoa.core.scenario_db import ScenarioDB
16 | from cocoa.neural.loss import ReinforceLossCompute
17 | import cocoa.options
18 | 
19 | from core.scenario import Scenario
20 | from core.controller import Controller
21 | from systems import get_system
22 | from neural.rl_trainer import RLTrainer
23 | from neural import build_optim
24 | import options
25 | 
def make_loss(opt, model, tgt_vocab):
    """Create the REINFORCE loss for ``model``'s generator.

    The loss is moved to the GPU when ``use_gpu(opt)`` is true.
    """
    criterion = ReinforceLossCompute(model.generator, tgt_vocab)
    on_gpu = use_gpu(opt)
    if on_gpu:
        criterion.cuda()
    return criterion
31 | 
32 | 
if __name__ == '__main__':
    # Script entry point: build the agent systems, then run REINFORCE on the
    # first agent while the remaining agents stay fixed.
    parser = argparse.ArgumentParser(conflict_handler='resolve')
    parser.add_argument('--agents', help='What kind of agent to use. The first agent is always going to be updated and the second is fixed.', nargs='*', required=True)
    parser.add_argument('--agent-checkpoints', nargs='+', help='Directory to learned models')
    parser.add_argument('--random-seed', help='Random seed', type=int, default=1)
    parser.add_argument('--verbose', default=False, action='store_true', help='Whether or not to have verbose prints')
    # This file is read below, so it is an input path, not an output path.
    parser.add_argument('--valid-scenarios-path', help='Path to the validation scenarios')
    cocoa.options.add_scenario_arguments(parser)
    options.add_system_arguments(parser)
    options.add_rl_arguments(parser)
    options.add_model_arguments(parser)
    args = parser.parse_args()

    if args.random_seed:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
    valid_scenario_db = ScenarioDB.from_dict(schema, read_json(args.valid_scenarios_path), Scenario)

    # --agent-checkpoints is optional and may list fewer entries than
    # --agents. Pad with None so every agent has a (possibly empty) checkpoint
    # instead of raising IndexError/TypeError while building the systems.
    checkpoints = list(args.agent_checkpoints or [])
    assert len(checkpoints) <= len(args.agents)
    checkpoints += [None] * (len(args.agents) - len(checkpoints))
    systems = [get_system(name, args, schema, False, checkpoint)
               for name, checkpoint in zip(args.agents, checkpoints)]

    # index of the agent whose model is trained
    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = make_loss(args, model, system.mappings['tgt_vocab'])
    optim = build_optim(args, model, None)

    scenarios = {'train': scenario_db.scenarios_list, 'dev': valid_scenario_db.scenarios_list}
    trainer = RLTrainer(systems, scenarios, loss, optim, rl_agent, reward_func=args.reward)
    trainer.learn(args)
66 | 


--------------------------------------------------------------------------------
/dealornodeal/scripts/analyze.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from cocoa.core.schema import Schema
 4 | from cocoa.core.dataset import read_examples
 5 | from cocoa.model.dialogue_parser import parse_example
 6 | from cocoa.analysis.analyzer import Analyzer
 7 | 
 8 | from core.scenario import Scenario
 9 | from core.lexicon import Lexicon
10 | from model.generator import Templates
11 | from model.manager import Manager
12 | 
if __name__ == '__main__':
    # Script entry point: print example statistics, then parser statistics,
    # for the given transcripts.
    parser = argparse.ArgumentParser()
    parser.add_argument('--transcripts', nargs='*', help='JSON transcripts to extract templates')
    parser.add_argument('--max-examples', default=-1, type=int)
    parser.add_argument('--templates', help='Path to load templates')
    parser.add_argument('--policy', help='Path to load model')
    parser.add_argument('--schema-path', help='Path to schema')
    parser.add_argument('--agent', help='Only consider examples with the given type of agent')
    args = parser.parse_args()

    schema = Schema(args.schema_path)
    lexicon = Lexicon(schema.values['item'])
    #templates = Templates.from_pickle(args.templates)
    templates = Templates()
    manager = Manager.from_pickle(args.policy)
    analyzer = Analyzer(lexicon)

    # TODO: skip examples
    examples = read_examples(args.transcripts, args.max_examples, Scenario)
    agent = args.agent
    if agent is not None:
        # keep only the examples in which the requested agent type took part
        examples = list(filter(lambda ex: agent in ex.agents.values(), examples))
    analyzer.example_stats(examples, agent=agent)
    #import sys; sys.exit()

    parsed_dialogues = [parse_example(example, lexicon, templates)
                        for example in examples]

    analyzer.parser_stats(parsed_dialogues, agent=agent)
    #analyzer.manager_stats(manager)
45 | 


--------------------------------------------------------------------------------
/dealornodeal/scripts/create_scenarios.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | import copy
 4 | from cocoa.core.scenario_db import ScenarioDB
 5 | from cocoa.core.schema import Schema
 6 | from cocoa.core.util import generate_uuid, write_json, read_json
 7 | from core.kb import KB
 8 | from core.scenario import Scenario
 9 | 
# Command-line script: read one KB per line from the ints file, pair up
# consecutive KBs with equal item counts, and write the resulting scenarios
# as JSON.
parser = argparse.ArgumentParser()
parser.add_argument('--schema-path')
parser.add_argument('--scenario-ints-file', help='Path to the file containing 6 integers per line that describes the scenario')
parser.add_argument('--output', help='Path to the output JSON scenario file')
args = parser.parse_args()

schema = Schema(args.schema_path)

scenarios = []
with open(args.scenario_ints_file) as fin:
    kbs = []
    names = ['book', 'hat', 'ball']
    for line in fin:
        ints = list(map(int, line.strip().split()))
        kbs.append(KB.from_ints(schema.attributes, names, ints))
        if len(kbs) != 2:
            # still waiting for the second KB of the pair
            continue
        if kbs[0].item_counts != kbs[1].item_counts:
            # Mismatch: drop the older KB and try pairing the newer one with
            # the next line.
            del kbs[0]
            continue
        assert kbs[0].item_counts == kbs[1].item_counts
        scenarios.append(Scenario(generate_uuid("FB"), schema.attributes, kbs))
        kbs = []

scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.output)
37 | 


--------------------------------------------------------------------------------
/dealornodeal/scripts/fb_data_to_int_scenario.py:
--------------------------------------------------------------------------------
1 | import sys
# Command-line script: copy whitespace-separated fields 1..6 (0-based) of
# every input line to the output file, joined by single spaces.
data_file, out_file = sys.argv[1], sys.argv[2]

with open(data_file, 'r') as fin:
    with open(out_file, 'w') as fout:
        for line in fin:
            fields = line.strip().split()
            fout.write('%s\n' % ' '.join(fields[1:7]))
9 | 


--------------------------------------------------------------------------------
/dealornodeal/sessions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/sessions/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/sessions/cmd_session.py:
--------------------------------------------------------------------------------
 1 | from session import Session
 2 | 
 3 | class CmdSession(Session):
 4 |     def __init__(self, agent, kb):
 5 |         super(CmdSession, self).__init__(agent)
 6 |         self.kb = kb
 7 |         print("End game using <select> x y z; which corresponds to book, hat, ball")
 8 | 
 9 |     def send(self):
10 |         message = raw_input()
11 |         event = self.parse_input(message)
12 |         return event
13 | 
14 |     def parse_input(self, message):
15 |         """Parse user input from the command line.
16 |         Args:  message (str)
17 |         Returns: Event
18 |         """
19 |         raw_tokens = message.split()
20 |         tokens = self.remove_nonprintable(raw_tokens)
21 | 
22 |         print tokens
23 | 
24 |         if len(tokens) >= 2 and tokens[0] == '<select>':
25 |             t = [int(token) for idx, token in enumerate(tokens) if idx > 0]
26 |             proposal = {'book': t[0], 'hat': t[1], 'ball': t[2]}
27 |             return self.select(proposal)
28 |         elif tokens[0] == '<quit>':
29 |             return self.quit()
30 |         else:
31 |             return self.message(" ".join(tokens))
32 | 
33 |     def receive(self, event):
34 |         print event.data
35 | 


--------------------------------------------------------------------------------
/dealornodeal/sessions/session.py:
--------------------------------------------------------------------------------
 1 | from cocoa.sessions.session import Session as BaseSession
 2 | from core.event import Event
 3 | 
 4 | class Session(BaseSession):
 5 |     def select(self, outcome, metadata=None):
 6 |         return Event.SelectEvent(self.agent, data=outcome, time=self.timestamp(), metadata=metadata)
 7 | 
 8 |     def quit(self, metadata=None):
 9 |         return Event.QuitEvent(self.agent, time=self.timestamp(), metadata=metadata)
10 | 


--------------------------------------------------------------------------------
/dealornodeal/systems/__init__.py:
--------------------------------------------------------------------------------
 1 | from core.lexicon import Lexicon
 2 | from model.manager import Manager
 3 | from model.generator import Templates, Generator
 4 | 
 5 | from rulebased_system import RulebasedSystem
 6 | from cmd_system import CmdSystem
 7 | from neural_system import FBNeuralSystem, PytorchNeuralSystem
 8 | from hybrid_system import HybridSystem
 9 | 
def get_system(name, args, schema=None, timed=False, model_path=None):
    """Create the system registered under ``name``.

    Args:
        name: one of 'rulebased', 'hybrid', 'cmd', 'fb-neural', 'pt-neural'.
        args: parsed options; ``templates``, ``policy`` and ``temperature``
            are read depending on the system.
        schema: schema used to build the lexicon. Needed by 'rulebased',
            'hybrid' and 'pt-neural'; may be left as None for 'cmd' and
            'fb-neural', which do not use a lexicon.
        timed: forwarded to the created system where it accepts it.
        model_path: checkpoint path, asserted to be set for 'hybrid',
            'fb-neural' and 'pt-neural'.

    Returns:
        The constructed system.

    Raises:
        ValueError: if ``name`` is not a known system.
    """
    # Systems that never touch the lexicon are handled first, so the default
    # schema=None no longer fails on ``schema.values`` for them.
    if name == 'cmd':
        return CmdSystem()
    if name == 'fb-neural':
        assert model_path
        return FBNeuralSystem(model_path, args.temperature, timed_session=timed, gpu=False)
    if name not in ('rulebased', 'hybrid', 'pt-neural'):
        raise ValueError('Unknown system %s' % name)

    lexicon = Lexicon(schema.values['item'])
    if name == 'rulebased':
        templates = Templates.from_pickle(args.templates)
        generator = Generator(templates)
        manager = Manager.from_pickle(args.policy)
        return RulebasedSystem(lexicon, generator, manager, timed)
    if name == 'hybrid':
        assert model_path
        templates = Templates.from_pickle(args.templates)
        # the neural system acts as the manager of the hybrid system
        manager = PytorchNeuralSystem(args, schema, lexicon, model_path, timed)
        generator = Generator(templates)
        return HybridSystem(lexicon, generator, manager, timed)
    # name == 'pt-neural'
    assert model_path
    return PytorchNeuralSystem(args, schema, lexicon, model_path, timed)
33 | 


--------------------------------------------------------------------------------
/dealornodeal/systems/cmd_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.system import System as BaseSystem
 2 | from sessions.cmd_session import CmdSession
 3 | 
 4 | class CmdSystem(BaseSystem):
 5 |     def __init__(self):
 6 |         super(CmdSystem, self).__init__()
 7 | 
 8 |     @classmethod
 9 |     def name(cls):
10 |         return 'cmd'
11 | 
12 |     def new_session(self, agent, kb):
13 |         return CmdSession(agent, kb)
14 | 


--------------------------------------------------------------------------------
/dealornodeal/systems/hybrid_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem
 2 | from sessions.hybrid_session import HybridSession
 3 | 
 4 | class HybridSystem(BaseRulebasedSystem):
 5 | 
 6 |     def _new_session(self, agent, kb, use_rl=False):
 7 |         self.manager.timed_session = False
 8 |         manager_session = self.manager.new_session(agent, kb)
 9 |         return HybridSession(agent, kb, self.lexicon, None,
10 |                 self.generator, manager_session)
11 | 
12 |     @classmethod
13 |     def name(cls):
14 |         return 'hybrid'
15 | 
16 | 


--------------------------------------------------------------------------------
/dealornodeal/systems/rulebased_system.py:
--------------------------------------------------------------------------------
1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem
2 | from sessions.rulebased_session import RulebasedSession
3 | 
class RulebasedSystem(BaseRulebasedSystem):
    """Rule-based system that creates this package's ``RulebasedSession``."""

    def _new_session(self, agent, kb, config=None):
        """Create a session from this system's lexicon, generator and manager."""
        session = RulebasedSession(
            agent, kb, self.lexicon, config, self.generator, self.manager)
        return session
7 | 


--------------------------------------------------------------------------------
/dealornodeal/turk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/turk/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/turk/eval_data.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | 
 3 | from cocoa.turk.eval_data import EvalData as BaseEvalData, add_eval_data_arguments
 4 | 
 5 | class EvalData(BaseEvalData):
 6 |     @classmethod
 7 |     def get_agent_name(cls, turns):
 8 |         names = []
 9 |         for utterance in turns:
10 |             if utterance[0] == '<go-b>':
11 |                 role = 'buyer'
12 |             elif utterance[0] == '<go-s>':
13 |                 role = 'seller'
14 |             elif len(names) > 0:
15 |                 role = 'buyer' if names[-1] == 'seller' else 'seller'
16 |             else:
17 |                 raise Exception('Cannot infer agent name')
18 |             names.append(role)
19 |         return names
20 | 
21 |     @classmethod
22 |     def process_utterance(cls, utterance, role=''):
23 |         tokens = []
24 |         for w in utterance:
25 |             if not isinstance(w, basestring):
26 |                 if w[1][1] == 'price' or w[1] == 'price':
27 |                     tokens.append('PRICE')
28 |                 else:
29 |                     raise ValueError
30 |             elif w in ('<select>', '<quit>'):
31 |                 tokens.append(w[1:-1].upper())
32 |             # Category markers
33 |             elif len(w) > 2 and w[0] == '<' and w[-1] == '>':
34 |                 continue
35 |             # De-tokenize
36 |             elif (w in string.punctuation or "'" in w) and len(tokens) > 0:
37 |                 tokens[-1] += w
38 |             else:
39 |                 tokens.append(w)
40 |         return super(EvalData, cls).process_utterance(' '.join(tokens), role)
41 | 
42 |     @classmethod
43 |     def valid_example(cls, example, num_context_utterances):
44 |         last_utterance = example['prev_turns'][-1]
45 |         if '<select>' in last_utterance:
46 |             return False
47 |         return super(EvalData, cls).valid_example(example, num_context_utterances)
48 | 


--------------------------------------------------------------------------------
/dealornodeal/web/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/web/app_params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "status_params": {
 3 | 	"waiting": {
 4 |       "num_seconds": 210 
 5 |   },
 6 | 	"chat": {
 7 | 	    "num_seconds": 1200
 8 | 	},
 9 | 
10 | 	"finished": {
11 | 	    "num_seconds": -1
12 | 	},
13 | 
14 | 	"survey": {
15 | 	    "num_seconds": -1
16 | 	},
17 | 	"redirected": {
18 | 	    "num_seconds": -1
19 | 	},
20 | 	"incomplete": {
21 | 	    "num_seconds": -1
22 | 	},
23 | 	"reporting": {
24 | 	    "num_seconds": -1
25 | 	}
26 |     },
27 |     "connection_timeout_num_seconds": 3,
28 |     "idle_timeout_num_seconds": 210,
29 |     "templates_dir": "web/templates",
30 |     "images_base": "web/static/images",
31 |     "instructions": "web/templates/instructions.html",
32 |     "task_title": "Deal or No Deal",
33 |     "icon": "handshake.jpg",
34 |     "end_survey": 1,
35 |     "skip_chat_enabled": false,
36 |     "debug": false,
37 |     "num_chats_per_scenario": {"rulebased": 1, "neural": 1, "human": 0},
38 |     "models": {
39 |         "rulebased": {
40 |             "active": true,
41 |             "type": "rulebased"
42 |         },
43 |         "neural": {
44 |             "active": true,
45 |             "type": "neural"
46 |         }
47 |     }
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/dealornodeal/web/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/main/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/web/main/utils.py:
--------------------------------------------------------------------------------
1 | from cocoa.web.main.utils import Messages as BaseMessages
2 | 
class Messages(BaseMessages):
    """User-facing message strings for this task, overriding the base defaults."""
    # Text for a chat that ended with a deal.
    ChatCompleted = "Great, you were able to successfully reach a deal!"
    # Text for a chat that ended without a deal.
    ChatIncomplete = "Sorry, you weren't able to reach a deal. :("
    # Text for the case where the two submitted offers do not match.
    Redirect = "Sorry, you and your partner entered mismatched offers."
7 | 


--------------------------------------------------------------------------------
/dealornodeal/web/static/img/ball.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/static/img/ball.png


--------------------------------------------------------------------------------
/dealornodeal/web/static/img/book.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/static/img/book.png


--------------------------------------------------------------------------------
/dealornodeal/web/static/img/handshake.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/static/img/handshake.jpg


--------------------------------------------------------------------------------
/dealornodeal/web/static/img/hat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/static/img/hat.png


--------------------------------------------------------------------------------
/dealornodeal/web/templates/finished.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>Thank You!</title>
 4 |         <!--<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/bootstrap.min.css')}}">-->
 5 |         <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">
 6 |         <link rel="stylesheet" type="text/css" href="../static/css/chat.css">
 7 |         <style>
 8 |             #content > p {
 9 |                 font-size:18px;
                margin: 40px 70px;
11 |             }
12 |         </style>
13 |         <script type="text/javascript" src="//code.jquery.com/jquery-1.4.2.min.js"></script>
14 |         <script type="text/javascript" charset="utf-8">
15 |         	var BASE_URL = 'http://' + document.domain + ':' + location.port;
16 |         	function visualize() {
17 |         		window.location.href = BASE_URL +'/visualize?uid={{uid}}';
18 |         	}
19 |         </script>
20 |     </head>
21 |     <body>
22 |     <div id="content">
23 |         <h2>Deal or No Deal?</h2>
24 |         <h4>{{ finished_message }}</h4>
25 | 	</div>
26 | 	<div id="mturk">
27 | 		{% if mturk_code is not none %}
28 | 		<hr>
29 | 		<h3>Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}</h3>
30 | 		{% endif %}
31 | 		<hr>
32 | 		{% if visualize %}
33 | 		<h4><a href="javascript:visualize();">Click here</a> to visualize the dialogue you just completed</h4>
34 | 		{% endif %}
35 | 	</div>
36 | 
37 |     </body>
38 | </html>
39 | 


--------------------------------------------------------------------------------
/dealornodeal/web/templates/instructions.html:
--------------------------------------------------------------------------------
1 | <h4 class="help">Help Text</h4>
2 | <ul class="help">
3 |   <li>Please do not leave the chat unattended. If you are inactive for more than 3 minutes your connection will time out.</li>
4 |   <li>Be careful. You can only mark the deal as agreed <b>once</b>.</li>
5 |   <li>Remember that if no deal is reached, both sides get no points.</li>
6 |   <li>Use the button on the lower right to report any problems with the interface.</li>
7 | </ul>


--------------------------------------------------------------------------------
/dealornodeal/web/templates/visualize.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | {{ dialogue }}
4 | </html>


--------------------------------------------------------------------------------
/dealornodeal/web/views/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/dealornodeal/web/views/__init__.py


--------------------------------------------------------------------------------
/dealornodeal/web/views/action.py:
--------------------------------------------------------------------------------
 1 | from flask import Blueprint, jsonify, request
 2 | from cocoa.web.views.utils import userid, format_message
 3 | from web.main.backend import get_backend
 4 | 
# Flask blueprint under which the action endpoints of this module are registered.
action = Blueprint('action', __name__)
 6 | 
 7 | @action.route('/_offer/', methods=['GET'])
 8 | def offer():
 9 |     backend = get_backend()
10 |     price = float(request.args.get('price'))
11 |     sides = request.args.get('sides')
12 | 
13 |     offer = {'price': price,
14 |              'sides': sides}
15 | 
16 |     if offer is None or price == -1:
17 |         return jsonify(message=format_message("You made an invalid offer. Please try again.", True))
18 |     backend.make_offer(userid(), offer)
19 | 
20 |     displayed_message = format_message("You made an offer!", True)
21 |     return jsonify(message=displayed_message)
22 | 
23 | 
@action.route('/_select/', methods=['GET'])
def select():
    """Record the item split chosen by the current user.

    Reads the integer query parameters ``book-split``, ``hat-split`` and
    ``ball-split``. If any of them is missing or not an integer, an
    "invalid selection" message is returned and nothing is sent to the
    backend.

    Returns:
        A JSON response whose ``message`` field holds the formatted text.
    """
    backend = get_backend()
    # ``type=int`` yields None for absent or non-integer values rather than raising.
    proposal = dict(
        (item, request.args.get('%s-split' % item, type=int))
        for item in ('book', 'hat', 'ball')
    )
    if any(count is None for count in proposal.values()):
        return jsonify(message=format_message("You made an invalid selection. Please try again.", True))

    backend.select(userid(), proposal)
    return jsonify(message=format_message("You selected items and marked deal as agreed!", True))
36 | 
@action.route('/_reject/', methods=['GET'])
def reject():
    """Tell the backend that the current user rejected, and confirm it to the user.

    Returns:
        A JSON response whose ``message`` field holds the formatted text.
    """
    get_backend().reject(userid())
    confirmation = format_message("You declared there was no deal!", True)
    return jsonify(message=confirmation)
44 | 
45 | 
@action.route('/_quit/', methods=['GET'])
def quit():
    """Tell the backend that the current user quit, and confirm it to the user.

    Returns:
        A JSON response whose ``message`` field holds the formatted text.
    """
    get_backend().quit(userid())
    confirmation = format_message("You chose to quit this task.", True)
    return jsonify(message=confirmation)
52 | 


--------------------------------------------------------------------------------
/mutualfriends/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/analysis/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/analysis/html_visualizer.py:
--------------------------------------------------------------------------------
 1 | from cocoa.analysis.html_visualizer import HTMLVisualizer as BaseHTMLVisualizer
 2 | 
 3 | class HTMLVisualizer(BaseHTMLVisualizer):
 4 |     agent_labels = {'human': 'Human', 'rulebased': 'Rule-based', 'static-neural': 'StanoNet', 'dynamic-neural': 'DynoNet', 'rule_bot': 'Rule-based', 'neural': 'Neural'}
 5 |     #questions = ("fluent", "correct", 'cooperative', "humanlike")
 6 |     questions = ('cooperative', "humanlike")
 7 | 
 8 |     @classmethod
 9 |     def render_scenario(cls, scenario, img_path=None):
10 |         html = ["<div class=\"scenario\">", '<div class=\"divTitle\">Scenario %s</div>' % scenario.uuid]
11 |         for (idx, kb) in enumerate(scenario.kbs):
12 |             kb_dict = kb.to_dict()
13 |             attributes = [attr.name for attr in scenario.attributes]
14 |             scenario_alphas = scenario.alphas
15 |             if len(scenario_alphas) == 0:
16 |                 scenario_alphas = ['default' * len(scenario.attributes)]
17 |             alphas = dict((attr.name, alpha) for (attr, alpha) in zip(scenario.attributes, scenario_alphas))
18 |             html.append("<div class=\"kb%d\"><table><tr>"
19 |                         "<td colspan=\"%d\" class=\"agentLabel\">Agent %d</td></tr>" % (idx, len(attributes), idx))
20 | 
21 |             for attr in attributes:
22 |                 html.append("<th>%s (%.1f)</th>" % (attr, alphas[attr]))
23 |             html.append("</tr>")
24 | 
25 |             for item in kb_dict:
26 |                 html.append("<tr>")
27 |                 for attr in attributes:
28 |                     html.append("<td>%s</td>" % item[attr])
29 |                 html.append("</tr>")
30 | 
31 |             html.append("</table></div>")
32 | 
33 |         html.append("</div>")
34 |         return html
35 | 
36 |     @classmethod
37 |     def render_event(cls, event):
38 |         if event.action == 'select':
39 |             s = 'SELECT (' + ' || '.join(event.data.values()) + ')'
40 |         else:
41 |             s = super(HTMLVisualizer, cls).render_event(event)
42 |         return s
43 | 


--------------------------------------------------------------------------------
/mutualfriends/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/core/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/core/controller.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.controller import Controller as BaseController
 2 | 
 3 | class Controller(BaseController):
 4 |     def __init__(self, scenario, sessions, chat_id=None):
 5 |         super(Controller, self).__init__(scenario, sessions, chat_id, allow_cross_talk=True)
 6 |         self.selections = [None, None]
 7 | 
 8 |     def event_callback(self, event):
 9 |         if event.action == 'select':
10 |             self.selections[event.agent] = event.data
11 | 
12 |     def get_outcome(self):
13 |         if self.selections[0] is not None and self.selections[0] == self.selections[1]:
14 |             reward = 1
15 |         else:
16 |             reward = 0
17 |         return {'reward': reward}
18 | 
19 |     def game_over(self):
20 |         return not self.inactive() and self.selections[0] is not None and self.selections[0] == self.selections[1]
21 | 
22 |     def complete(self):
23 |         return self.selections[0] is not None and self.selections[0] == self.selections[1]
24 | 


--------------------------------------------------------------------------------
/mutualfriends/core/event.py:
--------------------------------------------------------------------------------
1 | from cocoa.core.event import Event as BaseEvent
2 | 
class Event(BaseEvent):
    """Event type extended with a factory for selection events."""

    @staticmethod
    def SelectionEvent(agent, data, time=None):
        """Create an event with action 'select' carrying ``data`` for ``agent``."""
        action = 'select'
        return Event(agent, time, action, data)
7 | 
8 | 


--------------------------------------------------------------------------------
/mutualfriends/core/kb.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.kb import KB as BaseKB
 2 | 
 3 | class KB(BaseKB):
 4 |     def __init__(self, attributes, items):
 5 |         super(KB, self).__init__(attributes)
 6 |         self.items = items
 7 |         self.entity_set = set([value.lower() for item in items for value in item.values()])
 8 |         self.entity_type_set = set([attr.value_type for attr in self.attributes])
 9 | 
10 |     def to_dict(self):
11 |         return self.items
12 | 
13 |     @staticmethod
14 |     def from_dict(attributes, raw):
15 |         return KB(attributes, raw)
16 | 
17 |     def dump(self):
18 |         header_item = dict((attr.name, attr.name) for attr in self.attributes)
19 |         rows = [header_item] + self.items
20 |         widths = [max(len(str(row[attr.name])) for row in rows) for attr in self.attributes]
21 |         print '----------------'
22 |         for i, row in enumerate(rows):
23 |             id_ = '{:3s}'.format('') if i == 0 else '{:<3d}'.format(i-1)
24 |             print id_, ' ', '  '.join(('%%-%ds' % widths[i]) % (row[attr.name],) for i, attr in enumerate(self.attributes))
25 | 
26 |     def get_item(self, idx):
27 |         return self.items[idx]
28 | 


--------------------------------------------------------------------------------
/mutualfriends/core/scenario.py:
--------------------------------------------------------------------------------
 1 | from cocoa.core.scenario_db import Scenario as BaseScenario
 2 | from cocoa.core.schema import Attribute
 3 | from kb import KB
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def __init__(self, uuid, attributes, kbs, alphas=[]):
 7 |         super(Scenario, self).__init__(uuid, attributes, kbs)
 8 |         self.alphas = alphas
 9 | 
10 |     @staticmethod
11 |     def from_dict(schema, raw):
12 |         alphas = []
13 |         # compatibility with older data format
14 |         if schema is not None:
15 |             attributes = schema.attributes
16 |         else:
17 |             assert 'attributes' in raw
18 |         if 'attributes' in raw:
19 |             attributes = [Attribute.from_json(raw_attr) for raw_attr in raw['attributes']]
20 |         if 'alphas' in raw:
21 |             alphas = raw['alphas']
22 |         return Scenario(raw['uuid'], attributes, [KB.from_dict(attributes, kb) for kb in raw['kbs']], alphas)
23 | 
24 |     def to_dict(self):
25 |         d = super(Scenario, self).to_dict()
26 |         d['alphas'] = self.alphas
27 |         return d
28 | 


--------------------------------------------------------------------------------
/mutualfriends/core/tokenizer.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from cocoa.core.tokenizer import detokenize
 3 | 
 4 | def tokenize(utterance):
 5 |     '''
 6 |     'hi there!' => ['hi', 'there', '!']
 7 |     '''
 8 |     utterance = utterance.encode('utf-8').lower()
 9 |     # Remove '-' to match lexicon preprocess
10 |     for s in (' - ', '-'):
11 |         utterance = utterance.replace(s, ' ')
12 |     # Split on punctuation
13 |     tokens = re.findall(r"[\w']+|[.,!?;&-]", utterance)
14 |     return tokens
15 | 
16 | 


--------------------------------------------------------------------------------
/mutualfriends/data/friends-schema-old.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "values": {
 3 |     "major": ["Computer Science", "Physics", "Math", "Economics", "Linguistics"],
 4 |     "school": ["Stanford", "Columbia", "University of Pennsylvania", "University of California - Berkeley"],
 5 |     "company": ["Google", "Facebook", "Apple", "Uber", "Amazon"]
 6 |   },
 7 |   "attributes": [
 8 |     {"name": "Major", "value_type": "major", "unique": false},
 9 |     {"name": "School", "value_type": "school", "unique": false},
10 |     {"name": "Company", "value_type": "company", "unique": false}
11 |   ]
12 | }


--------------------------------------------------------------------------------
/mutualfriends/data/friends-schema.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "values": {
 3 |     "person": ["Alice", "Bob", "Carol", "Doug", "Edward", "Fiona", "Genevieve", "Herbert", "Ian", "Jill", "Kennedy", "Lisa", "Molly", "Nathan", "Oscar", "Paul"],
 4 |     "major": ["computer science", "mathematics", "philosophy", "linguistics"],
 5 |     "company": ["Microsoft", "Google", "Apple", "Facebook", "Uber", "Dropbox", "Intel", "Sony", "Panasonic"],
 6 |     "hobby": ["hiking", "surfing", "cooking", "reading", "biking"]
 7 |   },
 8 |   "attributes": [
 9 |     {"name": "Name", "value_type": "person", "unique": false},
10 |     {"name": "Company", "value_type": "company", "unique": false},
11 |     {"name": "Bachelors major", "value_type": "major", "unique": false},
12 |     {"name": "Hobby", "value_type": "hobby", "unique": false}
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/mutualfriends/model/dialogue_state.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.dialogue_state import DialogueState as State
 2 | 
 3 | class DialogueState(State):
 4 |     def __init__(self, agent, kb):
 5 |         super(DialogueState, self).__init__(agent, kb)
 6 |         self.entities = [[], []]
 7 |         self.exclude_entities = [[], []]
 8 |         self.mentioned_entities = set()
 9 |         self.recent_mentioned_entities = []
10 |         self.matched_item = None
11 |         self.selected_items = []
12 | 
13 |     @property
14 |     def my_entities(self):
15 |         return self.entities[self.agent]
16 | 
17 |     @my_entities.setter
18 |     def my_entities(self, entities):
19 |         self.entities[self.agent] = entities
20 | 
21 |     @property
22 |     def partner_entities(self):
23 |         return self.entities[self.partner]
24 | 
25 |     @property
26 |     def partner_exclude_entities(self):
27 |         return self.exclude_entities[self.partner]
28 | 
29 |     def update(self, agent, utterance):
30 |         super(DialogueState, self).update(agent, utterance)
31 |         lf = utterance.lf
32 |         if lf.intent != 'select':
33 |             if hasattr(lf, 'entities'):
34 |                 self.entities[agent] = lf.entities
35 |                 self.mentioned_entities.update(lf.entities)
36 |                 self.recent_mentioned_entities.extend(lf.entities)
37 |                 self.recent_mentioned_entities = self.recent_mentioned_entities[-10:]
38 |             if hasattr(lf, 'exclude_entities'):
39 |                 self.exclude_entities[agent] = lf.exclude_entities
40 |                 self.mentioned_entities.update(lf.exclude_entities)
41 |         else:
42 |             self.selected_items.append(lf.item)
43 |             if lf.matched:
44 |                 self.matched_item = lf.item
45 | 


--------------------------------------------------------------------------------
/mutualfriends/model/graph_embedder_config.py:
--------------------------------------------------------------------------------
 1 | class GraphEmbedderConfig(object):
 2 |     def __init__(self, node_embed_size, edge_embed_size, graph_metadata, entity_embed_size=None, use_entity_embedding=False, mp_iters=2, decay=1, msg_agg='sum', learned_decay=False):
 3 |         self.node_embed_size = node_embed_size
 4 | 
 5 |         self.num_edge_labels = graph_metadata.relation_map.size
 6 |         self.edge_embed_size = edge_embed_size
 7 | 
 8 |         # RNN output size
 9 |         self.utterance_size = graph_metadata.utterance_size
10 |         self.decay = decay
11 |         self.learned_decay = learned_decay
12 | 
13 |         # Size of input features from Graph
14 |         self.feat_size = graph_metadata.feat_size
15 | 
16 |         # Number of message passing iterations
17 |         self.mp_iters = mp_iters
18 |         self.msg_agg = msg_agg
19 | 
20 |         self.context_size = self.node_embed_size * mp_iters
21 |         # x2 because we encoder and decoder utterances are concatenated
22 |         self.context_size += (self.utterance_size * 2 + self.feat_size)
23 |         if use_entity_embedding:
24 |             self.context_size += entity_embed_size
25 | 
26 |         self.use_entity_embedding = use_entity_embedding
27 |         if use_entity_embedding:
28 |             self.num_entities = graph_metadata.entity_map.size
29 |             self.entity_embed_size = entity_embed_size
30 | 
31 |         # padding
32 |         self.pad_path_id = graph_metadata.PAD_PATH_ID
33 |         self.node_pad = graph_metadata.NODE_PAD
34 | 
35 | 


--------------------------------------------------------------------------------
/mutualfriends/model/manager.py:
--------------------------------------------------------------------------------
 1 | from cocoa.model.manager import Manager as BaseManager
 2 | 
 3 | class Manager(BaseManager):
 4 |     def available_actions(self, state):
 5 |         actions = super(Manager, self).available_actions(state)
 6 |         masked_actions = ['negative']
 7 |         actions = [a for a in actions if not a in masked_actions]
 8 |         return actions
 9 | 
10 |     def choose_action(self, state, context=None):
11 |         if state.matched_item:
12 |             return 'select'
13 |         action = super(Manager, self).choose_action(state, context)
14 |         return action
15 | 


--------------------------------------------------------------------------------
/mutualfriends/results.txt:
--------------------------------------------------------------------------------
 1 | 2017-12-26-neural
 2 | 2017-12-26-rulebased
 3 | 2017-12-26-test
 4 | defaultdict(<type 'int'>, {(u'human', u'rulebased'): 209, (u'dynamic-neural', u'human'): 162, (u'human', u'human'): 236})
 5 | <start>    0.1599 0.0000
 6 | inquire    0.1767 0.0000
 7 | unknown    0.0505 0.0000
 8 | negative   0.1415 0.0000
 9 | greet      0.1171 0.1599
10 | inform     0.3542 0.8401
11 | DYNAMIC-NEURAL
12 | success per select 0.27
13 | success per turn 0.05
14 | success         0.89
15 | RULEBASED
16 | success per select 0.36
17 | success per turn 0.05
18 | success         0.81
19 | HUMAN
20 | success per select 0.42
21 | success per turn 0.07
22 | success         0.93
23 | =========== mean ===========
24 | dynamic-neural [20 22 14 53 53]
25 | rulebased [33 40 19 52 65]
26 | human [  8  15  25 121 298]
27 | dynamic-neural [15 19 15 45 68]
28 | rulebased [23 27 16 62 81]
29 | human [ 15  22  18 139 273]
30 | ============= Cooperation ===============
31 | agent        avg_score  error      #score     win
32 | ---------------------------------------
33 | DynoNet      3.6        0.11       162
34 | Rule-based   3.4        0.10       209
35 | Human        4.5        0.04       467        dr
36 | ============= Human-likeness ===============
37 | agent        avg_score  error      #score     win
38 | ---------------------------------------
39 | DynoNet      3.8        0.11       162
40 | Rule-based   3.7        0.10       209
41 | Human        4.4        0.05       467        dr
42 | 


--------------------------------------------------------------------------------
/mutualfriends/scripts/analyze.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from cocoa.core.schema import Schema
 4 | from cocoa.core.dataset import read_examples
 5 | from cocoa.model.dialogue_parser import parse_example
 6 | from cocoa.analysis.analyzer import Analyzer
 7 | 
 8 | from core.scenario import Scenario
 9 | from core.lexicon import Lexicon, add_lexicon_arguments
10 | from model.generator import Templates
11 | from model.manager import Manager
12 | 
if __name__ == '__main__':
    # Command-line options, declared in a table and registered in order.
    parser = argparse.ArgumentParser()
    option_specs = (
        ('--transcripts', dict(nargs='*', help='JSON transcripts to extract templates')),
        ('--max-examples', dict(default=-1, type=int)),
        ('--templates', dict(help='Path to load templates')),
        ('--policy', dict(help='Path to load model')),
        ('--schema-path', dict(help='Path to schema')),
        ('--agent', dict(help='Only consider examples with the given type of agent')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    add_lexicon_arguments(parser)
    args = parser.parse_args()

    # Resources used for parsing; templates start empty rather than being
    # loaded from args.templates.
    schema = Schema(args.schema_path)
    lexicon = Lexicon(schema, False, stop_words=args.stop_words, lexicon_path=args.lexicon)
    templates = Templates()
    manager = Manager.from_pickle(args.policy)
    analyzer = Analyzer(lexicon)

    # Load the transcripts, optionally keeping only those involving the given agent type.
    examples = read_examples(args.transcripts, args.max_examples, Scenario)
    agent = args.agent
    if agent is not None:
        examples = [ex for ex in examples if agent in ex.agents.values()]
    analyzer.example_stats(examples, agent=agent)

    # Parse every example and report parser statistics.
    parsed_dialogues = [parse_example(example, lexicon, templates) for example in examples]
    analyzer.parser_stats(parsed_dialogues, agent=agent)
44 |     #analyzer.manager_stats(manager)
45 | 


--------------------------------------------------------------------------------
/mutualfriends/sessions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/sessions/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/sessions/cmd_session.py:
--------------------------------------------------------------------------------
 1 | from session import Session
 2 | 
 3 | class CmdSession(Session):
 4 |     def __init__(self, agent, kb):
 5 |         super(CmdSession, self).__init__(agent)
 6 |         self.kb = kb
 7 | 
 8 |     def send(self):
 9 |         message = raw_input()
10 |         event = self.parse_input(message)
11 |         return event
12 | 
13 |     def parse_input(self, message):
14 |         """Parse user input from the command line.
15 | 
16 |         Args:
17 |             message (str)
18 | 
19 |         Returns:
20 |             Event
21 | 
22 |         """
23 |         tokens = message.split()
24 |         if len(tokens) >= 2 and tokens[0] == '<select>':
25 |             return self.select(self.kb.items[int(tokens[1])])
26 |         return self.message(message)
27 | 
28 |     def receive(self, event):
29 |         print event.data
30 | 


--------------------------------------------------------------------------------
/mutualfriends/sessions/session.py:
--------------------------------------------------------------------------------
 1 | from cocoa.sessions.session import Session as BaseSession
 2 | from core.event import Event
 3 | 
 4 | class Session(BaseSession):
 5 |     def select(self, item):
 6 |         """Select an item from the KB.
 7 | 
 8 |         Args:
 9 |             item ({attribute_name: attribute_value})
10 | 
11 |         Returns:
12 |             SelectionEvent
13 | 
14 |         """
15 |         return Event.SelectionEvent(self.agent, item, time=self.timestamp())
16 | 


--------------------------------------------------------------------------------
/mutualfriends/systems/__init__.py:
--------------------------------------------------------------------------------
 1 | from core.lexicon import Lexicon, add_lexicon_arguments
 2 | from model.manager import Manager
 3 | from model.generator import Templates, Generator
 4 | from core.inverse_lexicon import InverseLexicon, DefaultInverseLexicon
 5 | from rulebased_system import RulebasedSystem, add_rulebased_arguments
 6 | from neural_system import NeuralSystem, add_neural_system_arguments
 7 | from cmd_system import CmdSystem
 8 | 
 9 | def add_system_arguments(parser):
10 |     add_lexicon_arguments(parser)
11 |     add_neural_system_arguments(parser)
12 |     add_rulebased_arguments(parser)
13 | 
def get_system(name, args, schema=None, timed=False, model_path=None):
    """Construct the system called ``name``.

    Args:
        name: 'rulebased', 'neural' or 'cmd'.
        args: parsed options. The lexicon options are read for 'rulebased'
            and 'neural'; ``templates`` and ``policy`` for 'rulebased';
            ``inverse_lexicon``, ``fact_check``, ``decoding`` and (as a
            fallback) ``model_path`` for 'neural'.
        schema: passed to the lexicon and the neural system.
        timed: passed to the rule-based system.
        model_path: model location for the neural system; when omitted,
            ``args.model_path`` is used.

    Returns:
        The constructed system.

    Raises:
        ValueError: if ``name`` is not a known system.
        AssertionError: if a neural system is requested without a model path.
    """
    if name in ('rulebased', 'neural'):
        lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words, lexicon_path=args.lexicon)
    if name == 'rulebased':
        templates = Templates.from_pickle(args.templates)
        generator = Generator(templates)
        manager = Manager.from_pickle(args.policy)
        return RulebasedSystem(lexicon, generator, manager, timed)
    elif name == 'neural':
        # Honor the explicit argument; it used to be ignored in favor of args.model_path.
        model_path = model_path or args.model_path
        assert model_path
        # Only the neural system takes a realizer, so it is built here.
        if args.inverse_lexicon:
            realizer = InverseLexicon.from_file(args.inverse_lexicon)
        else:
            realizer = DefaultInverseLexicon()
        return NeuralSystem(schema, lexicon, model_path, args.fact_check, args.decoding, realizer=realizer)
    elif name == 'cmd':
        return CmdSystem()
    else:
        raise ValueError('Unknown system %s' % name)
33 | 
34 | 


--------------------------------------------------------------------------------
/mutualfriends/systems/cmd_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.system import System as BaseSystem
 2 | from sessions.cmd_session import CmdSession
 3 | 
class CmdSystem(BaseSystem):
    """System whose sessions are ``CmdSession`` objects."""

    def __init__(self):
        super(CmdSystem, self).__init__()

    @classmethod
    def name(cls):
        """Return the identifier of this system type, ``'cmd'``."""
        return 'cmd'

    def new_session(self, agent, kb):
        """Create a ``CmdSession`` for the given ``agent`` and ``kb``."""
        return CmdSession(agent, kb)
14 | 


--------------------------------------------------------------------------------
/mutualfriends/systems/heuristic_system.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'anushabala'
 2 | from cocoa.core.systems.system import System
 3 | from cocoa.core.sessions.mutualfriends.heuristic_session import HeuristicSession
 4 | 
 5 | def add_heuristic_system_arguments(parser):
 6 |     parser.add_argument('--joint-facts', default=False, action='store_true', help='Generate joint attributes, e.g., hiking and philosophy')
 7 |     parser.add_argument('--ask', default=False, action='store_true', help='Ask questions, e.g., do you have ...')
 8 | 
class HeuristicSystem(System):
    """System whose sessions are ``HeuristicSession`` objects.

    Args:
        joint_facts: stored and forwarded to every new session.
        ask: stored and forwarded to every new session.
    """

    def __init__(self, joint_facts, ask):
        super(HeuristicSystem, self).__init__()
        # Control difficulty
        self.joint_facts = joint_facts
        self.ask = ask

    @classmethod
    def name(cls):
        """Return the identifier of this system type, ``'heuristic'``."""
        return 'heuristic'

    def new_session(self, agent, kb):
        """Create a ``HeuristicSession`` configured with the stored flags."""
        return HeuristicSession(agent, kb, self.joint_facts, self.ask)
22 | 


--------------------------------------------------------------------------------
/mutualfriends/systems/rulebased_system.py:
--------------------------------------------------------------------------------
 1 | from cocoa.systems.rulebased_system import RulebasedSystem as BaseRulebasedSystem, add_rulebased_arguments
 2 | from sessions.rulebased_session import RulebasedSession
 3 | 
class RulebasedSystem(BaseRulebasedSystem):
    """Rule-based system that additionally carries an optional realizer.

    The lexicon, generator, manager and timed_session arguments are handed to
    the base class unchanged; ``realizer`` is kept on the instance and passed
    to each session created by ``_new_session``.
    """

    def __init__(self, lexicon, generator, manager, timed_session, realizer=None):
        super(RulebasedSystem, self).__init__(lexicon, generator, manager, timed_session)
        self.realizer = realizer

    def _new_session(self, agent, kb, config):
        """Create a ``RulebasedSession`` for ``agent``.

        Reads ``self.lexicon``, ``self.generator`` and ``self.manager``,
        presumably set by the base-class constructor - verify there.
        """
        return RulebasedSession(agent, kb, self.lexicon, config, self.generator, self.manager, realizer=self.realizer)
12 | 


--------------------------------------------------------------------------------
/mutualfriends/web/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/web/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/web/app_params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "status_params": {
 3 | 	    "waiting": {
 4 | 	        "num_seconds": 180
 5 | 	    },
 6 | 	    "chat": {
 7 | 	        "num_seconds": 300
 8 | 	    },
 9 | 	    "finished": {
10 | 	        "num_seconds": 15
11 | 	    },
12 | 	    "redirected": {
13 | 	        "num_seconds": -1
14 | 	    },
15 | 	    "incomplete": {
16 | 	        "num_seconds": -1
17 | 	    },
18 | 	    "reporting": {
19 | 	        "num_seconds": -1
20 | 	    },
21 |         "survey": {
22 | 	        "num_seconds": -1
23 | 	    }
24 |     },
25 | 
26 |     "scenarios_json_file": "data/test-scenarios.json",
27 |     "idle_timeout_num_seconds": 210,
28 |     "connection_timeout_num_seconds": 3,
29 |     "templates_dir": "web/templates",
30 |     "images_base": "web/static/images",
31 |     "end_survey": 1,
32 |     "instructions": "web/templates/instructions.html",
33 |     "task_title": "Who's Our Mutual Friend?",
34 |     "icon": "handshake.jpg",
35 |     "num_chats_per_scenario": {"rulebased": 1, "human": 1},
36 |     "debug": false,
37 |     "models": {
38 |         "rulebased": {
39 |             "active": true,
40 |             "type": "rulebased"
41 |         }
42 |     }
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/mutualfriends/web/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/web/main/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/web/main/db_reader.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from cocoa.core.util import write_json
 3 | from cocoa.web.main.db_reader import DatabaseReader as BaseDatabaseReader
 4 | 
 5 | class DatabaseReader(BaseDatabaseReader):
 6 |     @classmethod
 7 |     def process_event_data(cls, action, data):
 8 |         if action == 'select':
 9 |             data = json.loads(data)
10 |         return data
11 | 
12 |     # TODO: move this to cocoa. factor survey questions
13 |     @classmethod
14 |     def dump_surveys(cls, cursor, json_path):
15 |         questions = ['humanlike', 'cooperative', 'comments']
16 | 
17 |         cursor.execute('''SELECT * FROM survey''')
18 |         logged_surveys = cursor.fetchall()
19 |         survey_data = {}
20 |         agent_types = {}
21 | 
22 |         for survey in logged_surveys:
23 |             # todo this is pretty lazy - support variable # of questions per task eventually..
24 |             (userid, cid, _, q1, q2, comments) = survey
25 |             responses = dict(zip(questions, [q1, q2, comments]))
26 |             cursor.execute('''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''', (cid,))
27 |             chat_result = cursor.fetchone()
28 |             agents = json.loads(chat_result[0])
29 |             agent_ids = json.loads(chat_result[1])
30 |             agent_types[cid] = agents
31 |             if cid not in survey_data.keys():
32 |                 survey_data[cid] = {0: {}, 1: {}}
33 |             partner_idx = 0 if agent_ids['1'] == userid else 1
34 |             survey_data[cid][partner_idx] = responses
35 | 
36 |         write_json([agent_types, survey_data], json_path)
37 | 


--------------------------------------------------------------------------------
/mutualfriends/web/main/utils.py:
--------------------------------------------------------------------------------
1 | from cocoa.web.main.utils import Messages as BaseMessages
2 | 
class Messages(BaseMessages):
    # User-facing status strings for this task; presumably these override
    # defaults declared on BaseMessages - confirm against the base class.
    ChatCompleted = "Congratulations, you successfully completed the task!"
    ChatIncomplete = "Sorry, you weren't able to complete the task."
    Redirect = "Sorry, that chat did not meet our acceptance criteria."
7 | 
8 | 


--------------------------------------------------------------------------------
/mutualfriends/web/static/img/handshake.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/web/static/img/handshake.jpg


--------------------------------------------------------------------------------
/mutualfriends/web/templates/finished.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>Thank You!</title>
 4 |         <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/bootstrap.min.css')}}">
 5 |         <style>
 6 |             #content > p {
 7 |                 font-size:18px;
 8 |             }
 9 |         </style>
10 |         <script type="text/javascript" src="//code.jquery.com/jquery-1.4.2.min.js"></script>
11 |         <script type="text/javascript" charset="utf-8">
12 |         	var BASE_URL = 'http://' + document.domain + ':' + location.port;
13 |         	function visualize() {
14 |         		window.location.href = BASE_URL +'/visualize?uid={{uid}}';
15 |         	}
16 |         </script>
17 |     </head>
18 |     <body>
19 |     <div id="content">
20 |         <h2>Who's Our Mutual Friend?</h2>
21 |         <h4>{{ finished_message }}</h4>
22 | 	</div>
23 | 	<div id="mturk">
24 | 		{% if mturk_code is not none %}
25 | 		<hr>	
26 | 		<h3>Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}</h3>
27 | 		{% endif %}
28 | 		<hr>
29 | 		{% if visualize %}
30 | 		<h4><a href="javascript:visualize();">Click here</a> to visualize the dialogue you just completed</h4>
31 | 		{% endif %}
32 | 	</div>
33 | 
34 |     </body>
35 | </html>


--------------------------------------------------------------------------------
/mutualfriends/web/templates/instructions.html:
--------------------------------------------------------------------------------
 1 | <p>
 2 |     You and another user online have exactly one mutual friend in common. You know some attributes for each of your friends (like name, school, etc.). Your goal is to find the mutual friend using those attributes!
 3 | </p>
 4 | <h2>Instructions</h2>
 5 | <ul>
 6 |     <li style="color:#FF0000">Please use <b> natural sentences </b> as much as possible.
 7 |         <ul>
 8 |             <li style="color:#009933"> <b>Do:</b> three of my friends went to ut austin </li>
 9 |             <li style="color:#FF0000"> <b>Don't do:</b> 3 ut austin</li>
10 |         </ul>
11 |      </li>
12 |     <li style="color:#FF0000">Avoid simply listing out any attributes (name, company, etc.) of your friends.</li>
13 |     <li> Look at your <b>list of friends</b> at right. </li>
14 |     <li> Use the <b>chat box below</b> to find out more about your partner's friends. </li>
15 |     <li> Once you've found out who it is, select them by clicking <b>Select</b> in the first column.</li>
16 |     <li> <b>If you run out of time while doing the chat, we will still award you money for the HIT</b> if you made a good effort to complete the task.</li>
17 |     <li style="color:#FF0000"><b>Please select carefully</b>. If you select a friend once, the website will not allow you to select another friend for 10 seconds.</li>
18 |     <!--<li style="color:#FF0000"><b>If you experience trouble with your partner</b> and would like to quit (and receive credit for this HIT), a link will appear at the <b>top right</b> of the screen after 2.5 minutes. <b>Note: If you quit the chat you will only receive credit for this HIT if you made a good attempt to finish the chat.</b> </li>-->
19 | 
20 | </ul>


--------------------------------------------------------------------------------
/mutualfriends/web/templates/third_party_eval_finished.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>Thank You!</title>
 4 |         <style>
 5 |         #content > p{
 6 |         font-size:24px;
 7 |         }
 8 |         </style>
 9 |     </head>
10 |     <body>
11 |     <center><div id="content">
12 |         <h2>Evaluating Dialogues</h2>
13 |         {{ finished_message }}
14 |     </div></center>
15 |     <center>
16 |     <div id="mturk">
17 |         {% if mturk_code is not none %}
18 |         <hr>
19 |         <h3>Thanks for completing this HIT! Please copy and paste this code into the HIT on Mechanical Turk: {{ mturk_code }}</h3>
20 |         {% endif %}
21 |     </div>
22 |     </center>
23 | 
24 |     </body>
25 | </html>


--------------------------------------------------------------------------------
/mutualfriends/web/templates/visualize.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 | {{ dialogue }}
4 | </html>


--------------------------------------------------------------------------------
/mutualfriends/web/views/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanfordnlp/cocoa/52fb2d47a28537c7892f146a8972a75bcde4812c/mutualfriends/web/views/__init__.py


--------------------------------------------------------------------------------
/mutualfriends/web/views/action.py:
--------------------------------------------------------------------------------
 1 | from flask import Blueprint, jsonify, request
 2 | from cocoa.web.views.utils import userid, format_message
 3 | from web.main.backend import get_backend
 4 | 
 5 | action = Blueprint('action', __name__)
 6 | 
 7 | @action.route('/_select_option/', methods=['GET'])
 8 | def select():
 9 |     backend = get_backend()
10 |     selection_id = int(request.args.get('selection'))
11 |     if selection_id == -1:
12 |         return
13 |     selected_item = backend.select(userid(), selection_id)
14 | 
15 |     ordered_item = backend.schema.get_ordered_item(selected_item)
16 |     displayed_message = format_message("You selected: {}".format(", ".join([v[1] for v in ordered_item])), True)
17 |     return jsonify(message=displayed_message)
18 | 


--------------------------------------------------------------------------------
/onmt/Utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def aeq(*args):
 5 |     """
 6 |     Assert all arguments have the same value
 7 |     """
 8 |     arguments = (arg for arg in args)
 9 |     first = next(arguments)
10 |     assert all(arg == first for arg in arguments), \
11 |         "Not all arguments have the same value: " + str(args)
12 | 
13 | 
def sequence_mask(lengths, max_len=None):
    """
    Build a mask that is true at position ``j`` of row ``i`` iff
    ``j < lengths[i]``.

    The result has ``lengths.numel()`` rows and ``max_len`` columns; when
    ``max_len`` is falsy (None or 0) the largest length is used instead.
    Assumes ``lengths`` is 1-D - TODO confirm against callers.
    """
    if not max_len:
        max_len = lengths.max()
    num_rows = lengths.numel()
    positions = torch.arange(0, max_len).type_as(lengths)
    position_grid = positions.repeat(num_rows, 1)
    return position_grid.lt(lengths.unsqueeze(1))
24 | 
25 | 
def use_gpu(opt):
    """Tell whether the options object requests a GPU.

    True when ``opt.gpuid`` exists and is non-empty, or when ``opt.gpu``
    exists and is greater than -1.
    """
    if hasattr(opt, 'gpuid') and len(opt.gpuid) > 0:
        return True
    return hasattr(opt, 'gpu') and opt.gpu > -1
29 | 


--------------------------------------------------------------------------------
/onmt/__init__.py:
--------------------------------------------------------------------------------
 1 | import onmt.io
 2 | import onmt.translate
 3 | import onmt.Models
 4 | import onmt.Loss
 5 | from onmt.Trainer import Trainer, Statistics
 6 | from onmt.Optim import Optim
 7 | 
 8 | # For flake8 compatibility
 9 | __all__ = [onmt.Loss, onmt.Models,
10 |            Trainer, Optim, Statistics, onmt.io, onmt.translate]
11 | 


--------------------------------------------------------------------------------
/onmt/io/__init__.py:
--------------------------------------------------------------------------------
 1 | from onmt.io.IO import collect_feature_vocabs, make_features, \
 2 |                        collect_features, get_num_features, \
 3 |                        load_fields_from_vocab, get_fields, \
 4 |                        save_fields_to_vocab, build_dataset, \
 5 |                        build_vocab, merge_vocabs, OrderedIterator
 6 | from onmt.io.DatasetBase import ONMTDatasetBase, PAD_WORD, BOS_WORD, \
 7 |                                 EOS_WORD, UNK
 8 | from onmt.io.TextDataset import TextDataset, ShardedTextCorpusIterator
 9 | from onmt.io.ImageDataset import ImageDataset
10 | from onmt.io.AudioDataset import AudioDataset
11 | 
12 | 
# ``__all__`` must contain names as strings. The constants below are strings
# themselves, so listing them unquoted exported their values, not their names.
__all__ = ["PAD_WORD", "BOS_WORD", "EOS_WORD", "UNK", "ONMTDatasetBase",
           "collect_feature_vocabs", "make_features",
           "collect_features", "get_num_features",
           "load_fields_from_vocab", "get_fields",
           "save_fields_to_vocab", "build_dataset",
           "build_vocab", "merge_vocabs", "OrderedIterator",
           "TextDataset", "ImageDataset", "AudioDataset",
           "ShardedTextCorpusIterator"]
21 | 


--------------------------------------------------------------------------------
/onmt/modules/AudioEncoder.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | class AudioEncoder(nn.Module):
 7 |     """
 8 |     A simple encoder convolutional -> recurrent neural network for
 9 |     audio input.
10 | 
11 |     Args:
12 |         num_layers (int): number of encoder layers.
13 |         bidirectional (bool): bidirectional encoder.
14 |         rnn_size (int): size of hidden states of the rnn.
15 |         dropout (float): dropout probablity.
16 |         sample_rate (float): input spec
17 |         window_size (int): input spec
18 | 
19 |     """
20 |     def __init__(self, num_layers, bidirectional, rnn_size, dropout,
21 |                  sample_rate, window_size):
22 |         super(AudioEncoder, self).__init__()
23 |         self.num_layers = num_layers
24 |         self.num_directions = 2 if bidirectional else 1
25 |         self.hidden_size = rnn_size
26 | 
27 |         self.layer1 = nn.Conv2d(1,   32, kernel_size=(41, 11),
28 |                                 padding=(0, 10), stride=(2, 2))
29 |         self.batch_norm1 = nn.BatchNorm2d(32)
30 |         self.layer2 = nn.Conv2d(32,  32, kernel_size=(21, 11),
31 |                                 padding=(0, 0), stride=(2, 1))
32 |         self.batch_norm2 = nn.BatchNorm2d(32)
33 | 
34 |         input_size = int(math.floor((sample_rate * window_size) / 2) + 1)
35 |         input_size = int(math.floor(input_size - 41) / 2 + 1)
36 |         input_size = int(math.floor(input_size - 21) / 2 + 1)
37 |         input_size *= 32
38 |         self.rnn = nn.LSTM(input_size, rnn_size,
39 |                            num_layers=num_layers,
40 |                            dropout=dropout,
41 |                            bidirectional=bidirectional)
42 | 
43 |     def load_pretrained_vectors(self, opt):
44 |         # Pass in needed options only when modify function definition.
45 |         pass
46 | 
47 |     def forward(self, input, lengths=None):
48 |         "See :obj:`onmt.modules.EncoderBase.forward()`"
49 |         # (batch_size, 1, nfft, t)
50 |         # layer 1
51 |         input = self.batch_norm1(self.layer1(input[:, :, :, :]))
52 | 
53 |         # (batch_size, 32, nfft/2, t/2)
54 |         input = F.hardtanh(input, 0, 20, inplace=True)
55 | 
56 |         # (batch_size, 32, nfft/2/2, t/2)
57 |         # layer 2
58 |         input = self.batch_norm2(self.layer2(input))
59 | 
60 |         # (batch_size, 32, nfft/2/2, t/2)
61 |         input = F.hardtanh(input, 0, 20, inplace=True)
62 | 
63 |         batch_size = input.size(0)
64 |         length = input.size(3)
65 |         input = input.view(batch_size, -1, length)
66 |         input = input.transpose(0, 2).transpose(1, 2)
67 | 
68 |         output, hidden = self.rnn(input)
69 | 
70 |         return hidden, output
71 | 


--------------------------------------------------------------------------------
/onmt/modules/StackedRNN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class StackedLSTM(nn.Module):
 6 |     """
 7 |     Our own implementation of stacked LSTM.
 8 |     Needed for the decoder, because we do input feeding.
 9 |     """
10 |     def __init__(self, num_layers, input_size, rnn_size, dropout):
11 |         super(StackedLSTM, self).__init__()
12 |         self.dropout = nn.Dropout(dropout)
13 |         self.num_layers = num_layers
14 |         self.layers = nn.ModuleList()
15 | 
16 |         for i in range(num_layers):
17 |             self.layers.append(nn.LSTMCell(input_size, rnn_size))
18 |             input_size = rnn_size
19 | 
20 |     def forward(self, input, hidden):
21 |         h_0, c_0 = hidden
22 |         h_1, c_1 = [], []
23 |         for i, layer in enumerate(self.layers):
24 |             h_1_i, c_1_i = layer(input, (h_0[i], c_0[i]))
25 |             input = h_1_i
26 |             if i + 1 != self.num_layers:
27 |                 input = self.dropout(input)
28 |             h_1 += [h_1_i]
29 |             c_1 += [c_1_i]
30 | 
31 |         h_1 = torch.stack(h_1)
32 |         c_1 = torch.stack(c_1)
33 | 
34 |         return input, (h_1, c_1)
35 | 
36 | 
class StackedGRU(nn.Module):
    """
    Stack of ``nn.GRUCell`` layers advanced one time step per call.
    Dropout is applied between layers but not after the last one.
    """

    def __init__(self, num_layers, input_size, rnn_size, dropout):
        super(StackedGRU, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.num_layers = num_layers
        # Only the first cell sees ``input_size``; later cells consume the
        # previous cell's ``rnn_size`` output.
        self.layers = nn.ModuleList([
            nn.GRUCell(input_size if depth == 0 else rnn_size, rnn_size)
            for depth in range(num_layers)
        ])

    def forward(self, input, hidden):
        """Run one step; ``hidden[0]`` holds the per-layer previous states.

        Returns the top-layer output and a 1-tuple with the stacked new
        hidden states.
        """
        prev_h = hidden[0]
        next_h = []
        for depth, cell in enumerate(self.layers):
            h_out = cell(input, prev_h[depth])
            next_h.append(h_out)
            if depth + 1 != self.num_layers:
                input = self.dropout(h_out)
            else:
                input = h_out

        return input, (torch.stack(next_h),)
60 | 


--------------------------------------------------------------------------------
/onmt/modules/StructuredAttention.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch
 3 | import torch.cuda
 4 | from torch.autograd import Variable
 5 | 
 6 | 
 7 | class MatrixTree(nn.Module):
 8 |     """Implementation of the matrix-tree theorem for computing marginals
 9 |     of non-projective dependency parsing. This attention layer is used
10 |     in the paper "Learning Structured Text Representations."
11 | 
12 | 
13 |     :cite:`DBLP:journals/corr/LiuL17d`
14 |     """
15 |     def __init__(self, eps=1e-5):
16 |         self.eps = eps
17 |         super(MatrixTree, self).__init__()
18 | 
19 |     def forward(self, input):
20 |         laplacian = input.exp() + self.eps
21 |         output = input.clone()
22 |         for b in range(input.size(0)):
23 |             lap = laplacian[b].masked_fill(
24 |                 Variable(torch.eye(input.size(1)).cuda().ne(0)), 0)
25 |             lap = -lap + torch.diag(lap.sum(0))
26 |             # store roots on diagonal
27 |             lap[0] = input[b].diag().exp()
28 |             inv_laplacian = lap.inverse()
29 | 
30 |             factor = inv_laplacian.diag().unsqueeze(1)\
31 |                                          .expand_as(input[b]).transpose(0, 1)
32 |             term1 = input[b].exp().mul(factor).clone()
33 |             term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
34 |             term1[:, 0] = 0
35 |             term2[0] = 0
36 |             output[b] = term1 - term2
37 |             roots_output = input[b].diag().exp().mul(
38 |                 inv_laplacian.transpose(0, 1)[0])
39 |             output[b] = output[b] + torch.diag(roots_output)
40 |         return output
41 | 
42 | 
if __name__ == "__main__":
    # Smoke test: compute marginals for one random 5x5 score matrix and print
    # their sums over dim 1. The .cuda() call means a CUDA device is required.
    dtree = MatrixTree()
    q = torch.rand(1, 5, 5).cuda()
    marg = dtree.forward(Variable(q))
    print(marg.sum(1))
48 | 


--------------------------------------------------------------------------------
/onmt/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | from onmt.modules.UtilClass import LayerNorm, Bottle, BottleLinear, \
 2 |     BottleLayerNorm, BottleSoftmax, Elementwise
 3 | from onmt.modules.Gate import context_gate_factory, ContextGate
 4 | from onmt.modules.GlobalAttention import GlobalAttention
 5 | from onmt.modules.ConvMultiStepAttention import ConvMultiStepAttention
 6 | from onmt.modules.ImageEncoder import ImageEncoder
 7 | from onmt.modules.AudioEncoder import AudioEncoder
 8 | from onmt.modules.CopyGenerator import CopyGenerator, CopyGeneratorLossCompute
 9 | from onmt.modules.StructuredAttention import MatrixTree
10 | from onmt.modules.Transformer import \
11 |    TransformerEncoder, TransformerDecoder, PositionwiseFeedForward
12 | from onmt.modules.Conv2Conv import CNNEncoder, CNNDecoder
13 | from onmt.modules.MultiHeadedAttn import MultiHeadedAttention
14 | from onmt.modules.StackedRNN import StackedLSTM, StackedGRU
15 | from onmt.modules.Embeddings import Embeddings, PositionalEncoding
16 | from onmt.modules.WeightNorm import WeightNormConv2d
17 | 
18 | from onmt.Models import EncoderBase, MeanEncoder, StdRNNDecoder, \
19 |     RNNDecoderBase, InputFeedRNNDecoder, RNNEncoder, NMTModel
20 | 
21 | from onmt.modules.SRU import check_sru_requirement
22 | can_use_sru = check_sru_requirement()
23 | if can_use_sru:
24 |     from onmt.modules.SRU import SRU
25 | 
26 | 
# For flake8 compatibility. ``__all__`` must list attribute *names* (strings);
# listing the objects themselves makes ``from onmt.modules import *`` fail.
__all__ = ["EncoderBase", "MeanEncoder", "RNNDecoderBase",
           "InputFeedRNNDecoder", "RNNEncoder", "NMTModel",
           "StdRNNDecoder", "ContextGate", "GlobalAttention", "ImageEncoder",
           "PositionwiseFeedForward", "PositionalEncoding",
           "CopyGenerator", "MultiHeadedAttention",
           "LayerNorm", "Bottle", "BottleLinear", "BottleLayerNorm",
           "BottleSoftmax",
           "TransformerEncoder", "TransformerDecoder", "Embeddings",
           "Elementwise",
           "MatrixTree", "WeightNormConv2d", "ConvMultiStepAttention",
           "CNNEncoder", "CNNDecoder", "StackedLSTM", "StackedGRU",
           "context_gate_factory", "CopyGeneratorLossCompute", "AudioEncoder"]

# SRU is only imported (and therefore only exported) when its requirements
# are met.
if can_use_sru:
    __all__.extend(["SRU", "check_sru_requirement"])
41 | 


--------------------------------------------------------------------------------
/onmt/translate/__init__.py:
--------------------------------------------------------------------------------
1 | from onmt.translate.Translator import Translator
2 | from onmt.translate.Translation import Translation, TranslationBuilder
3 | from onmt.translate.Beam import Beam, GNMTGlobalScorer
4 | 
# ``__all__`` must list attribute names as strings, not the objects.
__all__ = ["Translator", "Translation", "Beam", "GNMTGlobalScorer",
           "TranslationBuilder"]
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | nltk==3.2.4
 2 | whoosh==2.7.4
 3 | ujson==1.35
 4 | numpy==1.13.3
 5 | matplotlib==2.0.2
 6 | flask==0.12.2
 7 | flask-socketio==2.8.5
 8 | scipy==1.0.1
 9 | scikit-learn==0.19.0
10 | datasketch==1.2.3
11 | langdetect==1.0.7
12 | 


--------------------------------------------------------------------------------
/scripts/analyze.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from cocoa.core.dataset import read_examples
 4 | from cocoa.model.dialogue_parser import parse_example
 5 | from cocoa.analysis.analyzer import Analyzer
 6 | 
 7 | from core.scenario import Scenario
 8 | from core.lexicon import Lexicon
 9 | from model.generator import Templates
10 | from model.manager import Manager
11 | 
if __name__ == '__main__':
    # Command-line entry point: parse transcripts with the lexicon and print
    # example-level and parser-level statistics.
    parser = argparse.ArgumentParser()
    parser.add_argument('--lexicon', help='Path to pickled lexicon')
    parser.add_argument('--transcripts', nargs='*', help='JSON transcripts to extract templates')
    parser.add_argument('--max-examples', default=-1, type=int)
    parser.add_argument('--templates', help='Path to load templates')
    parser.add_argument('--policy', help='Path to load model')
    args = parser.parse_args()

    lexicon = Lexicon.from_pickle(args.lexicon)
    # An empty Templates object is used; loading from --templates is disabled.
    #templates = Templates.from_pickle(args.templates)
    templates = Templates()
    # NOTE(review): the manager is loaded but only referenced by the
    # commented-out manager_stats call at the bottom.
    manager = Manager.from_pickle(args.policy)
    analyzer = Analyzer(lexicon)

    examples = read_examples(args.transcripts, args.max_examples, Scenario)

    # One parsed utterance sequence per example.
    parsed_dialogues = []
    for example in examples:
        utterances = parse_example(example, lexicon, templates)
        parsed_dialogues.append(utterances)

    analyzer.example_stats(examples)
    analyzer.parser_stats(parsed_dialogues)
    #analyzer.manager_stats(manager)
37 | 


--------------------------------------------------------------------------------
/scripts/chat_to_scenarios.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from cocoa.core.util import read_json, write_json
 3 | from cocoa.core.scenario_db import ScenarioDB
 4 | from cocoa.core.schema import Schema
 5 | from core.scenario import Scenario
 6 | 
 7 | parser = argparse.ArgumentParser()
 8 | parser.add_argument('--chats')
 9 | parser.add_argument('--scenarios')
10 | parser.add_argument('--max', type=int)
11 | args = parser.parse_args()
12 | 
13 | chats = read_json(args.chats)
14 | scenarios = []
15 | n = args.max or len(chats)
16 | for chat in chats[:n]:
17 |     scenarios.append(Scenario.from_dict(None, chat['scenario']))
18 | scenario_db = ScenarioDB(scenarios)
19 | write_json(scenario_db.to_dict(), args.scenarios)
20 | 


--------------------------------------------------------------------------------
/scripts/combine_json.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Combine different batches of data (transcripts.json and surveys.json).
 3 | '''
 4 | 
 5 | import argparse
 6 | import os
 7 | from cocoa.core.util import read_json, write_json
 8 | 
parser = argparse.ArgumentParser()
parser.add_argument('--paths', nargs='+', help='Paths to transcripts directories')
parser.add_argument('--output', help='Output directory')
args = parser.parse_args()

all_chats = []
# survey data structure: [{}, {}]
all_surveys = [{}, {}]

for d in args.paths:
    # Each input directory is expected to contain transcripts/transcripts.json
    # and transcripts/surveys.json.
    transcript_file = os.path.join(d, 'transcripts/transcripts.json')
    survey_file = os.path.join(d, 'transcripts/surveys.json')

    chats = read_json(transcript_file)
    all_chats.extend(chats)

    surveys = read_json(survey_file)
    # Merge the i-th dict of this batch into the i-th combined dict; on
    # duplicate keys the later batch wins (dict.update).
    for i, s in enumerate(surveys):
        all_surveys[i].update(s)
    print "Combined data from {}".format(d)

# Write the combined files under <output>/transcripts, creating it if needed.
output_dir = args.output + '/transcripts'
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
write_json(all_chats, os.path.join(output_dir, 'transcripts.json'))
write_json(all_surveys, os.path.join(output_dir, 'surveys.json'))
35 | 


--------------------------------------------------------------------------------
/scripts/split_dataset.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from cocoa.core.util import read_json, write_json
 3 | import numpy as np
 4 | from itertools import izip
 5 | 
 6 | parser = argparse.ArgumentParser()
 7 | parser.add_argument('--example-paths', help='Transciprts paths', nargs='*', default=[])
 8 | parser.add_argument('--train-frac', help='Fraction of training examples', type=float, default=0.6)
 9 | parser.add_argument('--test-frac', help='Fraction of test examples', type=float, default=0.2)
10 | parser.add_argument('--dev-frac', help='Fraction of dev examples', type=float, default=0.2)
11 | parser.add_argument('--output-path', help='Output path for splits')
12 | args = parser.parse_args()
13 | 
14 | np.random.seed(0)
15 | json_data = ([], [], [])
16 | for path in args.example_paths:
17 |     examples = read_json(path)
18 |     folds = np.random.choice(3, len(examples), p=[args.train_frac, args.dev_frac, args.test_frac])
19 |     for ex, fold in izip(examples, folds):
20 |         json_data[fold].append(ex)
21 | 
22 | for fold, dataset in izip(('train', 'dev', 'test'), json_data):
23 |     if len(dataset) > 0:
24 |         write_json(dataset, '%s%s.json' % (args.output_path, fold))
25 | 


--------------------------------------------------------------------------------
/scripts/split_transcripts.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from collections import defaultdict
 4 | from src.core.util import read_json, write_json
 5 | 
 6 | parser = argparse.ArgumentParser()
 7 | parser.add_argument('--transcripts', help='Path to transcritps of mixed partners')
 8 | parser.add_argument('--output', help='Output directories')
 9 | args = parser.parse_args()
10 | 
11 | chats = read_json(args.transcripts)
12 | chats_by_agents = defaultdict(list)
13 | scenario_agents = defaultdict(set)
14 | for chat in chats:
15 |     agents = chat['agents']
16 |     if agents['0'] == 'human':
17 |         agents = (agents['0'], agents['1'])
18 |     else:
19 |         agents = (agents['1'], agents['0'])
20 |     chats_by_agents[agents].append(chat)
21 | 
22 |     scenario_id = chat['scenario_uuid']
23 |     scenario_agents[scenario_id].add(agents)
24 | 
25 | # Only keep scenarios with all 4 agents
26 | scenario_subset = set([s for s, a in scenario_agents.iteritems() if len(a) == 4])
27 | print 'Number of scenarios:', len(scenario_subset)
28 | 
29 | for agents, chats in chats_by_agents.iteritems():
30 |     chats = [c for c in chats if c['scenario_uuid'] in scenario_subset]
31 |     print agents, len(chats)
32 |     path = os.path.join(args.output, '%s_transcripts.json' % '-'.join(agents))
33 |     if not os.path.isdir(args.output):
34 |         os.makedirs(args.output)
35 |     write_json(chats, path)
36 | 


--------------------------------------------------------------------------------
/scripts/visualize_transcripts.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from cocoa.core.util import write_json
 4 | from analysis.visualizer import Visualizer
 5 | from analysis.html_visualizer import HTMLVisualizer
 6 | 
 7 | if __name__ == '__main__':
 8 |     parser = ArgumentParser()
 9 |     parser.add_argument('--survey-transcripts', nargs='+',
10 |             help='Path to directory containing evaluation transcripts')
11 |     parser.add_argument('--dialogue-transcripts', nargs='+',
12 |             help='Path to directory containing dialogue transcripts')
13 |     parser.add_argument('--summary', default=False, action='store_true',
14 |             help='Summarize human ratings')
15 |     parser.add_argument('--html-visualize', action='store_true',
16 |             help='Output html files')
17 |     parser.add_argument('--outdir', default='.', help='Output dir')
18 |     parser.add_argument('--stats', default='stats.json',
19 |             help='Path to stats file')
20 |     parser.add_argument('--partner', default=False, action='store_true',
21 |             help='Whether this is from partner survey')
22 |     parser.add_argument('--task', default='cl-neg',
23 |             choices=['cl-neg','fb-neg', 'mutual', 'movies'],
24 |             help='which task you are trying run')
25 |     parser.add_argument('--worker-ids', nargs='+',
26 |             help='Path to json file containing chat_id to worker_id mappings')
27 |     parser.add_argument('--hist', default=False, action='store_true',
28 |             help='Plot histgram of ratings')
29 |     parser.add_argument('--survey-only', default=False, action='store_true',
30 |             help='Only analyze dialogues with survey (completed)')
31 |     parser.add_argument('--base-agent', default='human',
32 |             help='Agent to compare against')
33 | 
34 |     HTMLVisualizer.add_html_visualizer_arguments(parser)
35 |     args = parser.parse_args()
36 | 
37 |     visualizer = Visualizer(args.dialogue_transcripts, args.survey_transcripts)
38 |     results = visualizer.compute_effectiveness(with_survey=args.survey_only, base_agent=args.base_agent)
39 |     visualizer.print_results(results)
40 | 
41 |     if args.hist:
42 |         visualizer.hist(question_scores, args.outdir, partner=args.partner)
43 |     if args.worker_ids:
44 |         visualizer.worker_stats()
45 | 
46 |     # TODO: move summary and hist to analyzer
47 |     if args.summary:
48 |         summary = visualizer.summarize()
49 |         write_json(summary, args.stats)
50 |     if args.html_output:
51 |         visualizer.html_visualize(args.viewer_mode, args.html_output,
52 |             css_file=args.css_file, img_path=args.img_path,
53 |             worker_ids=visualizer.worker_ids)
54 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import sys
3 | 
# Top-level directories that are left out of the installed distribution.
_EXCLUDED_PACKAGES = ('scraper', 'scripts', 'mutualfriends', 'negotiation', 'test')

# Package metadata for the `cocoa` distribution.
setup(
    name='cocoa',
    version='0.1',
    description='platform for dialogue research',
    packages=find_packages(exclude=_EXCLUDED_PACKAGES),
)
9 | 


--------------------------------------------------------------------------------