├── comp-graph.pdf
├── .gitignore
├── constants.py
├── modules
│   ├── gumbel_softmax.py
│   ├── word_counting.py
│   ├── processing.py
│   ├── goal_predicting.py
│   ├── action.py
│   ├── agent.py
│   └── game.py
├── playground.py
├── README.md
├── notes.txt
├── visualize.py
├── train.py
└── configs.py
/comp-graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bkgoksel/emergent-language/HEAD/comp-graph.pdf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .mypy_cache/
2 | .ropeproject/
3 | __pycache__/
4 | mypy.ini
5 | *.sw*
6 | *.pt
7 | *.out
8 | *.pyc
9 | venv/
10 | docker/
11 | 
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | WORLD_DIMENSIONALITY = 2
2 | MOVEMENT_STEP_SIZE = 2
3 | COLOR_SCALE = 1
4 | PHYSICAL_EMBED_SIZE = 2
5 | GOAL_SIZE = int(WORLD_DIMENSIONALITY + 1)
6 | MOVEMENT_DIM_SIZE = int(WORLD_DIMENSIONALITY)
7 | 
--------------------------------------------------------------------------------
/modules/gumbel_softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | 
5 | class GumbelSoftmax(nn.Module):
6 |     def __init__(self, use_cuda=False):
7 |         super(GumbelSoftmax, self).__init__()
8 |         self.using_cuda = use_cuda
9 |         self.softmax = nn.Softmax(dim=1)
10 |         self.temp = 1
11 | 
12 |     def forward(self, x):
13 |         if self.using_cuda:
14 |             U = Variable(torch.rand(x.size()).cuda())
15 |         else:
16 |             U = Variable(torch.rand(x.size()))
17 |         y = x - torch.log(-torch.log(U + 1e-20) + 1e-20)
18 |         return self.softmax(y/self.temp)
19 | 
--------------------------------------------------------------------------------
/modules/word_counting.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch import Tensor
3 | from torch.autograd import Variable
4 | 
5 | class WordCountingModule(nn.Module):
6 |     def __init__(self, config):
7 |         super(WordCountingModule, self).__init__()
8 |         self.oov_prob = config.oov_prob
9 |         word_counts = Tensor(config.vocab_size)
10 |         if config.use_cuda:
11 |             word_counts = word_counts.cuda()  # Tensor.cuda() is not in-place; reassign so the counts actually live on the GPU
12 |         self.word_counts = Variable(word_counts)
13 | 
14 |     def forward(self, utterances):
15 |         cost = -(utterances/(self.oov_prob + self.word_counts.sum() - 1)).sum()
16 |         self.word_counts = self.word_counts + utterances
17 |         return cost
18 | 
--------------------------------------------------------------------------------
/modules/processing.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | """
4 | A ProcessingModule takes an input from a stream and the independent memory
5 | of that stream and runs a single timestep of a GRU cell, followed by
6 | dropout and finally a fully-connected layer with ELU activation on top of the GRU output.
7 | It returns the output of the fully connected layer as well as the update to
8 | the independent memory.
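Expected shapes, inferred from the GRUCell and Linear layers below: x is
[batch, config.input_size], m is [batch, config.hidden_size], and both
returned tensors are [batch, config.hidden_size].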
9 | """ 10 | class ProcessingModule(nn.Module): 11 | def __init__(self, config): 12 | super(ProcessingModule, self).__init__() 13 | self.cell = nn.GRUCell(config.input_size, config.hidden_size) 14 | self.fully_connected = nn.Sequential( 15 | nn.Dropout(config.dropout), 16 | nn.Linear(config.hidden_size, config.hidden_size), 17 | nn.ELU()) 18 | 19 | def forward(self, x, m): 20 | m = self.cell(x, m) 21 | return self.fully_connected(m), m 22 | 23 | -------------------------------------------------------------------------------- /playground.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from modules.game import GameModule 3 | from configs import default_game_config, get_game_config 4 | import code 5 | 6 | 7 | config = { 8 | 'batch_size': default_game_config.batch_size, 9 | 'world_dim': default_game_config.world_dim, 10 | 'max_agents': default_game_config.max_agents, 11 | 'max_landmarks': default_game_config.max_landmarks, 12 | 'min_agents': default_game_config.min_agents, 13 | 'min_landmarks': default_game_config.min_landmarks, 14 | 'num_shapes': default_game_config.num_shapes, 15 | 'num_colors': default_game_config.num_colors, 16 | 'no_utterances': not default_game_config.use_utterances, 17 | 'vocab_size': default_game_config.vocab_size, 18 | 'memory_size': default_game_config.memory_size 19 | } 20 | 21 | agent = torch.load('latest.pt') 22 | agent.reset() 23 | agent.train(False) 24 | code.interact(local=locals()) 25 | -------------------------------------------------------------------------------- /modules/goal_predicting.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from modules.processing import ProcessingModule 3 | 4 | """ 5 | A GoalPredictingProcessingModule acts like a regular processing module but 6 | also runs a goal predictor layer that is a two layer fully-connected 7 | network. It returns the regular processing module's output, its memory 8 | update and finally a goal vector sized goal prediction 9 | """ 10 | class GoalPredictingProcessingModule(nn.Module): 11 | def __init__(self, config): 12 | super(GoalPredictingProcessingModule, self).__init__() 13 | self.processor = ProcessingModule(config.processor) 14 | self.goal_predictor = nn.Sequential( 15 | nn.Dropout(config.dropout), 16 | nn.Linear(config.processor.hidden_size, config.hidden_size), 17 | nn.Dropout(config.dropout), 18 | nn.ELU(), 19 | nn.Linear(config.hidden_size, config.goal_size)) 20 | 21 | def forward(self, x, mem): 22 | processed, mem = self.processor(x, mem) 23 | goal_prediction = self.goal_predictor(processed) 24 | return processed, mem, goal_prediction 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # emergent-language 2 | An implementation of Emergence of Grounded Compositional Language in Multi-Agent Populations by Igor Mordatch and Pieter Abbeel 3 | 4 | To run, invoke `python3 train.py` in environment with PyTorch installed. To experiment with parameters, invoke `python3 train.py --help` to get a list of command line arguments that modify parameters. Currently training just prints out the loss of each game episode run, without any further analysis, and the model weights are not saved at the end. These features are coming soon. 
5 | 
6 | * `game.py` provides a non-tensor based implementation of the game mechanics (used for game behavior exploration and random game generation during training)
7 | * `model.py` provides the full computational model including agent and game dynamics through an entire episode
8 | * `train.py` provides the training harness that runs many games and trains the agents
9 | * `configs.py` provides the data structures that are passed as configuration to various modules in the computational graph, as well as the default values currently used in training
10 | * `constants.py` provides constant factors that shouldn't need modification during regular running of the model
11 | * `visualize.py` provides a computational graph visualization tool taken from [here](https://github.com/szagoruyko/functional-zoo/blob/master/visualize.py)
12 | * `simple_model.py` provides a simple model that doesn't communicate and only moves based on its own goal (used for testing other components)
13 | * `comp-graph.pdf` is a PDF visualization of the computational graph of the game-agent mechanics
14 | 
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------
1 | TODO:
2 | --------
3 | [x] Weight saving
4 | [x] Add a test mode where utterances are argmaxed instead of Gumbel-softmaxed
5 | [x] Add a way to "replay" an episode easily
6 | [x] Better loss over time information printing
7 | [x] Batching
8 | [x] Test role of comms by testing in envs where each agent knows its goal vs it doesn't
9 | [x] Make it possible to have agent get its own goal, determinable with a flag
10 |     [x] Make this a training flag
11 |     [ ] Test how this affects performance
12 | [x] Get avg final distance of agents to their goals
13 | [x] Goal predictions (relative to yourself and agent index)
14 | 
15 | HYPERPARAMETER TUNING:
16 | -------------------------
17 | 
18 | 
19 | LEARNING END:
20 | -------------
21 | [ ] Try relative goals (left of landmark, above landmark), see if relative direction words evolve
22 | [ ] Give different reward coefficients to goals, have a single agent have multiple goals, see if reward values can be communicated
23 | 
24 | DISTANT FUTURE:
25 | --------------
26 | [ ] Visualization of a game
27 | [ ] Web interface to give an initial game state and see how the agents act and what they utter
28 | 
29 | 
30 | Unstructured thoughts:
31 | ----------------
32 | - Color words
33 |     - Simulate different visual systems, see how color words evolve
34 | - Verbs
35 |     - GO
36 |     - TAKE (i.e. make landmarks movable, have a goal be the moving of a landmark to another landmark)
37 | - Make sequential goals (i.e. go to blue, then green) and also multi-landmark but not ordered goals (go to green and blue in any order).
    See if a way to discriminate evolves
38 | - Narration (Agent A observes a certain environment, tries to describe it to Agent B, Agent B predicts the environment history)
39 | 
40 | 
--------------------------------------------------------------------------------
/visualize.py:
--------------------------------------------------------------------------------
1 | from graphviz import Digraph
2 | import torch
3 | from torch.autograd import Variable
4 | 
5 | 
6 | def make_dot(var, params=None, filename=None):
7 |     """ Produces Graphviz representation of PyTorch autograd graph
8 |     Blue nodes are the Variables that require grad, orange are Tensors
9 |     saved for backward in torch.autograd.Function
10 |     Args:
11 |         var: output Variable
12 |         params: dict of (name, Variable) to add names to nodes that
13 |             require grad (TODO: make optional)
14 |     """
15 |     if params is not None:
16 |         assert isinstance(list(params.values())[0], Variable)  # dict.values() is not indexable in Python 3
17 |         param_map = {id(v): k for k, v in params.items()}
18 | 
19 |     node_attr = dict(style='filled',
20 |                      shape='box',
21 |                      align='left',
22 |                      fontsize='12',
23 |                      ranksep='0.1',
24 |                      height='0.2')
25 |     dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
26 |     seen = set()
27 | 
28 |     def size_to_str(size):
29 |         return '('+(', ').join(['%d' % v for v in size])+')'
30 | 
31 |     def add_nodes(var):
32 |         if var not in seen:
33 |             if torch.is_tensor(var):
34 |                 dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange')
35 |             elif hasattr(var, 'variable'):
36 |                 u = var.variable
37 |                 name = param_map[id(u)] if params is not None else ''
38 |                 node_name = '%s\n %s' % (name, size_to_str(u.size()))
39 |                 dot.node(str(id(var)), node_name, fillcolor='lightblue')
40 |             else:
41 |                 dot.node(str(id(var)), str(type(var).__name__))
42 |             seen.add(var)
43 |             if hasattr(var, 'next_functions'):
44 |                 for u in var.next_functions:
45 |                     if u[0] is not None:
46 |                         dot.edge(str(id(u[0])), str(id(var)))
47 |                         add_nodes(u[0])
48 |             if hasattr(var, 'saved_tensors'):
49 |                 for t in var.saved_tensors:
50 |                     dot.edge(str(id(t)), str(id(var)))
51 |                     add_nodes(t)
52 |     add_nodes(var.grad_fn)
53 |     if filename:
54 |         dot.render(filename, view=True)
55 |     return dot
56 | 
--------------------------------------------------------------------------------
/modules/action.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from modules.processing import ProcessingModule
5 | from modules.gumbel_softmax import GumbelSoftmax
6 | 
7 | """
8 | An ActionModule takes in the physical observation feature vector, the
9 | utterance observation feature vector and the individual goal of an agent
10 | (alongside the memory for the module), processes the goal to turn it into
11 | a goal feature vector, and runs the concatenation of all three feature
12 | vectors through a processing module.
The output of the processing module
13 | is then fed into two independent fully connected networks to output
14 | utterance and movement actions.
15 | """
16 | class ActionModule(nn.Module):
17 |     def __init__(self, config):
18 |         super(ActionModule, self).__init__()
19 |         self.using_utterances = config.use_utterances
20 |         self.using_cuda = config.use_cuda
21 |         self.goal_processor = ProcessingModule(config.goal_processor)
22 |         self.processor = ProcessingModule(config.action_processor)
23 |         self.movement_step_size = config.movement_step_size
24 |         self.movement_chooser = nn.Sequential(
25 |             nn.Linear(config.action_processor.hidden_size, config.action_processor.hidden_size),
26 |             nn.ELU(),
27 |             nn.Linear(config.action_processor.hidden_size, config.movement_dim_size),
28 |             nn.Tanh())
29 | 
30 |         if self.using_utterances:
31 |             self.utterance_chooser = nn.Sequential(
32 |                 nn.Linear(config.action_processor.hidden_size, config.hidden_size),
33 |                 nn.ELU(),
34 |                 nn.Linear(config.hidden_size, config.vocab_size))
35 |             self.gumbel_softmax = GumbelSoftmax(config.use_cuda)
36 | 
37 |     def forward(self, physical, goal, mem, training, utterance=None):
38 |         goal_processed, _ = self.goal_processor(goal, mem)
39 |         if self.using_utterances:
40 |             x = torch.cat([physical.squeeze(1), utterance.squeeze(1), goal_processed], 1).squeeze(1)
41 |         else:
42 |             x = torch.cat([physical.squeeze(0), goal_processed], 1).squeeze(1)
43 |         processed, mem = self.processor(x, mem)
44 |         movement = self.movement_chooser(processed)
45 |         if self.using_utterances:
46 |             utter = self.utterance_chooser(processed)
47 |             if training:
48 |                 utterance = self.gumbel_softmax(utter)
49 |             else:
50 |                 utterance = torch.zeros(utter.size())
51 |                 if self.using_cuda:
52 |                     utterance = utterance.cuda()
53 |                 max_utter = utter.max(1)[1]
54 |                 max_utter = max_utter.data[0]
55 |                 utterance[0, max_utter] = 1
56 |         else:
57 |             utterance = None
58 |         final_movement = (movement * 2 * self.movement_step_size) - self.movement_step_size
59 |         return final_movement, utterance, mem
60 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import torch
4 | from torch.optim import RMSprop
5 | from torch.optim.lr_scheduler import ReduceLROnPlateau
6 | import configs
7 | from modules.agent import AgentModule
8 | from modules.game import GameModule
9 | from collections import defaultdict
10 | 
11 | parser = argparse.ArgumentParser(description="Trains the agents for the cooperative communication task")
12 | parser.add_argument('--no-utterances', action='store_true', help='if specified disables the communications channel (default enabled)')
13 | parser.add_argument('--penalize-words', action='store_true', help='if specified penalizes uncommon word usage (default disabled)')
14 | parser.add_argument('--n-epochs', '-e', type=int, help='if specified sets number of training epochs (default 1000)')
15 | parser.add_argument('--learning-rate', type=float, help='if specified sets learning rate (default 5e-4)')
16 | parser.add_argument('--batch-size', type=int, help='if specified sets batch size (default 512)')
17 | parser.add_argument('--n-timesteps', '-t', type=int, help='if specified sets timestep length of each episode (default 16)')
18 | parser.add_argument('--num-shapes', '-s', type=int, help='if specified sets number of shapes (default 2)')
19 | parser.add_argument('--num-colors', '-c', type=int, help='if specified sets number of colors (default 3)')
20 | parser.add_argument('--max-agents', type=int, help='if specified sets maximum number of agents in each episode (default 3)')
21 | parser.add_argument('--min-agents', type=int, help='if specified sets minimum number of agents in each episode (default 2)')
22 | parser.add_argument('--max-landmarks', type=int, help='if specified sets maximum number of landmarks in each episode (default 3)')
23 | parser.add_argument('--min-landmarks', type=int, help='if specified sets minimum number of landmarks in each episode (default 3)')
24 | parser.add_argument('--vocab-size', '-v', type=int, help='if specified sets maximum vocab size in each episode (default 20)')
25 | parser.add_argument('--world-dim', '-w', type=int, help='if specified sets the side length of the square grid where all agents and landmarks spawn (default 16)')
26 | parser.add_argument('--oov-prob', '-o', type=int, help='higher values penalize uncommon words less when penalizing words (default 1)')
27 | parser.add_argument('--load-model-weights', type=str, help='if specified start with saved model weights loaded from the file given by this argument')
28 | parser.add_argument('--save-model-weights', type=str, help='if specified save the model weights to the file given by this argument')
29 | parser.add_argument('--use-cuda', action='store_true', help='if specified enables training on CUDA (default disabled)')
30 | 
31 | def print_losses(epoch, losses, dists, game_config):
32 |     for a in range(game_config.min_agents, game_config.max_agents + 1):
33 |         for l in range(game_config.min_landmarks, game_config.max_landmarks + 1):
34 |             loss = losses[a][l][-1] if len(losses[a][l]) > 0 else 0
35 |             min_loss = min(losses[a][l]) if len(losses[a][l]) > 0 else 0
36 | 
37 |             dist = dists[a][l][-1] if len(dists[a][l]) > 0 else 0
38 |             min_dist = min(dists[a][l]) if len(dists[a][l]) > 0 else 0
39 | 
40 |             print("[epoch %d][%d agents, %d landmarks][%d batches][last loss: %f][min loss: %f][last dist: %f][min dist: %f]" % (epoch, a, l, len(losses[a][l]), loss, min_loss, dist, min_dist))
41 |     print("_________________________")
42 | 
43 | def main():
44 |     args = vars(parser.parse_args())
45 |     agent_config = configs.get_agent_config(args)
46 |     game_config = configs.get_game_config(args)
47 |     training_config = configs.get_training_config(args)
48 |     print("Training with config:")
49 |     print(training_config)
50 |     print(game_config)
51 |     print(agent_config)
52 |     agent = AgentModule(agent_config)
53 |     if training_config.use_cuda:
54 |         agent.cuda()
55 |     optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
56 |     scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
57 |     losses = defaultdict(lambda:defaultdict(list))
58 |     dists = defaultdict(lambda:defaultdict(list))
59 |     for epoch in range(training_config.num_epochs):
60 |         num_agents = np.random.randint(game_config.min_agents, game_config.max_agents+1)
61 |         num_landmarks = np.random.randint(game_config.min_landmarks, game_config.max_landmarks+1)
62 |         agent.reset()
63 |         game = GameModule(game_config, num_agents, num_landmarks)
64 |         if training_config.use_cuda:
65 |             game.cuda()
66 |         optimizer.zero_grad()
67 | 
68 |         total_loss, _ = agent(game)
69 |         per_agent_loss = total_loss.data[0] / num_agents / game_config.batch_size
70 |         losses[num_agents][num_landmarks].append(per_agent_loss)
71 | 
72 |         dist = game.get_avg_agent_to_goal_distance()
73 |         avg_dist = dist.data[0] / num_agents / game_config.batch_size
74 |         dists[num_agents][num_landmarks].append(avg_dist)
75 | 
76 | 
print_losses(epoch, losses, dists, game_config) 77 | 78 | total_loss.backward() 79 | optimizer.step() 80 | 81 | if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks: 82 | scheduler.step(losses[game_config.max_agents][game_config.max_landmarks][-1]) 83 | 84 | if training_config.save_model: 85 | torch.save(agent, training_config.save_model_file) 86 | print("Saved agent model weights at %s" % training_config.save_model_file) 87 | """ 88 | import code 89 | code.interact(local=locals()) 90 | """ 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | 96 | -------------------------------------------------------------------------------- /modules/agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from modules.processing import ProcessingModule 6 | from modules.goal_predicting import GoalPredictingProcessingModule 7 | from modules.action import ActionModule 8 | from modules.word_counting import WordCountingModule 9 | 10 | 11 | """ 12 | The AgentModule is the general module that's responsible for the execution of 13 | the overall policy throughout training. It holds all information pertaining to 14 | the whole training episode, and at each forward pass runs a given game until 15 | the end, returning the total cost all agents collected over the entire game 16 | """ 17 | class AgentModule(nn.Module): 18 | def __init__(self, config): 19 | super(AgentModule, self).__init__() 20 | self.init_from_config(config) 21 | self.total_cost = Variable(self.Tensor(1).zero_()) 22 | 23 | self.physical_processor = ProcessingModule(config.physical_processor) 24 | self.physical_pooling = nn.AdaptiveMaxPool2d((1,config.feat_vec_size)) 25 | self.action_processor = ActionModule(config.action_processor) 26 | 27 | if self.using_utterances: 28 | self.utterance_processor = GoalPredictingProcessingModule(config.utterance_processor) 29 | self.utterance_pooling = nn.AdaptiveMaxPool2d((1,config.feat_vec_size)) 30 | if self.penalizing_words: 31 | self.word_counter = WordCountingModule(config.word_counter) 32 | 33 | def init_from_config(self, config): 34 | self.training = True 35 | self.using_utterances = config.use_utterances 36 | self.penalizing_words = config.penalize_words 37 | self.using_cuda = config.use_cuda 38 | self.time_horizon = config.time_horizon 39 | self.movement_dim_size = config.movement_dim_size 40 | self.vocab_size = config.vocab_size 41 | self.goal_size = config.goal_size 42 | self.processing_hidden_size = config.physical_processor.hidden_size 43 | self.Tensor = torch.cuda.FloatTensor if self.using_cuda else torch.FloatTensor 44 | 45 | def reset(self): 46 | self.total_cost = torch.zeros_like(self.total_cost) 47 | if self.using_utterances and self.penalizing_words: 48 | self.word_counter.word_counts = torch.zeros_like(self.word_counter.word_counts) 49 | 50 | def train(self, mode=True): 51 | super(AgentModule, self).train(mode) 52 | self.training = mode 53 | 54 | def update_mem(self, game, mem_str, new_mem, agent, other_agent=None): 55 | # TODO: Look into tensor copying from Variable 56 | new_big_mem = Variable(self.Tensor(game.memories[mem_str].data)) 57 | if other_agent is not None: 58 | new_big_mem[:, agent, other_agent] = new_mem 59 | else: 60 | new_big_mem[:, agent] = new_mem 61 | game.memories[mem_str] = new_big_mem 62 | 63 | def process_utterances(self, game, agent, other_agent, utterance_processes, goal_predictions): 64 | utterance_processed, 
new_mem, goal_predicted = self.utterance_processor(game.utterances[:,other_agent], game.memories["utterance"][:, agent, other_agent]) 65 | self.update_mem(game, "utterance", new_mem, agent, other_agent) 66 | utterance_processes[:, other_agent, :] = utterance_processed 67 | goal_predictions[:, agent, other_agent, :] = goal_predicted 68 | 69 | def process_physical(self, game, agent, other_entity, physical_processes): 70 | physical_processed, new_mem = self.physical_processor(torch.cat((game.observations[:,agent,other_entity],game.physical[:,other_entity]), 1), game.memories["physical"][:,agent, other_entity]) 71 | self.update_mem(game, "physical", new_mem,agent, other_entity) 72 | physical_processes[:,other_entity,:] = physical_processed 73 | 74 | def get_physical_feat(self, game, agent): 75 | physical_processes = Variable(self.Tensor(game.batch_size, game.num_entities, self.processing_hidden_size)) 76 | for entity in range(game.num_entities): 77 | self.process_physical(game, agent, entity, physical_processes) 78 | return self.physical_pooling(physical_processes) 79 | 80 | def get_utterance_feat(self, game, agent, goal_predictions): 81 | if self.using_utterances: 82 | utterance_processes = Variable(self.Tensor(game.batch_size, game.num_agents, self.processing_hidden_size)) 83 | for other_agent in range(game.num_agents): 84 | self.process_utterances(game, agent, other_agent, utterance_processes, goal_predictions) 85 | return self.utterance_pooling(utterance_processes) 86 | else: 87 | return None 88 | 89 | def get_action(self, game, agent, physical_feat, utterance_feat, movements, utterances): 90 | movement, utterance, new_mem = self.action_processor(physical_feat, game.observed_goals[:,agent], game.memories["action"][:,agent], self.training, utterance_feat) 91 | self.update_mem(game, "action", new_mem, agent) 92 | movements[:,agent,:] = movement 93 | if self.using_utterances: 94 | utterances[:,agent,:] = utterance 95 | 96 | def forward(self, game): 97 | timesteps = [] 98 | for t in range(self.time_horizon): 99 | movements = Variable(self.Tensor(game.batch_size, game.num_entities, self.movement_dim_size).zero_()) 100 | utterances = None 101 | goal_predictions = None 102 | if self.using_utterances: 103 | utterances = Variable(self.Tensor(game.batch_size, game.num_agents, self.vocab_size)) 104 | goal_predictions = Variable(self.Tensor(game.batch_size, game.num_agents, game.num_agents, self.goal_size)) 105 | 106 | for agent in range(game.num_agents): 107 | physical_feat = self.get_physical_feat(game, agent) 108 | utterance_feat = self.get_utterance_feat(game, agent, goal_predictions) 109 | self.get_action(game, agent, physical_feat, utterance_feat, movements, utterances) 110 | 111 | cost = game(movements, goal_predictions, utterances) 112 | if self.penalizing_words: 113 | cost = cost + self.word_counter(utterances) 114 | 115 | self.total_cost = self.total_cost + cost 116 | if not self.training: 117 | timesteps.append({ 118 | 'locations': game.locations, 119 | 'movements': movements, 120 | 'loss': cost}) 121 | if self.using_utterances: 122 | timesteps[-1]['utterances'] = utterances 123 | return self.total_cost, timesteps 124 | -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from typing import NamedTuple, Any, List 3 | import numpy as np 4 | import constants 5 | 6 | DEFAULT_BATCH_SIZE = 512 7 | DEFAULT_NUM_EPOCHS = 1000 8 | DEFAULT_LR = 5e-4 9 | 
SAVE_MODEL = True 10 | DEFAULT_MODEL_FILE = 'latest.pt' 11 | 12 | DEFAULT_HIDDEN_SIZE = 256 13 | DEFAULT_DROPOUT = 0.1 14 | DEFAULT_FEAT_VEC_SIZE = 256 15 | DEFAULT_TIME_HORIZON = 16 16 | 17 | USE_UTTERANCES = True 18 | PENALIZE_WORDS = True 19 | DEFAULT_VOCAB_SIZE = 20 20 | DEFAULT_OOV_PROB = 1 21 | 22 | DEFAULT_WORLD_DIM = 16 23 | MAX_AGENTS = 3 24 | MAX_LANDMARKS = 3 25 | MIN_AGENTS = 2 26 | MIN_LANDMARKS = 3 27 | NUM_COLORS = 3 28 | NUM_SHAPES = 2 29 | 30 | TrainingConfig = NamedTuple('TrainingConfig', [ 31 | ('num_epochs', int), 32 | ('learning_rate', float), 33 | ('load_model', bool), 34 | ('load_model_file', str), 35 | ('save_model', bool), 36 | ('save_model_file', str), 37 | ('use_cuda', bool) 38 | ]) 39 | 40 | GameConfig = NamedTuple('GameConfig', [ 41 | ('batch_size', int), 42 | ('world_dim', Any), 43 | ('max_agents', int), 44 | ('max_landmarks', int), 45 | ('min_agents', int), 46 | ('min_landmarks', int), 47 | ('num_shapes', int), 48 | ('num_colors', int), 49 | ('use_utterances', bool), 50 | ('vocab_size', int), 51 | ('memory_size', int), 52 | ('use_cuda', bool), 53 | ]) 54 | 55 | ProcessingModuleConfig = NamedTuple('ProcessingModuleConfig', [ 56 | ('input_size', int), 57 | ('hidden_size', int), 58 | ('dropout', float) 59 | ]) 60 | 61 | WordCountingModuleConfig = NamedTuple('WordCountingModuleConfig', [ 62 | ('vocab_size', int), 63 | ('oov_prob', float), 64 | ('use_cuda', bool) 65 | ]) 66 | 67 | GoalPredictingProcessingModuleConfig = NamedTuple("GoalPredictingProcessingModuleConfig", [ 68 | ('processor', ProcessingModuleConfig), 69 | ('hidden_size', int), 70 | ('dropout', float), 71 | ('goal_size', int) 72 | ]) 73 | 74 | ActionModuleConfig = NamedTuple("ActionModuleConfig", [ 75 | ('goal_processor', ProcessingModuleConfig), 76 | ('action_processor', ProcessingModuleConfig), 77 | ('hidden_size', int), 78 | ('dropout', float), 79 | ('movement_dim_size', int), 80 | ('movement_step_size', int), 81 | ('vocab_size', int), 82 | ('use_utterances', bool), 83 | ('use_cuda', bool) 84 | ]) 85 | 86 | AgentModuleConfig = NamedTuple("AgentModuleConfig", [ 87 | ('time_horizon', int), 88 | ('feat_vec_size', int), 89 | ('movement_dim_size', int), 90 | ('goal_size', int), 91 | ('vocab_size', int), 92 | ('utterance_processor', GoalPredictingProcessingModuleConfig), 93 | ('physical_processor', ProcessingModuleConfig), 94 | ('action_processor', ActionModuleConfig), 95 | ('word_counter', WordCountingModuleConfig), 96 | ('use_utterances', bool), 97 | ('penalize_words', bool), 98 | ('use_cuda', bool) 99 | ]) 100 | 101 | default_training_config = TrainingConfig( 102 | num_epochs=DEFAULT_NUM_EPOCHS, 103 | learning_rate=DEFAULT_LR, 104 | load_model=False, 105 | load_model_file="", 106 | save_model=SAVE_MODEL, 107 | save_model_file=DEFAULT_MODEL_FILE, 108 | use_cuda=False) 109 | 110 | default_word_counter_config = WordCountingModuleConfig( 111 | vocab_size=DEFAULT_VOCAB_SIZE, 112 | oov_prob=DEFAULT_OOV_PROB, 113 | use_cuda=False) 114 | 115 | default_game_config = GameConfig( 116 | DEFAULT_BATCH_SIZE, 117 | DEFAULT_WORLD_DIM, 118 | MAX_AGENTS, 119 | MAX_LANDMARKS, 120 | MIN_AGENTS, 121 | MIN_LANDMARKS, 122 | NUM_SHAPES, 123 | NUM_COLORS, 124 | USE_UTTERANCES, 125 | DEFAULT_VOCAB_SIZE, 126 | DEFAULT_HIDDEN_SIZE, 127 | False 128 | ) 129 | 130 | if USE_UTTERANCES: 131 | feat_size = DEFAULT_FEAT_VEC_SIZE*3 132 | else: 133 | feat_size = DEFAULT_FEAT_VEC_SIZE*2 134 | 135 | def get_processor_config_with_input_size(input_size): 136 | return ProcessingModuleConfig( 137 | input_size=input_size, 138 | 
hidden_size=DEFAULT_HIDDEN_SIZE, 139 | dropout=DEFAULT_DROPOUT) 140 | 141 | default_action_module_config = ActionModuleConfig( 142 | goal_processor=get_processor_config_with_input_size(constants.GOAL_SIZE), 143 | action_processor=get_processor_config_with_input_size(feat_size), 144 | hidden_size=DEFAULT_HIDDEN_SIZE, 145 | dropout=DEFAULT_DROPOUT, 146 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 147 | movement_step_size=constants.MOVEMENT_STEP_SIZE, 148 | vocab_size=DEFAULT_VOCAB_SIZE, 149 | use_utterances=USE_UTTERANCES, 150 | use_cuda=False) 151 | 152 | default_goal_predicting_module_config = GoalPredictingProcessingModuleConfig( 153 | processor=get_processor_config_with_input_size(DEFAULT_VOCAB_SIZE), 154 | hidden_size=DEFAULT_HIDDEN_SIZE, 155 | dropout=DEFAULT_DROPOUT, 156 | goal_size=constants.GOAL_SIZE) 157 | 158 | default_agent_config = AgentModuleConfig( 159 | time_horizon=DEFAULT_TIME_HORIZON, 160 | feat_vec_size=DEFAULT_FEAT_VEC_SIZE, 161 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 162 | utterance_processor=default_goal_predicting_module_config, 163 | physical_processor=get_processor_config_with_input_size(constants.MOVEMENT_DIM_SIZE + constants.PHYSICAL_EMBED_SIZE), 164 | action_processor=default_action_module_config, 165 | word_counter=default_word_counter_config, 166 | goal_size=constants.GOAL_SIZE, 167 | vocab_size=DEFAULT_VOCAB_SIZE, 168 | use_utterances=USE_UTTERANCES, 169 | penalize_words=PENALIZE_WORDS, 170 | use_cuda=False) 171 | 172 | def get_training_config(kwargs): 173 | return TrainingConfig( 174 | num_epochs=kwargs['n_epochs'] or default_training_config.num_epochs, 175 | learning_rate=kwargs['learning_rate'] or default_training_config.learning_rate, 176 | load_model=bool(kwargs['load_model_weights']), 177 | load_model_file=kwargs['load_model_weights'] or default_training_config.load_model_file, 178 | save_model=default_training_config.save_model, 179 | save_model_file=kwargs['save_model_weights'] or default_training_config.save_model_file, 180 | use_cuda=kwargs['use_cuda']) 181 | 182 | def get_game_config(kwargs): 183 | return GameConfig( 184 | batch_size=kwargs['batch_size'] or default_game_config.batch_size, 185 | world_dim=kwargs['world_dim'] or default_game_config.world_dim, 186 | max_agents=kwargs['max_agents'] or default_game_config.max_agents, 187 | min_agents=kwargs['min_agents'] or default_game_config.min_agents, 188 | max_landmarks=kwargs['max_landmarks'] or default_game_config.max_landmarks, 189 | min_landmarks=kwargs['min_landmarks'] or default_game_config.min_landmarks, 190 | num_shapes=kwargs['num_shapes'] or default_game_config.num_shapes, 191 | num_colors=kwargs['num_colors'] or default_game_config.num_colors, 192 | use_utterances=not kwargs['no_utterances'], 193 | vocab_size=kwargs['vocab_size'] or default_game_config.vocab_size, 194 | memory_size=default_game_config.memory_size, 195 | use_cuda=kwargs['use_cuda'] 196 | ) 197 | 198 | def get_agent_config(kwargs): 199 | vocab_size = kwargs['vocab_size'] or DEFAULT_VOCAB_SIZE 200 | use_utterances = (not kwargs['no_utterances']) 201 | use_cuda = kwargs['use_cuda'] 202 | penalize_words = kwargs['penalize_words'] 203 | oov_prob = kwargs['oov_prob'] or DEFAULT_OOV_PROB 204 | if use_utterances: 205 | feat_vec_size = DEFAULT_FEAT_VEC_SIZE*3 206 | else: 207 | feat_vec_size = DEFAULT_FEAT_VEC_SIZE*2 208 | utterance_processor = GoalPredictingProcessingModuleConfig( 209 | processor=get_processor_config_with_input_size(vocab_size), 210 | hidden_size=DEFAULT_HIDDEN_SIZE, 211 | dropout=DEFAULT_DROPOUT, 212 
| goal_size=constants.GOAL_SIZE) 213 | action_processor = ActionModuleConfig( 214 | goal_processor=get_processor_config_with_input_size(constants.GOAL_SIZE), 215 | action_processor=get_processor_config_with_input_size(feat_vec_size), 216 | hidden_size=DEFAULT_HIDDEN_SIZE, 217 | dropout=DEFAULT_DROPOUT, 218 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 219 | movement_step_size=constants.MOVEMENT_STEP_SIZE, 220 | vocab_size=vocab_size, 221 | use_utterances=use_utterances, 222 | use_cuda=use_cuda) 223 | word_counter = WordCountingModuleConfig( 224 | vocab_size=vocab_size, 225 | oov_prob=oov_prob, 226 | use_cuda=use_cuda) 227 | 228 | return AgentModuleConfig( 229 | time_horizon=kwargs['n_timesteps'] or default_agent_config.time_horizon, 230 | feat_vec_size=default_agent_config.feat_vec_size, 231 | movement_dim_size=default_agent_config.movement_dim_size, 232 | utterance_processor=utterance_processor, 233 | physical_processor=default_agent_config.physical_processor, 234 | action_processor=action_processor, 235 | word_counter=word_counter, 236 | goal_size=default_agent_config.goal_size, 237 | vocab_size=vocab_size, 238 | use_utterances=use_utterances, 239 | penalize_words=penalize_words, 240 | use_cuda=use_cuda 241 | ) 242 | 243 | -------------------------------------------------------------------------------- /modules/game.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch import Tensor 4 | from torch.autograd import Variable 5 | 6 | """ 7 | The GameModule takes in all actions(movement, utterance, goal prediction) 8 | of all agents for a given timestep and returns the total cost for that 9 | timestep. 10 | 11 | Game consists of: 12 | -num_agents (scalar) 13 | -num_landmarks (scalar) 14 | -locations: [num_agents + num_landmarks, 2] 15 | -physical: [num_agents + num_landmarks, entity_embed_size] 16 | -utterances: [num_agents, vocab_size] 17 | -goals: [num_agents, goal_size] 18 | -location_observations: [num_agents, num_agents + num_landmarks, 2] 19 | -memories 20 | -utterance: [num_agents, num_agents, memory_size] 21 | -physical:[num_agents, num_agents + num_landmarks, memory_size] 22 | -action: [num_agents, memory_size] 23 | 24 | config needs: -batch_size, -using_utterances, -world_dim, -vocab_size, -memory_size, -num_colors -num_shapes 25 | """ 26 | 27 | class GameModule(nn.Module): 28 | 29 | def __init__(self, config, num_agents, num_landmarks): 30 | super(GameModule, self).__init__() 31 | 32 | self.batch_size = config.batch_size # scalar: num games in this batch 33 | self.using_utterances = config.use_utterances # bool: whether current batch allows utterances 34 | self.using_cuda = config.use_cuda 35 | self.num_agents = num_agents # scalar: number of agents in this batch 36 | self.num_landmarks = num_landmarks # scalar: number of landmarks in this batch 37 | self.num_entities = self.num_agents + self.num_landmarks # type: int 38 | 39 | if self.using_cuda: 40 | self.Tensor = torch.cuda.FloatTensor 41 | else: 42 | self.Tensor = torch.FloatTensor 43 | 44 | locations = torch.rand(self.batch_size, self.num_entities, 2) * config.world_dim 45 | colors = (torch.rand(self.batch_size, self.num_entities, 1) * config.num_colors).floor() 46 | shapes = (torch.rand(self.batch_size, self.num_entities, 1) * config.num_shapes).floor() 47 | 48 | goal_agents = self.Tensor(self.batch_size, self.num_agents, 1) 49 | goal_entities = (torch.rand(self.batch_size, self.num_agents, 1) * self.num_landmarks).floor().long() + 
self.num_agents 50 | goal_locations = self.Tensor(self.batch_size, self.num_agents, 2) 51 | 52 | if self.using_cuda: 53 | locations = locations.cuda() 54 | colors = colors.cuda() 55 | shapes = shapes.cuda() 56 | goal_entities = goal_entities.cuda() 57 | 58 | # [batch_size, num_entities, 2] 59 | self.locations = Variable(locations) 60 | # [batch_size, num_entities, 2] 61 | self.physical = Variable(torch.cat((colors,shapes), 2).float()) 62 | 63 | #TODO: Bad for loop? 64 | for b in range(self.batch_size): 65 | goal_agents[b] = torch.randperm(self.num_agents) 66 | 67 | for b in range(self.batch_size): 68 | goal_locations[b] = self.locations.data[b][goal_entities[b].squeeze()] 69 | 70 | # [batch_size, num_agents, 3] 71 | self.goals = Variable(torch.cat((goal_locations, goal_agents), 2)) 72 | goal_agents = Variable(goal_agents) 73 | 74 | 75 | if self.using_cuda: 76 | self.memories = { 77 | "physical": Variable(torch.zeros(self.batch_size, self.num_agents, self.num_entities, config.memory_size).cuda()), 78 | "action": Variable(torch.zeros(self.batch_size, self.num_agents, config.memory_size).cuda())} 79 | else: 80 | self.memories = { 81 | "physical": Variable(torch.zeros(self.batch_size, self.num_agents, self.num_entities, config.memory_size)), 82 | "action": Variable(torch.zeros(self.batch_size, self.num_agents, config.memory_size))} 83 | 84 | if self.using_utterances: 85 | if self.using_cuda: 86 | self.utterances = Variable(torch.zeros(self.batch_size, self.num_agents, config.vocab_size).cuda()) 87 | self.memories["utterance"] = Variable(torch.zeros(self.batch_size, self.num_agents, self.num_agents, config.memory_size).cuda()) 88 | else: 89 | self.utterances = Variable(torch.zeros(self.batch_size, self.num_agents, config.vocab_size)) 90 | self.memories["utterance"] = Variable(torch.zeros(self.batch_size, self.num_agents, self.num_agents, config.memory_size)) 91 | 92 | agent_baselines = self.locations[:, :self.num_agents, :] 93 | 94 | sort_idxs = torch.sort(self.goals[:,:,2])[1] 95 | self.sorted_goals = Variable(self.Tensor(self.goals.size())) 96 | # TODO: Bad for loop? 
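        # sort_idxs orders each game's goals by the agent index stored in goals[:, :, 2], so that
        # sorted_goals[b, i] ends up holding the goal location assigned to agent i; this is what
        # compute_physical_cost compares agent i's location against.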
97 | for b in range(self.batch_size): 98 | self.sorted_goals[b] = self.goals[b][sort_idxs[b]] 99 | self.sorted_goals = self.sorted_goals[:,:,:2] 100 | 101 | # [batch_size, num_agents, num_entities, 2] 102 | self.observations = self.locations.unsqueeze(1) - agent_baselines.unsqueeze(2) 103 | 104 | new_obs = self.goals[:,:,:2] - agent_baselines 105 | 106 | # [batch_size, num_agents, 2] [batch_size, num_agents, 1] 107 | self.observed_goals = torch.cat((new_obs, goal_agents), dim=2) 108 | 109 | 110 | 111 | """ 112 | Updates game state given all movements and utterances and returns accrued cost 113 | - movements: [batch_size, num_agents, config.movement_size] 114 | - utterances: [batch_size, num_agents, config.utterance_size] 115 | - goal_predictions: [batch_size, num_agents, num_agents, config.goal_size] 116 | Returns: 117 | - scalar: total cost of all games in the batch 118 | """ 119 | def forward(self, movements, goal_predictions, utterances): 120 | self.locations = self.locations + movements 121 | agent_baselines = self.locations[:, :self.num_agents] 122 | self.observations = self.locations.unsqueeze(1)- agent_baselines.unsqueeze(2) 123 | new_obs = self.goals[:,:,:2] - agent_baselines 124 | goal_agents = self.goals[:,:,2].unsqueeze(2) 125 | self.observed_goals = torch.cat((new_obs, goal_agents), dim=2) 126 | if self.using_utterances: 127 | self.utterances = utterances 128 | return self.compute_cost(movements, goal_predictions, utterances) 129 | else: 130 | return self.compute_cost(movements, goal_predictions) 131 | 132 | def compute_cost(self, movements, goal_predictions, utterances=None): 133 | physical_cost = self.compute_physical_cost() 134 | movement_cost = self.compute_movement_cost(movements) 135 | goal_pred_cost = self.compute_goal_pred_cost(goal_predictions) 136 | return physical_cost + goal_pred_cost + movement_cost 137 | 138 | """ 139 | Computes the total cost agents get from being near their goals 140 | agent locations are stored as [batch_size, num_agents + num_landmarks, entity_embed_size] 141 | """ 142 | def compute_physical_cost(self): 143 | return 2*torch.sum( 144 | torch.sqrt( 145 | torch.sum( 146 | torch.pow( 147 | self.locations[:,:self.num_agents,:] - self.sorted_goals, 148 | 2), 149 | -1) 150 | ) 151 | ) 152 | 153 | """ 154 | Computes the total cost agents get from predicting others' goals 155 | goal_predictions: [batch_size, num_agents, num_agents, goal_size] 156 | goal_predictions[., a_i, a_j, :] = a_i's prediction of a_j's goal with location relative to a_i 157 | We want: 158 | real_goal_locations[., a_i, a_j, :] = a_j's goal with location relative to a_i 159 | We have: 160 | goals[., a_j, :] = a_j's goal with absolute location 161 | observed_goals[., a_j, :] = a_j's goal with location relative to a_j 162 | Which means we want to build an observed_goals-like tensor but relative to each agent 163 | real_goal_locations[., a_i, a_j, :] = goals[., a_j, :] - locations[a_i] 164 | 165 | 166 | """ 167 | def compute_goal_pred_cost(self, goal_predictions): 168 | relative_goal_locs = self.goals.unsqueeze(1)[:,:,:,:2] - self.locations.unsqueeze(2)[:, :self.num_agents, :, :] 169 | goal_agents = self.goals.unsqueeze(1)[:,:,:,2:].expand_as(relative_goal_locs)[:,:,:,-1:] 170 | relative_goals = torch.cat((relative_goal_locs, goal_agents), dim=3) 171 | return torch.sum( 172 | torch.sqrt( 173 | torch.sum( 174 | torch.pow( 175 | goal_predictions - relative_goals, 176 | 2), 177 | -1) 178 | ) 179 | ) 180 | 181 | """ 182 | Computes the total cost agents get from moving 183 | """ 184 | def 
compute_movement_cost(self, movements): 185 | return torch.sum(torch.sqrt(torch.sum(torch.pow(movements, 2), -1))) 186 | 187 | def get_avg_agent_to_goal_distance(self): 188 | return torch.sum( 189 | torch.sqrt( 190 | torch.sum( 191 | torch.pow( 192 | self.locations[:,:self.num_agents,:] - self.sorted_goals, 193 | 2), 194 | -1) 195 | ) 196 | ) 197 | 198 | --------------------------------------------------------------------------------
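For orientation, here is a minimal sketch of how the pieces above fit together for a single evaluation episode. It assumes the default configs from configs.py and a CPU run, and mirrors what train.py does inside its epoch loop; the agent/landmark counts below are just illustrative values within the configured min/max ranges.

from modules.agent import AgentModule
from modules.game import GameModule
import configs

agent = AgentModule(configs.default_agent_config)
agent.reset()
agent.train(False)   # eval mode: utterances are argmaxed instead of Gumbel-softmaxed

# 2 agents / 3 landmarks is one valid draw given the default min/max settings
game = GameModule(configs.default_game_config, 2, 3)

# Runs a full episode of agent_config.time_horizon steps; in eval mode the
# per-timestep locations, movements and utterances are returned for inspection.
total_loss, timesteps = agent(game)
print(total_loss.data[0], len(timesteps))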