├── comp-graph.pdf
├── .gitignore
├── constants.py
├── modules
│   ├── gumbel_softmax.py
│   ├── word_counting.py
│   ├── processing.py
│   ├── goal_predicting.py
│   ├── action.py
│   ├── agent.py
│   └── game.py
├── playground.py
├── README.md
├── notes.txt
├── visualize.py
├── train.py
└── configs.py
/comp-graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bkgoksel/emergent-language/HEAD/comp-graph.pdf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .mypy_cache/
2 | .ropeproject/
3 | __pycache__/
4 | mypy.ini
5 | *.sw*
6 | *.pt
7 | *.out
8 | *.pyc
9 | venv/
10 | docker/
11 | 
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | WORLD_DIMENSIONALITY = 2
2 | MOVEMENT_STEP_SIZE = 2
3 | COLOR_SCALE = 1
4 | PHYSICAL_EMBED_SIZE = 2
5 | GOAL_SIZE = int(WORLD_DIMENSIONALITY + 1)
6 | MOVEMENT_DIM_SIZE = int(WORLD_DIMENSIONALITY)
7 | 
--------------------------------------------------------------------------------
/modules/gumbel_softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | 
5 | class GumbelSoftmax(nn.Module):
6 |     def __init__(self, use_cuda=False):
7 |         super(GumbelSoftmax, self).__init__()
8 |         self.using_cuda = use_cuda
9 |         self.softmax = nn.Softmax(dim=1)
10 |         self.temp = 1
11 | 
12 |     def forward(self, x):
13 |         if self.using_cuda:
14 |             U = Variable(torch.rand(x.size()).cuda())
15 |         else:
16 |             U = Variable(torch.rand(x.size()))
17 |         y = x - torch.log(-torch.log(U + 1e-20) + 1e-20)
18 |         return self.softmax(y/self.temp)
19 | 
--------------------------------------------------------------------------------
/modules/word_counting.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch import Tensor
3 | from torch.autograd import Variable
4 | 
5 | class WordCountingModule(nn.Module):
6 |     def __init__(self, config):
7 |         super(WordCountingModule, self).__init__()
8 |         self.oov_prob = config.oov_prob
9 |         word_counts = Tensor(config.vocab_size)
10 |         if config.use_cuda:
11 |             word_counts = word_counts.cuda()  # Tensor.cuda() is not in-place; reassign so the counts actually live on the GPU
12 |         self.word_counts = Variable(word_counts)
13 | 
14 |     def forward(self, utterances):
15 |         cost = -(utterances/(self.oov_prob + self.word_counts.sum() - 1)).sum()
16 |         self.word_counts = self.word_counts + utterances
17 |         return cost
18 | 
--------------------------------------------------------------------------------
/modules/processing.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | """
4 | A ProcessingModule takes an input from a stream and the independent memory
5 | of that stream and runs a single timestep of a GRU cell, followed by
6 | dropout and finally a fully-connected layer with ELU activation on top of the GRU output.
7 | It returns the output of the fully connected layer as well as the update to
8 | the independent memory.
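Expected shapes, inferred from the GRUCell and Linear layers below: x is
[batch, config.input_size], m is [batch, config.hidden_size], and both
returned tensors are [batch, config.hidden_size].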
9 | """ 10 | class ProcessingModule(nn.Module): 11 | def __init__(self, config): 12 | super(ProcessingModule, self).__init__() 13 | self.cell = nn.GRUCell(config.input_size, config.hidden_size) 14 | self.fully_connected = nn.Sequential( 15 | nn.Dropout(config.dropout), 16 | nn.Linear(config.hidden_size, config.hidden_size), 17 | nn.ELU()) 18 | 19 | def forward(self, x, m): 20 | m = self.cell(x, m) 21 | return self.fully_connected(m), m 22 | 23 | -------------------------------------------------------------------------------- /playground.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from modules.game import GameModule 3 | from configs import default_game_config, get_game_config 4 | import code 5 | 6 | 7 | config = { 8 | 'batch_size': default_game_config.batch_size, 9 | 'world_dim': default_game_config.world_dim, 10 | 'max_agents': default_game_config.max_agents, 11 | 'max_landmarks': default_game_config.max_landmarks, 12 | 'min_agents': default_game_config.min_agents, 13 | 'min_landmarks': default_game_config.min_landmarks, 14 | 'num_shapes': default_game_config.num_shapes, 15 | 'num_colors': default_game_config.num_colors, 16 | 'no_utterances': not default_game_config.use_utterances, 17 | 'vocab_size': default_game_config.vocab_size, 18 | 'memory_size': default_game_config.memory_size 19 | } 20 | 21 | agent = torch.load('latest.pt') 22 | agent.reset() 23 | agent.train(False) 24 | code.interact(local=locals()) 25 | -------------------------------------------------------------------------------- /modules/goal_predicting.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from modules.processing import ProcessingModule 3 | 4 | """ 5 | A GoalPredictingProcessingModule acts like a regular processing module but 6 | also runs a goal predictor layer that is a two layer fully-connected 7 | network. It returns the regular processing module's output, its memory 8 | update and finally a goal vector sized goal prediction 9 | """ 10 | class GoalPredictingProcessingModule(nn.Module): 11 | def __init__(self, config): 12 | super(GoalPredictingProcessingModule, self).__init__() 13 | self.processor = ProcessingModule(config.processor) 14 | self.goal_predictor = nn.Sequential( 15 | nn.Dropout(config.dropout), 16 | nn.Linear(config.processor.hidden_size, config.hidden_size), 17 | nn.Dropout(config.dropout), 18 | nn.ELU(), 19 | nn.Linear(config.hidden_size, config.goal_size)) 20 | 21 | def forward(self, x, mem): 22 | processed, mem = self.processor(x, mem) 23 | goal_prediction = self.goal_predictor(processed) 24 | return processed, mem, goal_prediction 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # emergent-language 2 | An implementation of Emergence of Grounded Compositional Language in Multi-Agent Populations by Igor Mordatch and Pieter Abbeel 3 | 4 | To run, invoke `python3 train.py` in environment with PyTorch installed. To experiment with parameters, invoke `python3 train.py --help` to get a list of command line arguments that modify parameters. Currently training just prints out the loss of each game episode run, without any further analysis, and the model weights are not saved at the end. These features are coming soon. 
5 | 
6 | * `game.py` provides a non-tensor based implementation of the game mechanics (used for game behavior exploration and random game generation during training)
7 | * `model.py` provides the full computational model including agent and game dynamics through an entire episode
8 | * `train.py` provides the training harness that runs many games and trains the agents
9 | * `configs.py` provides the data structures that are passed as configuration to various modules in the computational graph, as well as the default values currently used in training
10 | * `constants.py` provides constant factors that shouldn't need modification during regular running of the model
11 | * `visualize.py` provides a computational graph visualization tool taken from [here](https://github.com/szagoruyko/functional-zoo/blob/master/visualize.py)
12 | * `simple_model.py` provides a simple model that doesn't communicate and only moves based on its own goal (used for testing other components)
13 | * `comp-graph.pdf` is a PDF visualization of the computational graph of the game-agent mechanics
14 | 
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------
1 | TODO:
2 | --------
3 | [x] Weight saving
4 | [x] Add a test mode where utterances are argmaxed instead of Gumbel-softmaxed
5 | [x] Add a way to "replay" an episode easily
6 | [x] Better loss over time information printing
7 | [x] Batching
8 | [x] Test role of comms by testing in envs where each agent knows its goal vs it doesn't
9 | [x] Make it possible to have agent get its own goal, determinable with a flag
10 |     [x] Make this a training flag
11 |     [ ] Test how this affects performance
12 | [x] Get avg final distance of agents to their goals
13 | [x] Goal predictions (relative to yourself and agent index)
14 | 
15 | HYPERPARAMETER TUNING:
16 | -------------------------
17 | 
18 | 
19 | LEARNING END:
20 | -------------
21 | [ ] Try relative goals (left of landmark, above landmark), see if relative direction words evolve
22 | [ ] Give different reward coefficients to goals, have a single agent have multiple goals, see if reward values can be communicated
23 | 
24 | DISTANT FUTURE:
25 | --------------
26 | [ ] Visualization of a game
27 | [ ] Web interface to give an initial game state and see how the agents act and what they utter
28 | 
29 | 
30 | Unstructured thoughts:
31 | ----------------
32 | - Color words
33 |     - Simulate different visual systems, see how color words evolve
34 | - Verbs
35 |     - GO
36 |     - TAKE (i.e. make landmarks movable, have a goal be the moving of a landmark to another landmark)
37 | - Make sequential goals (i.e. go to blue, then green) and also multi-landmark but not ordered goals (go to green and blue in any order).
    See if a way to discriminate evolves
38 | - Narration (Agent A observes a certain environment, tries to describe it to Agent B, Agent B predicts the environment history)
39 | 
40 | 
--------------------------------------------------------------------------------
/visualize.py:
--------------------------------------------------------------------------------
1 | from graphviz import Digraph
2 | import torch
3 | from torch.autograd import Variable
4 | 
5 | 
6 | def make_dot(var, params=None, filename=None):
7 |     """ Produces Graphviz representation of PyTorch autograd graph
8 |     Blue nodes are the Variables that require grad, orange are Tensors
9 |     saved for backward in torch.autograd.Function
10 |     Args:
11 |         var: output Variable
12 |         params: dict of (name, Variable) to add names to nodes that
13 |             require grad (TODO: make optional)
14 |     """
15 |     if params is not None:
16 |         assert isinstance(list(params.values())[0], Variable)  # dict.values() is not indexable in Python 3
17 |         param_map = {id(v): k for k, v in params.items()}
18 | 
19 |     node_attr = dict(style='filled',
20 |                      shape='box',
21 |                      align='left',
22 |                      fontsize='12',
23 |                      ranksep='0.1',
24 |                      height='0.2')
25 |     dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
26 |     seen = set()
27 | 
28 |     def size_to_str(size):
29 |         return '('+(', ').join(['%d' % v for v in size])+')'
30 | 
31 |     def add_nodes(var):
32 |         if var not in seen:
33 |             if torch.is_tensor(var):
34 |                 dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange')
35 |             elif hasattr(var, 'variable'):
36 |                 u = var.variable
37 |                 name = param_map[id(u)] if params is not None else ''
38 |                 node_name = '%s\n %s' % (name, size_to_str(u.size()))
39 |                 dot.node(str(id(var)), node_name, fillcolor='lightblue')
40 |             else:
41 |                 dot.node(str(id(var)), str(type(var).__name__))
42 |             seen.add(var)
43 |             if hasattr(var, 'next_functions'):
44 |                 for u in var.next_functions:
45 |                     if u[0] is not None:
46 |                         dot.edge(str(id(u[0])), str(id(var)))
47 |                         add_nodes(u[0])
48 |             if hasattr(var, 'saved_tensors'):
49 |                 for t in var.saved_tensors:
50 |                     dot.edge(str(id(t)), str(id(var)))
51 |                     add_nodes(t)
52 |     add_nodes(var.grad_fn)
53 |     if filename:
54 |         dot.render(filename, view=True)
55 |     return dot
56 | 
--------------------------------------------------------------------------------
/modules/action.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from modules.processing import ProcessingModule
5 | from modules.gumbel_softmax import GumbelSoftmax
6 | 
7 | """
8 | An ActionModule takes in the physical observation feature vector, the
9 | utterance observation feature vector and the individual goal of an agent
10 | (alongside the memory for the module), processes the goal to turn it into
11 | a goal feature vector, and runs the concatenation of all three feature
12 | vectors through a processing module.
The output of the processing module
13 | is then fed into two independent fully connected networks to output
14 | utterance and movement actions.
15 | """
16 | class ActionModule(nn.Module):
17 |     def __init__(self, config):
18 |         super(ActionModule, self).__init__()
19 |         self.using_utterances = config.use_utterances
20 |         self.using_cuda = config.use_cuda
21 |         self.goal_processor = ProcessingModule(config.goal_processor)
22 |         self.processor = ProcessingModule(config.action_processor)
23 |         self.movement_step_size = config.movement_step_size
24 |         self.movement_chooser = nn.Sequential(
25 |             nn.Linear(config.action_processor.hidden_size, config.action_processor.hidden_size),
26 |             nn.ELU(),
27 |             nn.Linear(config.action_processor.hidden_size, config.movement_dim_size),
28 |             nn.Tanh())
29 | 
30 |         if self.using_utterances:
31 |             self.utterance_chooser = nn.Sequential(
32 |                 nn.Linear(config.action_processor.hidden_size, config.hidden_size),
33 |                 nn.ELU(),
34 |                 nn.Linear(config.hidden_size, config.vocab_size))
35 |             self.gumbel_softmax = GumbelSoftmax(config.use_cuda)
36 | 
37 |     def forward(self, physical, goal, mem, training, utterance=None):
38 |         goal_processed, _ = self.goal_processor(goal, mem)
39 |         if self.using_utterances:
40 |             x = torch.cat([physical.squeeze(1), utterance.squeeze(1), goal_processed], 1).squeeze(1)
41 |         else:
42 |             x = torch.cat([physical.squeeze(0), goal_processed], 1).squeeze(1)
43 |         processed, mem = self.processor(x, mem)
44 |         movement = self.movement_chooser(processed)
45 |         if self.using_utterances:
46 |             utter = self.utterance_chooser(processed)
47 |             if training:
48 |                 utterance = self.gumbel_softmax(utter)
49 |             else:
50 |                 utterance = torch.zeros(utter.size())
51 |                 if self.using_cuda:
52 |                     utterance = utterance.cuda()
53 |                 max_utter = utter.max(1)[1]
54 |                 max_utter = max_utter.data[0]
55 |                 utterance[0, max_utter] = 1
56 |         else:
57 |             utterance = None
58 |         final_movement = (movement * 2 * self.movement_step_size) - self.movement_step_size
59 |         return final_movement, utterance, mem
60 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import torch
4 | from torch.optim import RMSprop
5 | from torch.optim.lr_scheduler import ReduceLROnPlateau
6 | import configs
7 | from modules.agent import AgentModule
8 | from modules.game import GameModule
9 | from collections import defaultdict
10 | 
11 | parser = argparse.ArgumentParser(description="Trains the agents for the cooperative communication task")
12 | parser.add_argument('--no-utterances', action='store_true', help='if specified disables the communications channel (default enabled)')
13 | parser.add_argument('--penalize-words', action='store_true', help='if specified penalizes uncommon word usage (default disabled)')
14 | parser.add_argument('--n-epochs', '-e', type=int, help='if specified sets number of training epochs (default 1000)')
15 | parser.add_argument('--learning-rate', type=float, help='if specified sets learning rate (default 5e-4)')
16 | parser.add_argument('--batch-size', type=int, help='if specified sets batch size (default 512)')
17 | parser.add_argument('--n-timesteps', '-t', type=int, help='if specified sets timestep length of each episode (default 16)')
18 | parser.add_argument('--num-shapes', '-s', type=int, help='if specified sets number of shapes (default 2)')
19 | parser.add_argument('--num-colors', '-c', type=int, help='if specified sets number of colors (default 3)')
20 | parser.add_argument('--max-agents', type=int, help='if specified sets maximum number of agents in each episode (default 3)')
21 | parser.add_argument('--min-agents', type=int, help='if specified sets minimum number of agents in each episode (default 2)')
22 | parser.add_argument('--max-landmarks', type=int, help='if specified sets maximum number of landmarks in each episode (default 3)')
23 | parser.add_argument('--min-landmarks', type=int, help='if specified sets minimum number of landmarks in each episode (default 3)')
24 | parser.add_argument('--vocab-size', '-v', type=int, help='if specified sets maximum vocab size in each episode (default 20)')
25 | parser.add_argument('--world-dim', '-w', type=int, help='if specified sets the side length of the square grid where all agents and landmarks spawn (default 16)')
26 | parser.add_argument('--oov-prob', '-o', type=int, help='higher values penalize uncommon words less when penalizing words (default 1)')
27 | parser.add_argument('--load-model-weights', type=str, help='if specified start with saved model weights loaded from the file given by this argument')
28 | parser.add_argument('--save-model-weights', type=str, help='if specified save the model weights to the file given by this argument')
29 | parser.add_argument('--use-cuda', action='store_true', help='if specified enables training on CUDA (default disabled)')
30 | 
31 | def print_losses(epoch, losses, dists, game_config):
32 |     for a in range(game_config.min_agents, game_config.max_agents + 1):
33 |         for l in range(game_config.min_landmarks, game_config.max_landmarks + 1):
34 |             loss = losses[a][l][-1] if len(losses[a][l]) > 0 else 0
35 |             min_loss = min(losses[a][l]) if len(losses[a][l]) > 0 else 0
36 | 
37 |             dist = dists[a][l][-1] if len(dists[a][l]) > 0 else 0
38 |             min_dist = min(dists[a][l]) if len(dists[a][l]) > 0 else 0
39 | 
40 |             print("[epoch %d][%d agents, %d landmarks][%d batches][last loss: %f][min loss: %f][last dist: %f][min dist: %f]" % (epoch, a, l, len(losses[a][l]), loss, min_loss, dist, min_dist))
41 |     print("_________________________")
42 | 
43 | def main():
44 |     args = vars(parser.parse_args())
45 |     agent_config = configs.get_agent_config(args)
46 |     game_config = configs.get_game_config(args)
47 |     training_config = configs.get_training_config(args)
48 |     print("Training with config:")
49 |     print(training_config)
50 |     print(game_config)
51 |     print(agent_config)
52 |     agent = AgentModule(agent_config)
53 |     if training_config.use_cuda:
54 |         agent.cuda()
55 |     optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
56 |     scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
57 |     losses = defaultdict(lambda:defaultdict(list))
58 |     dists = defaultdict(lambda:defaultdict(list))
59 |     for epoch in range(training_config.num_epochs):
60 |         num_agents = np.random.randint(game_config.min_agents, game_config.max_agents+1)
61 |         num_landmarks = np.random.randint(game_config.min_landmarks, game_config.max_landmarks+1)
62 |         agent.reset()
63 |         game = GameModule(game_config, num_agents, num_landmarks)
64 |         if training_config.use_cuda:
65 |             game.cuda()
66 |         optimizer.zero_grad()
67 | 
68 |         total_loss, _ = agent(game)
69 |         per_agent_loss = total_loss.data[0] / num_agents / game_config.batch_size
70 |         losses[num_agents][num_landmarks].append(per_agent_loss)
71 | 
72 |         dist = game.get_avg_agent_to_goal_distance()
73 |         avg_dist = dist.data[0] / num_agents / game_config.batch_size
74 |         dists[num_agents][num_landmarks].append(avg_dist)
75 | 
76 | 
print_losses(epoch, losses, dists, game_config) 77 | 78 | total_loss.backward() 79 | optimizer.step() 80 | 81 | if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks: 82 | scheduler.step(losses[game_config.max_agents][game_config.max_landmarks][-1]) 83 | 84 | if training_config.save_model: 85 | torch.save(agent, training_config.save_model_file) 86 | print("Saved agent model weights at %s" % training_config.save_model_file) 87 | """ 88 | import code 89 | code.interact(local=locals()) 90 | """ 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | 96 | -------------------------------------------------------------------------------- /modules/agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from modules.processing import ProcessingModule 6 | from modules.goal_predicting import GoalPredictingProcessingModule 7 | from modules.action import ActionModule 8 | from modules.word_counting import WordCountingModule 9 | 10 | 11 | """ 12 | The AgentModule is the general module that's responsible for the execution of 13 | the overall policy throughout training. It holds all information pertaining to 14 | the whole training episode, and at each forward pass runs a given game until 15 | the end, returning the total cost all agents collected over the entire game 16 | """ 17 | class AgentModule(nn.Module): 18 | def __init__(self, config): 19 | super(AgentModule, self).__init__() 20 | self.init_from_config(config) 21 | self.total_cost = Variable(self.Tensor(1).zero_()) 22 | 23 | self.physical_processor = ProcessingModule(config.physical_processor) 24 | self.physical_pooling = nn.AdaptiveMaxPool2d((1,config.feat_vec_size)) 25 | self.action_processor = ActionModule(config.action_processor) 26 | 27 | if self.using_utterances: 28 | self.utterance_processor = GoalPredictingProcessingModule(config.utterance_processor) 29 | self.utterance_pooling = nn.AdaptiveMaxPool2d((1,config.feat_vec_size)) 30 | if self.penalizing_words: 31 | self.word_counter = WordCountingModule(config.word_counter) 32 | 33 | def init_from_config(self, config): 34 | self.training = True 35 | self.using_utterances = config.use_utterances 36 | self.penalizing_words = config.penalize_words 37 | self.using_cuda = config.use_cuda 38 | self.time_horizon = config.time_horizon 39 | self.movement_dim_size = config.movement_dim_size 40 | self.vocab_size = config.vocab_size 41 | self.goal_size = config.goal_size 42 | self.processing_hidden_size = config.physical_processor.hidden_size 43 | self.Tensor = torch.cuda.FloatTensor if self.using_cuda else torch.FloatTensor 44 | 45 | def reset(self): 46 | self.total_cost = torch.zeros_like(self.total_cost) 47 | if self.using_utterances and self.penalizing_words: 48 | self.word_counter.word_counts = torch.zeros_like(self.word_counter.word_counts) 49 | 50 | def train(self, mode=True): 51 | super(AgentModule, self).train(mode) 52 | self.training = mode 53 | 54 | def update_mem(self, game, mem_str, new_mem, agent, other_agent=None): 55 | # TODO: Look into tensor copying from Variable 56 | new_big_mem = Variable(self.Tensor(game.memories[mem_str].data)) 57 | if other_agent is not None: 58 | new_big_mem[:, agent, other_agent] = new_mem 59 | else: 60 | new_big_mem[:, agent] = new_mem 61 | game.memories[mem_str] = new_big_mem 62 | 63 | def process_utterances(self, game, agent, other_agent, utterance_processes, goal_predictions): 64 | utterance_processed, 
new_mem, goal_predicted = self.utterance_processor(game.utterances[:,other_agent], game.memories["utterance"][:, agent, other_agent]) 65 | self.update_mem(game, "utterance", new_mem, agent, other_agent) 66 | utterance_processes[:, other_agent, :] = utterance_processed 67 | goal_predictions[:, agent, other_agent, :] = goal_predicted 68 | 69 | def process_physical(self, game, agent, other_entity, physical_processes): 70 | physical_processed, new_mem = self.physical_processor(torch.cat((game.observations[:,agent,other_entity],game.physical[:,other_entity]), 1), game.memories["physical"][:,agent, other_entity]) 71 | self.update_mem(game, "physical", new_mem,agent, other_entity) 72 | physical_processes[:,other_entity,:] = physical_processed 73 | 74 | def get_physical_feat(self, game, agent): 75 | physical_processes = Variable(self.Tensor(game.batch_size, game.num_entities, self.processing_hidden_size)) 76 | for entity in range(game.num_entities): 77 | self.process_physical(game, agent, entity, physical_processes) 78 | return self.physical_pooling(physical_processes) 79 | 80 | def get_utterance_feat(self, game, agent, goal_predictions): 81 | if self.using_utterances: 82 | utterance_processes = Variable(self.Tensor(game.batch_size, game.num_agents, self.processing_hidden_size)) 83 | for other_agent in range(game.num_agents): 84 | self.process_utterances(game, agent, other_agent, utterance_processes, goal_predictions) 85 | return self.utterance_pooling(utterance_processes) 86 | else: 87 | return None 88 | 89 | def get_action(self, game, agent, physical_feat, utterance_feat, movements, utterances): 90 | movement, utterance, new_mem = self.action_processor(physical_feat, game.observed_goals[:,agent], game.memories["action"][:,agent], self.training, utterance_feat) 91 | self.update_mem(game, "action", new_mem, agent) 92 | movements[:,agent,:] = movement 93 | if self.using_utterances: 94 | utterances[:,agent,:] = utterance 95 | 96 | def forward(self, game): 97 | timesteps = [] 98 | for t in range(self.time_horizon): 99 | movements = Variable(self.Tensor(game.batch_size, game.num_entities, self.movement_dim_size).zero_()) 100 | utterances = None 101 | goal_predictions = None 102 | if self.using_utterances: 103 | utterances = Variable(self.Tensor(game.batch_size, game.num_agents, self.vocab_size)) 104 | goal_predictions = Variable(self.Tensor(game.batch_size, game.num_agents, game.num_agents, self.goal_size)) 105 | 106 | for agent in range(game.num_agents): 107 | physical_feat = self.get_physical_feat(game, agent) 108 | utterance_feat = self.get_utterance_feat(game, agent, goal_predictions) 109 | self.get_action(game, agent, physical_feat, utterance_feat, movements, utterances) 110 | 111 | cost = game(movements, goal_predictions, utterances) 112 | if self.penalizing_words: 113 | cost = cost + self.word_counter(utterances) 114 | 115 | self.total_cost = self.total_cost + cost 116 | if not self.training: 117 | timesteps.append({ 118 | 'locations': game.locations, 119 | 'movements': movements, 120 | 'loss': cost}) 121 | if self.using_utterances: 122 | timesteps[-1]['utterances'] = utterances 123 | return self.total_cost, timesteps 124 | -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from typing import NamedTuple, Any, List 3 | import numpy as np 4 | import constants 5 | 6 | DEFAULT_BATCH_SIZE = 512 7 | DEFAULT_NUM_EPOCHS = 1000 8 | DEFAULT_LR = 5e-4 9 | 
SAVE_MODEL = True 10 | DEFAULT_MODEL_FILE = 'latest.pt' 11 | 12 | DEFAULT_HIDDEN_SIZE = 256 13 | DEFAULT_DROPOUT = 0.1 14 | DEFAULT_FEAT_VEC_SIZE = 256 15 | DEFAULT_TIME_HORIZON = 16 16 | 17 | USE_UTTERANCES = True 18 | PENALIZE_WORDS = True 19 | DEFAULT_VOCAB_SIZE = 20 20 | DEFAULT_OOV_PROB = 1 21 | 22 | DEFAULT_WORLD_DIM = 16 23 | MAX_AGENTS = 3 24 | MAX_LANDMARKS = 3 25 | MIN_AGENTS = 2 26 | MIN_LANDMARKS = 3 27 | NUM_COLORS = 3 28 | NUM_SHAPES = 2 29 | 30 | TrainingConfig = NamedTuple('TrainingConfig', [ 31 | ('num_epochs', int), 32 | ('learning_rate', float), 33 | ('load_model', bool), 34 | ('load_model_file', str), 35 | ('save_model', bool), 36 | ('save_model_file', str), 37 | ('use_cuda', bool) 38 | ]) 39 | 40 | GameConfig = NamedTuple('GameConfig', [ 41 | ('batch_size', int), 42 | ('world_dim', Any), 43 | ('max_agents', int), 44 | ('max_landmarks', int), 45 | ('min_agents', int), 46 | ('min_landmarks', int), 47 | ('num_shapes', int), 48 | ('num_colors', int), 49 | ('use_utterances', bool), 50 | ('vocab_size', int), 51 | ('memory_size', int), 52 | ('use_cuda', bool), 53 | ]) 54 | 55 | ProcessingModuleConfig = NamedTuple('ProcessingModuleConfig', [ 56 | ('input_size', int), 57 | ('hidden_size', int), 58 | ('dropout', float) 59 | ]) 60 | 61 | WordCountingModuleConfig = NamedTuple('WordCountingModuleConfig', [ 62 | ('vocab_size', int), 63 | ('oov_prob', float), 64 | ('use_cuda', bool) 65 | ]) 66 | 67 | GoalPredictingProcessingModuleConfig = NamedTuple("GoalPredictingProcessingModuleConfig", [ 68 | ('processor', ProcessingModuleConfig), 69 | ('hidden_size', int), 70 | ('dropout', float), 71 | ('goal_size', int) 72 | ]) 73 | 74 | ActionModuleConfig = NamedTuple("ActionModuleConfig", [ 75 | ('goal_processor', ProcessingModuleConfig), 76 | ('action_processor', ProcessingModuleConfig), 77 | ('hidden_size', int), 78 | ('dropout', float), 79 | ('movement_dim_size', int), 80 | ('movement_step_size', int), 81 | ('vocab_size', int), 82 | ('use_utterances', bool), 83 | ('use_cuda', bool) 84 | ]) 85 | 86 | AgentModuleConfig = NamedTuple("AgentModuleConfig", [ 87 | ('time_horizon', int), 88 | ('feat_vec_size', int), 89 | ('movement_dim_size', int), 90 | ('goal_size', int), 91 | ('vocab_size', int), 92 | ('utterance_processor', GoalPredictingProcessingModuleConfig), 93 | ('physical_processor', ProcessingModuleConfig), 94 | ('action_processor', ActionModuleConfig), 95 | ('word_counter', WordCountingModuleConfig), 96 | ('use_utterances', bool), 97 | ('penalize_words', bool), 98 | ('use_cuda', bool) 99 | ]) 100 | 101 | default_training_config = TrainingConfig( 102 | num_epochs=DEFAULT_NUM_EPOCHS, 103 | learning_rate=DEFAULT_LR, 104 | load_model=False, 105 | load_model_file="", 106 | save_model=SAVE_MODEL, 107 | save_model_file=DEFAULT_MODEL_FILE, 108 | use_cuda=False) 109 | 110 | default_word_counter_config = WordCountingModuleConfig( 111 | vocab_size=DEFAULT_VOCAB_SIZE, 112 | oov_prob=DEFAULT_OOV_PROB, 113 | use_cuda=False) 114 | 115 | default_game_config = GameConfig( 116 | DEFAULT_BATCH_SIZE, 117 | DEFAULT_WORLD_DIM, 118 | MAX_AGENTS, 119 | MAX_LANDMARKS, 120 | MIN_AGENTS, 121 | MIN_LANDMARKS, 122 | NUM_SHAPES, 123 | NUM_COLORS, 124 | USE_UTTERANCES, 125 | DEFAULT_VOCAB_SIZE, 126 | DEFAULT_HIDDEN_SIZE, 127 | False 128 | ) 129 | 130 | if USE_UTTERANCES: 131 | feat_size = DEFAULT_FEAT_VEC_SIZE*3 132 | else: 133 | feat_size = DEFAULT_FEAT_VEC_SIZE*2 134 | 135 | def get_processor_config_with_input_size(input_size): 136 | return ProcessingModuleConfig( 137 | input_size=input_size, 138 | 
hidden_size=DEFAULT_HIDDEN_SIZE, 139 | dropout=DEFAULT_DROPOUT) 140 | 141 | default_action_module_config = ActionModuleConfig( 142 | goal_processor=get_processor_config_with_input_size(constants.GOAL_SIZE), 143 | action_processor=get_processor_config_with_input_size(feat_size), 144 | hidden_size=DEFAULT_HIDDEN_SIZE, 145 | dropout=DEFAULT_DROPOUT, 146 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 147 | movement_step_size=constants.MOVEMENT_STEP_SIZE, 148 | vocab_size=DEFAULT_VOCAB_SIZE, 149 | use_utterances=USE_UTTERANCES, 150 | use_cuda=False) 151 | 152 | default_goal_predicting_module_config = GoalPredictingProcessingModuleConfig( 153 | processor=get_processor_config_with_input_size(DEFAULT_VOCAB_SIZE), 154 | hidden_size=DEFAULT_HIDDEN_SIZE, 155 | dropout=DEFAULT_DROPOUT, 156 | goal_size=constants.GOAL_SIZE) 157 | 158 | default_agent_config = AgentModuleConfig( 159 | time_horizon=DEFAULT_TIME_HORIZON, 160 | feat_vec_size=DEFAULT_FEAT_VEC_SIZE, 161 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 162 | utterance_processor=default_goal_predicting_module_config, 163 | physical_processor=get_processor_config_with_input_size(constants.MOVEMENT_DIM_SIZE + constants.PHYSICAL_EMBED_SIZE), 164 | action_processor=default_action_module_config, 165 | word_counter=default_word_counter_config, 166 | goal_size=constants.GOAL_SIZE, 167 | vocab_size=DEFAULT_VOCAB_SIZE, 168 | use_utterances=USE_UTTERANCES, 169 | penalize_words=PENALIZE_WORDS, 170 | use_cuda=False) 171 | 172 | def get_training_config(kwargs): 173 | return TrainingConfig( 174 | num_epochs=kwargs['n_epochs'] or default_training_config.num_epochs, 175 | learning_rate=kwargs['learning_rate'] or default_training_config.learning_rate, 176 | load_model=bool(kwargs['load_model_weights']), 177 | load_model_file=kwargs['load_model_weights'] or default_training_config.load_model_file, 178 | save_model=default_training_config.save_model, 179 | save_model_file=kwargs['save_model_weights'] or default_training_config.save_model_file, 180 | use_cuda=kwargs['use_cuda']) 181 | 182 | def get_game_config(kwargs): 183 | return GameConfig( 184 | batch_size=kwargs['batch_size'] or default_game_config.batch_size, 185 | world_dim=kwargs['world_dim'] or default_game_config.world_dim, 186 | max_agents=kwargs['max_agents'] or default_game_config.max_agents, 187 | min_agents=kwargs['min_agents'] or default_game_config.min_agents, 188 | max_landmarks=kwargs['max_landmarks'] or default_game_config.max_landmarks, 189 | min_landmarks=kwargs['min_landmarks'] or default_game_config.min_landmarks, 190 | num_shapes=kwargs['num_shapes'] or default_game_config.num_shapes, 191 | num_colors=kwargs['num_colors'] or default_game_config.num_colors, 192 | use_utterances=not kwargs['no_utterances'], 193 | vocab_size=kwargs['vocab_size'] or default_game_config.vocab_size, 194 | memory_size=default_game_config.memory_size, 195 | use_cuda=kwargs['use_cuda'] 196 | ) 197 | 198 | def get_agent_config(kwargs): 199 | vocab_size = kwargs['vocab_size'] or DEFAULT_VOCAB_SIZE 200 | use_utterances = (not kwargs['no_utterances']) 201 | use_cuda = kwargs['use_cuda'] 202 | penalize_words = kwargs['penalize_words'] 203 | oov_prob = kwargs['oov_prob'] or DEFAULT_OOV_PROB 204 | if use_utterances: 205 | feat_vec_size = DEFAULT_FEAT_VEC_SIZE*3 206 | else: 207 | feat_vec_size = DEFAULT_FEAT_VEC_SIZE*2 208 | utterance_processor = GoalPredictingProcessingModuleConfig( 209 | processor=get_processor_config_with_input_size(vocab_size), 210 | hidden_size=DEFAULT_HIDDEN_SIZE, 211 | dropout=DEFAULT_DROPOUT, 212 
| goal_size=constants.GOAL_SIZE) 213 | action_processor = ActionModuleConfig( 214 | goal_processor=get_processor_config_with_input_size(constants.GOAL_SIZE), 215 | action_processor=get_processor_config_with_input_size(feat_vec_size), 216 | hidden_size=DEFAULT_HIDDEN_SIZE, 217 | dropout=DEFAULT_DROPOUT, 218 | movement_dim_size=constants.MOVEMENT_DIM_SIZE, 219 | movement_step_size=constants.MOVEMENT_STEP_SIZE, 220 | vocab_size=vocab_size, 221 | use_utterances=use_utterances, 222 | use_cuda=use_cuda) 223 | word_counter = WordCountingModuleConfig( 224 | vocab_size=vocab_size, 225 | oov_prob=oov_prob, 226 | use_cuda=use_cuda) 227 | 228 | return AgentModuleConfig( 229 | time_horizon=kwargs['n_timesteps'] or default_agent_config.time_horizon, 230 | feat_vec_size=default_agent_config.feat_vec_size, 231 | movement_dim_size=default_agent_config.movement_dim_size, 232 | utterance_processor=utterance_processor, 233 | physical_processor=default_agent_config.physical_processor, 234 | action_processor=action_processor, 235 | word_counter=word_counter, 236 | goal_size=default_agent_config.goal_size, 237 | vocab_size=vocab_size, 238 | use_utterances=use_utterances, 239 | penalize_words=penalize_words, 240 | use_cuda=use_cuda 241 | ) 242 | 243 | -------------------------------------------------------------------------------- /modules/game.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch import Tensor 4 | from torch.autograd import Variable 5 | 6 | """ 7 | The GameModule takes in all actions(movement, utterance, goal prediction) 8 | of all agents for a given timestep and returns the total cost for that 9 | timestep. 10 | 11 | Game consists of: 12 | -num_agents (scalar) 13 | -num_landmarks (scalar) 14 | -locations: [num_agents + num_landmarks, 2] 15 | -physical: [num_agents + num_landmarks, entity_embed_size] 16 | -utterances: [num_agents, vocab_size] 17 | -goals: [num_agents, goal_size] 18 | -location_observations: [num_agents, num_agents + num_landmarks, 2] 19 | -memories 20 | -utterance: [num_agents, num_agents, memory_size] 21 | -physical:[num_agents, num_agents + num_landmarks, memory_size] 22 | -action: [num_agents, memory_size] 23 | 24 | config needs: -batch_size, -using_utterances, -world_dim, -vocab_size, -memory_size, -num_colors -num_shapes 25 | """ 26 | 27 | class GameModule(nn.Module): 28 | 29 | def __init__(self, config, num_agents, num_landmarks): 30 | super(GameModule, self).__init__() 31 | 32 | self.batch_size = config.batch_size # scalar: num games in this batch 33 | self.using_utterances = config.use_utterances # bool: whether current batch allows utterances 34 | self.using_cuda = config.use_cuda 35 | self.num_agents = num_agents # scalar: number of agents in this batch 36 | self.num_landmarks = num_landmarks # scalar: number of landmarks in this batch 37 | self.num_entities = self.num_agents + self.num_landmarks # type: int 38 | 39 | if self.using_cuda: 40 | self.Tensor = torch.cuda.FloatTensor 41 | else: 42 | self.Tensor = torch.FloatTensor 43 | 44 | locations = torch.rand(self.batch_size, self.num_entities, 2) * config.world_dim 45 | colors = (torch.rand(self.batch_size, self.num_entities, 1) * config.num_colors).floor() 46 | shapes = (torch.rand(self.batch_size, self.num_entities, 1) * config.num_shapes).floor() 47 | 48 | goal_agents = self.Tensor(self.batch_size, self.num_agents, 1) 49 | goal_entities = (torch.rand(self.batch_size, self.num_agents, 1) * self.num_landmarks).floor().long() + 
self.num_agents 50 | goal_locations = self.Tensor(self.batch_size, self.num_agents, 2) 51 | 52 | if self.using_cuda: 53 | locations = locations.cuda() 54 | colors = colors.cuda() 55 | shapes = shapes.cuda() 56 | goal_entities = goal_entities.cuda() 57 | 58 | # [batch_size, num_entities, 2] 59 | self.locations = Variable(locations) 60 | # [batch_size, num_entities, 2] 61 | self.physical = Variable(torch.cat((colors,shapes), 2).float()) 62 | 63 | #TODO: Bad for loop? 64 | for b in range(self.batch_size): 65 | goal_agents[b] = torch.randperm(self.num_agents) 66 | 67 | for b in range(self.batch_size): 68 | goal_locations[b] = self.locations.data[b][goal_entities[b].squeeze()] 69 | 70 | # [batch_size, num_agents, 3] 71 | self.goals = Variable(torch.cat((goal_locations, goal_agents), 2)) 72 | goal_agents = Variable(goal_agents) 73 | 74 | 75 | if self.using_cuda: 76 | self.memories = { 77 | "physical": Variable(torch.zeros(self.batch_size, self.num_agents, self.num_entities, config.memory_size).cuda()), 78 | "action": Variable(torch.zeros(self.batch_size, self.num_agents, config.memory_size).cuda())} 79 | else: 80 | self.memories = { 81 | "physical": Variable(torch.zeros(self.batch_size, self.num_agents, self.num_entities, config.memory_size)), 82 | "action": Variable(torch.zeros(self.batch_size, self.num_agents, config.memory_size))} 83 | 84 | if self.using_utterances: 85 | if self.using_cuda: 86 | self.utterances = Variable(torch.zeros(self.batch_size, self.num_agents, config.vocab_size).cuda()) 87 | self.memories["utterance"] = Variable(torch.zeros(self.batch_size, self.num_agents, self.num_agents, config.memory_size).cuda()) 88 | else: 89 | self.utterances = Variable(torch.zeros(self.batch_size, self.num_agents, config.vocab_size)) 90 | self.memories["utterance"] = Variable(torch.zeros(self.batch_size, self.num_agents, self.num_agents, config.memory_size)) 91 | 92 | agent_baselines = self.locations[:, :self.num_agents, :] 93 | 94 | sort_idxs = torch.sort(self.goals[:,:,2])[1] 95 | self.sorted_goals = Variable(self.Tensor(self.goals.size())) 96 | # TODO: Bad for loop? 
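        # sort_idxs orders each game's goals by the agent index stored in goals[:, :, 2], so that
        # sorted_goals[b, i] ends up holding the goal location assigned to agent i; this is what
        # compute_physical_cost compares agent i's location against.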
97 | for b in range(self.batch_size): 98 | self.sorted_goals[b] = self.goals[b][sort_idxs[b]] 99 | self.sorted_goals = self.sorted_goals[:,:,:2] 100 | 101 | # [batch_size, num_agents, num_entities, 2] 102 | self.observations = self.locations.unsqueeze(1) - agent_baselines.unsqueeze(2) 103 | 104 | new_obs = self.goals[:,:,:2] - agent_baselines 105 | 106 | # [batch_size, num_agents, 2] [batch_size, num_agents, 1] 107 | self.observed_goals = torch.cat((new_obs, goal_agents), dim=2) 108 | 109 | 110 | 111 | """ 112 | Updates game state given all movements and utterances and returns accrued cost 113 | - movements: [batch_size, num_agents, config.movement_size] 114 | - utterances: [batch_size, num_agents, config.utterance_size] 115 | - goal_predictions: [batch_size, num_agents, num_agents, config.goal_size] 116 | Returns: 117 | - scalar: total cost of all games in the batch 118 | """ 119 | def forward(self, movements, goal_predictions, utterances): 120 | self.locations = self.locations + movements 121 | agent_baselines = self.locations[:, :self.num_agents] 122 | self.observations = self.locations.unsqueeze(1)- agent_baselines.unsqueeze(2) 123 | new_obs = self.goals[:,:,:2] - agent_baselines 124 | goal_agents = self.goals[:,:,2].unsqueeze(2) 125 | self.observed_goals = torch.cat((new_obs, goal_agents), dim=2) 126 | if self.using_utterances: 127 | self.utterances = utterances 128 | return self.compute_cost(movements, goal_predictions, utterances) 129 | else: 130 | return self.compute_cost(movements, goal_predictions) 131 | 132 | def compute_cost(self, movements, goal_predictions, utterances=None): 133 | physical_cost = self.compute_physical_cost() 134 | movement_cost = self.compute_movement_cost(movements) 135 | goal_pred_cost = self.compute_goal_pred_cost(goal_predictions) 136 | return physical_cost + goal_pred_cost + movement_cost 137 | 138 | """ 139 | Computes the total cost agents get from being near their goals 140 | agent locations are stored as [batch_size, num_agents + num_landmarks, entity_embed_size] 141 | """ 142 | def compute_physical_cost(self): 143 | return 2*torch.sum( 144 | torch.sqrt( 145 | torch.sum( 146 | torch.pow( 147 | self.locations[:,:self.num_agents,:] - self.sorted_goals, 148 | 2), 149 | -1) 150 | ) 151 | ) 152 | 153 | """ 154 | Computes the total cost agents get from predicting others' goals 155 | goal_predictions: [batch_size, num_agents, num_agents, goal_size] 156 | goal_predictions[., a_i, a_j, :] = a_i's prediction of a_j's goal with location relative to a_i 157 | We want: 158 | real_goal_locations[., a_i, a_j, :] = a_j's goal with location relative to a_i 159 | We have: 160 | goals[., a_j, :] = a_j's goal with absolute location 161 | observed_goals[., a_j, :] = a_j's goal with location relative to a_j 162 | Which means we want to build an observed_goals-like tensor but relative to each agent 163 | real_goal_locations[., a_i, a_j, :] = goals[., a_j, :] - locations[a_i] 164 | 165 | 166 | """ 167 | def compute_goal_pred_cost(self, goal_predictions): 168 | relative_goal_locs = self.goals.unsqueeze(1)[:,:,:,:2] - self.locations.unsqueeze(2)[:, :self.num_agents, :, :] 169 | goal_agents = self.goals.unsqueeze(1)[:,:,:,2:].expand_as(relative_goal_locs)[:,:,:,-1:] 170 | relative_goals = torch.cat((relative_goal_locs, goal_agents), dim=3) 171 | return torch.sum( 172 | torch.sqrt( 173 | torch.sum( 174 | torch.pow( 175 | goal_predictions - relative_goals, 176 | 2), 177 | -1) 178 | ) 179 | ) 180 | 181 | """ 182 | Computes the total cost agents get from moving 183 | """ 184 | def 
compute_movement_cost(self, movements): 185 | return torch.sum(torch.sqrt(torch.sum(torch.pow(movements, 2), -1))) 186 | 187 | def get_avg_agent_to_goal_distance(self): 188 | return torch.sum( 189 | torch.sqrt( 190 | torch.sum( 191 | torch.pow( 192 | self.locations[:,:self.num_agents,:] - self.sorted_goals, 193 | 2), 194 | -1) 195 | ) 196 | ) 197 | 198 | --------------------------------------------------------------------------------
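For orientation, here is a minimal sketch of how the pieces above fit together for a single evaluation episode. It assumes the default configs from configs.py and a CPU run, and mirrors what train.py does inside its epoch loop; the agent/landmark counts below are just illustrative values within the configured min/max ranges.

from modules.agent import AgentModule
from modules.game import GameModule
import configs

agent = AgentModule(configs.default_agent_config)
agent.reset()
agent.train(False)   # eval mode: utterances are argmaxed instead of Gumbel-softmaxed

# 2 agents / 3 landmarks is one valid draw given the default min/max settings
game = GameModule(configs.default_game_config, 2, 3)

# Runs a full episode of agent_config.time_horizon steps; in eval mode the
# per-timestep locations, movements and utterances are returned for inspection.
total_loss, timesteps = agent(game)
print(total_loss.data[0], len(timesteps))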