├── slime_environments
│   ├── agents
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── DQN.py
│   │   │   └── utils.py
│   │   ├── QLearning
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   ├── MA_QLearning.py
│   │   │   └── runs
│   │   │       └── multi_test_01_06_10_2023__21_12_57.csv
│   │   ├── SA_QLearning
│   │   │   ├── sa-learning-params.json
│   │   │   ├── single-agent-params.json
│   │   │   └── SA_QLearning.py
│   │   ├── Sarsa
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── MA_SARSA.py
│   │   ├── single-agent-params.json
│   │   ├── DQNet_Centralized
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── Centralized.py
│   │   ├── DQNet_Decentralized
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── Decentralized.py
│   │   ├── Baselines-A2C-MLP.py
│   │   └── SA_test_env.py
│   ├── environments
│   │   ├── __init__.py
│   │   ├── PatchTest.py
│   │   ├── SlimeEnvSingleAgent.py
│   │   └── SlimeEnvMultiAgent.py
│   └── __init__.py
├── .gitignore
├── setup.py
├── env-test-gym.py
├── requirements.txt
└── README.md
/slime_environments/agents/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.idea
2 | **/__pycache__
3 | **/runs/*
4 | **/models/*
--------------------------------------------------------------------------------
/slime_environments/environments/__init__.py:
--------------------------------------------------------------------------------
1 | from .SlimeEnvSingleAgent import Slime
2 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 |     name="slime_environments",
5 |     version="1.0.0",
6 |     packages=find_packages(),
7 |     install_requires=['gym', 'pygame']
8 | )
--------------------------------------------------------------------------------
/slime_environments/agents/QLearning/ma-learning-params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "alpha": 0.2,
3 |   "gamma": 0.8,
4 |   "epsilon": 0.9,
5 |   "decay": 0.9995,
6 |   "train_episodes": 100,
7 |   "TRAIN_LOG_EVERY": 10,
8 |   "test_episodes": 10,
9 |   "TEST_LOG_EVERY": 1,
10 |   "OUTPUT_FILE": "multi-test-01",
11 |   "actions": ["move-toward-chemical", "random-walk", "drop-chemical"]
12 | }
--------------------------------------------------------------------------------
/slime_environments/agents/SA_QLearning/sa-learning-params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "alpha": 0.2,
3 |   "gamma": 0.8,
4 |   "epsilon": 0.9,
5 |   "decay": 0.9998,
6 |   "train_episodes": 100,
7 |   "TRAIN_LOG_EVERY": 10,
8 |   "test_episodes": 10,
9 |   "TEST_LOG_EVERY": 1,
10 |   "OUTPUT_FILE": "single-test-01",
11 |   "actions": ["move-toward-chemical", "random-walk", "drop-chemical"]
12 | }
--------------------------------------------------------------------------------
/slime_environments/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 | 
3 | register(
4 |     id='Slime-v0',
5 |     entry_point='slime_environments.environments:Slime',
6 |     max_episode_steps=10000, # DOC The keyword argument max_episode_steps=300 will ensure that GridWorld environments that are instantiated via gym.make will be wrapped in a TimeLimit 
wrapper (https://www.gymlibrary.dev/content/environment_creation/#registering-envs) 7 | nondeterministic=True # DOC seeding not supported atm 8 | ) -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.2, 3 | "gamma": 0.8, 4 | "epsilon": 0.90, 5 | "epsilon_end": 0.00, 6 | "epsilon_test": 0.0, 7 | "decay": 20e-9, 8 | "train_episodes": 100, 9 | "TRAIN_LOG_EVERY": 1, 10 | "test_episodes": 10, 11 | "TEST_LOG_EVERY": 1, 12 | "OUTPUT_FILE": "multi-test-01", 13 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 14 | "fist_saveimages_episode": 1, 15 | "middle_saveimages_episode": 50, 16 | "last_saveimages_episode": 100 17 | } -------------------------------------------------------------------------------- /slime_environments/agents/single-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 50, 3 | "sniff_threshold": 0.9, 4 | "diffuse_area": 3, 5 | "diffuse_mode": "cascade", 6 | "follow_mode": "prob", 7 | "smell_area": 5, 8 | "lay_area": 1, 9 | "lay_amount": 3, 10 | "evaporation": 0.9, 11 | "cluster_threshold": 30, 12 | "cluster_radius": 10, 13 | "rew": 100, 14 | "penalty": -1, 15 | "episode_ticks": 500, 16 | "W": 66, 17 | "H": 38, 18 | "PATCH_SIZE": 20, 19 | "TURTLE_SIZE": 16, 20 | "FPS": 30, 21 | "SHADE_STRENGTH": 10, 22 | "SHOW_CHEM_TEXT": false, 23 | "CLUSTER_FONT_SIZE": 12, 24 | "CHEMICAL_FONT_SIZE": 8 25 | } -------------------------------------------------------------------------------- /slime_environments/agents/SA_QLearning/single-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 50, 3 | "sniff_threshold": 0.9, 4 | "diffuse_area": 3, 5 | "diffuse_mode": "cascade", 6 | "follow_mode": "prob", 7 | "smell_area": 5, 8 | "lay_area": 1, 9 | "lay_amount": 3, 10 | "evaporation": 0.9, 11 | "cluster_threshold": 30, 12 | "cluster_radius": 10, 13 | "rew": 100, 14 | "penalty": -1, 15 | "episode_ticks": 500, 16 | "W": 66, 17 | "H": 38, 18 | "PATCH_SIZE": 20, 19 | "TURTLE_SIZE": 16, 20 | "FPS": 30, 21 | "SHADE_STRENGTH": 10, 22 | "SHOW_CHEM_TEXT": false, 23 | "CLUSTER_FONT_SIZE": 12, 24 | "CHEMICAL_FONT_SIZE": 8 25 | } -------------------------------------------------------------------------------- /env-test-gym.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.env_checker import check_env 2 | from stable_baselines3 import DQN 3 | import slime_environments 4 | import gym 5 | import json 6 | from gym.spaces import MultiBinary 7 | import numpy as np 8 | 9 | PARAMS_FILE = "slime_environments/agents/single-agent-params.json" 10 | with open(PARAMS_FILE) as f: 11 | params = json.load(f) 12 | 13 | # print(gym.__version__) 14 | env = gym.make("Slime-v0", **params) 15 | check_env(env) 16 | print("Environment compatible with Stable Baselines3") 17 | 18 | model = DQN("MlpPolicy", env, verbose=1) 19 | model.learn(total_timesteps=1000,log_interval=4) 20 | print("SB3 DQN sample training completed.") 21 | -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "step_lr": 0.9999995, 4 | "batch_size": 128, 5 | 
"memory_capacity": 51200, 6 | "update_net_every": 64, 7 | "alpha": 0.005, 8 | "gamma": 0.8, 9 | "epsilon": 0.90, 10 | "epsilon_end": 0.00, 11 | "epsilon_test": 0.0, 12 | "decay": 20e-9, 13 | "train_episodes": 100, 14 | "TRAIN_LOG_EVERY": 1, 15 | "test_episodes": 10, 16 | "TEST_LOG_EVERY": 1, 17 | "OUTPUT_FILE": "multi-test-01", 18 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 19 | "fist_saveimages_episode": 1, 20 | "middle_saveimages_episode": 50, 21 | "last_saveimages_episode": 100 22 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "step_lr": 0.99992, 4 | "batch_size": 128, 5 | "memory_capacity": 51200, 6 | "update_net_every": 4, 7 | "alpha": 0.005, 8 | "gamma": 0.8, 9 | "epsilon": 0.90, 10 | "epsilon_end": 0.00, 11 | "epsilon_test": 0.0, 12 | "decay": 2e-6, 13 | "train_episodes": 100, 14 | "TRAIN_LOG_EVERY": 1, 15 | "test_episodes": 10, 16 | "TEST_LOG_EVERY": 1, 17 | "OUTPUT_FILE": "multi-test-01", 18 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 19 | "fist_saveimages_episode": 1, 20 | "middle_saveimages_episode": 50, 21 | "last_saveimages_episode": 100 22 | } -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.9, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.9, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 500, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 
100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/Baselines-A2C-MLP.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3 import A2C 2 | import slime_environments 3 | import gym 4 | import json 5 | 6 | #model = A2C('MlpPolicy', 'Slime-v0').learn(100) # FIXME find way to pass arguments to env 7 | 8 | PARAMS_FILE = "single-agent-params.json" 9 | with open(PARAMS_FILE) as f: 10 | params = json.load(f) 11 | env = gym.make("Slime-v0", **params) 12 | 13 | model = A2C('MlpPolicy', env, verbose=2) # 2 = debug 14 | model.learn(total_timesteps=100*params['episode_ticks']) # total env steps 15 | 16 | obs, _ = env.reset() 17 | for i in range(100): 18 | action, _state = model.predict(obs, deterministic=True) # QUESTION "deterministic actions"? what does it mean? 
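    # DOC with deterministic=True, predict() returns the most probable action under the
    # DOC current policy (the argmax of the action distribution for a discrete action
    # DOC space) instead of sampling from it, so evaluation always replays the greedy
    # DOC action; use deterministic=False to sample stochastically instead, e.g.:
    # DOC   action, _state = model.predict(obs, deterministic=False)  # sampled action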
19 |     obs, reward, _, _, _ = env.step(action)
20 |     env.render()
21 | 
22 | env.close()
23 | 
--------------------------------------------------------------------------------
/slime_environments/agents/SA_test_env.py:
--------------------------------------------------------------------------------
1 | import json
2 | from itertools import permutations, combinations, product
3 | 
4 | import gym
5 | import slime_environments
6 | from gym.utils.env_checker import check_env #, check_reset_seed, check_reset_return_type
7 | 
8 | from slime_environments.environments.SlimeEnvSingleAgent import BooleanSpace
9 | 
10 | PARAMS_FILE = "single-agent-params.json"
11 | with open(PARAMS_FILE) as f:
12 |     params = json.load(f)
13 | #env = Slime(render_mode="human", **params)
14 | env = gym.make("Slime-v0", **params)
15 | 
16 | check_env(env.unwrapped, skip_render_check=False)
17 | #check_reset_seed(env)
18 | #check_reset_return_type(env)
19 | 
20 | # space = BooleanSpace(size=2)
21 | # print(f"size={space.size}, shape={space.shape}, values={space._values}, sample={space.sample()}")
22 | #
23 | # print(list(permutations([True, False])))
24 | # print(list(product([True, False], repeat=2)))
25 | # print(list(combinations([x for x in [True, False]], 2)))
26 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2022.9.24
2 | charset-normalizer==2.1.1
3 | cloudpickle==2.2.1
4 | cmake==3.25.0
5 | contourpy==1.1.0
6 | cycler==0.11.0
7 | Farama-Notifications==0.0.4
8 | filelock==3.9.0
9 | fonttools==4.40.0
10 | gym==0.26.2
11 | gym-notices==0.0.8
12 | gymnasium==0.28.1
13 | idna==3.4
14 | importlib-metadata==6.6.0
15 | importlib-resources==5.12.0
16 | jax-jumpy==1.0.0
17 | Jinja2==3.1.2
18 | kiwisolver==1.4.4
19 | lit==15.0.7
20 | MarkupSafe==2.1.2
21 | matplotlib==3.7.1
22 | mkl-fft==1.3.0
23 | mkl-random==1.0.2
24 | mkl-service==2.3.0
25 | mpmath==1.2.1
26 | networkx==3.0
27 | numpy==1.24.3
28 | opencv-contrib-python==4.7.0.72
29 | packaging==23.1
30 | pettingzoo==1.23.1
31 | Pillow==9.3.0
32 | pygame==2.4.0
33 | pyparsing==3.0.9
34 | python-dateutil==2.8.2
35 | requests==2.28.1
36 | sympy==1.11.1
37 | torch==2.0.0+cu117
38 | torchaudio==2.0.1+cu117
39 | torchvision==0.15.1+cu117
40 | tqdm==4.65.0
41 | triton==2.0.0
42 | typing_extensions==4.4.0
43 | urllib3==1.26.13
44 | zipp==3.15.0
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Slime environment for MARL
2 | 
3 | This project is a port of the [NetLogo "Slime" simulation model](http://www.netlogoweb.org/launch#http://ccl.northwestern.edu/netlogo/models/models/Sample%20Models/Biology/Slime.nlogo) to Python and to the Farama Foundation [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) API.
4 | The goal is to make the model available to third-party (MA)RL libraries such as [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) and Ray [RLlib](https://github.com/ray-project/ray).
5 | The motivation is to **experiment with (MA)RL applied to communication actions for achieving coordination** amongst agents.
6 | 
7 | # Project structure
8 | 
9 | The project is under development, hence **everything is provisional** and subject to change; nevertheless, any meaningful change will be reported here. 
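The environment is registered with Gym as `Slime-v0` by `slime_environments/__init__.py`, so — assuming the package has been installed, e.g. with `pip install -r requirements.txt` followed by `pip install -e .` from the repository root — a minimal smoke test along the lines of `env-test-gym.py` looks like this:

```python
import json

import gym
import slime_environments  # importing the package registers Slime-v0 with Gym
from stable_baselines3.common.env_checker import check_env

# environment parameters (population, patch/turtle sizes, rewards, ...) come from JSON
with open("slime_environments/agents/single-agent-params.json") as f:
    params = json.load(f)

env = gym.make("Slime-v0", **params)  # keyword arguments are forwarded to the Slime constructor
check_env(env)                        # sanity-check stable-baselines3 compatibility
```

The learning scripts under `slime_environments/agents/` follow the same pattern, reading the environment and learning hyper-parameters from the JSON files listed below.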
10 | The most advanced development branch is `sm-baselines-api`, where the single agent environment is compatible with Gym (still need to check Gymnasium) and on its way to be compatible with stable-baselines3. 11 | 12 | There, the project is structured as follows: 13 | 14 | ``` 15 | slime_environments 16 | |__environments 17 | |__SlimeEnvSingleAgent.py # single agent learning environment 18 | |__SlimeEnvMultiAgent.py # multi-agent learning environment 19 | |__agents 20 | |__MA_QLearning.py # independent Q-learning 21 | |__SA_QLearning.py # single agent Q-learning 22 | |__multi-agent-params.json # multi-agent environment params 23 | |__single-agent-params.json # single agent environment params 24 | |__ma-learning-params.json # multi-agent learning params 25 | |__sa-learning-params.json # single agent learning params 26 | ``` 27 | -------------------------------------------------------------------------------- /slime_environments/environments/PatchTest.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import numpy as np 3 | 4 | pygame.init() 5 | 6 | W = 50 # in number of patches 7 | H = 25 # in number of patches 8 | PATCH_SIZE = 20 # thus window width and height are W * PATCH_SIZE and H * PATCH_SIZE 9 | TURTLE_SIZE = PATCH_SIZE - 1 # turtles must be slightly smaller 10 | 11 | N_TURTLES = 10 12 | 13 | SHOW_TURTLES = True 14 | SHOW_PATCHES = True 15 | MOVEMENT = True 16 | 17 | BLACK = (0, 0, 0) 18 | BLUE = (0, 0, 255) 19 | WHITE = (255, 255, 255) 20 | 21 | coords = [] 22 | offset = PATCH_SIZE // 2 23 | W_pixels = W * PATCH_SIZE 24 | H_pixels = H * PATCH_SIZE 25 | for x in range(offset, (W_pixels - offset) + 1, PATCH_SIZE): 26 | for y in range(offset, (H_pixels - offset) + 1, PATCH_SIZE): 27 | coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 28 | 29 | # nel dizionario associato alle coordinate x,y della patch puoi mettere i dati che vuoi, come 30 | # - quantita di feromone, 31 | # - lista degli ID delle turtle che sono sulla patch 32 | # - ...(tutto quello che ti puo servire) 33 | patches = {coords[i]: {"id": i} for i in range(len(coords))} 34 | # stesso discorso per il dizionario associato all'ID della turtle 35 | turtles = {i: {"pos": coords[np.random.randint(len(coords))]} for i in range(N_TURTLES)} 36 | 37 | screen = pygame.display.set_mode((W_pixels, H_pixels)) 38 | pygame.display.set_caption("PATCH TEST") 39 | 40 | clock = pygame.time.Clock() 41 | 42 | playing = True 43 | while playing: 44 | for event in pygame.event.get(): 45 | if event.type == pygame.QUIT: # chiusura finestra -> termina il programma 46 | playing = False 47 | screen.fill(BLACK) 48 | if SHOW_TURTLES: 49 | # print("turtles:", end=" ") 50 | for t in turtles: #  una per patch 51 | # print(t, end=" ") 52 | pygame.draw.circle(screen, BLUE, turtles[t]["pos"], 53 | TURTLE_SIZE // 2) # ultimo parametro è il raggio del cerchio 54 | # print() 55 | if SHOW_PATCHES: 56 | # mostra le patch come quadrati 57 | # print("patches:", end=" ") 58 | for p in patches: 59 | # print(patches[p]["id"], end=" ") 60 | pygame.draw.rect(screen, WHITE, pygame.Rect(p[0] - offset, p[1] - offset, PATCH_SIZE - 1, PATCH_SIZE - 1), 61 | width=1) 62 | # print() 63 | # mostra la griglia che evidenzia le patch 64 | # for p in range(PATCH_SIZE, W_pixels, PATCH_SIZE): 65 | # pygame.draw.line(screen, WHITE, (p, 0), (p, H_pixels)) 66 | # for p in range(PATCH_SIZE, H_pixels, PATCH_SIZE): 67 | # pygame.draw.line(screen, WHITE, (0, p), (W_pixels, p)) 68 | if MOVEMENT: 69 | choice = 
[PATCH_SIZE, -PATCH_SIZE, 0] 70 | # choice = [PATCH_SIZE] 71 | for t in turtles: 72 | x, y = turtles[t]["pos"] 73 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 74 | if x2 < 0: 75 | x2 = W_pixels - offset 76 | if x2 > W_pixels: 77 | x2 = 0 + offset 78 | if y2 < 0: 79 | y2 = H_pixels - offset 80 | if y2 > H_pixels: 81 | y2 = 0 + offset 82 | turtles[t]["pos"] = (x2, y2) 83 | 84 | pygame.display.flip() 85 | # clock.tick(1) 86 | 87 | pygame.quit() 88 | -------------------------------------------------------------------------------- /slime_environments/agents/utils/DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from collections import deque 5 | 6 | import math 7 | import random 8 | 9 | class ReplayMemory(object): 10 | 11 | def __init__(self, Transition, capacity): 12 | self.memory = deque([], maxlen=capacity) 13 | self.Transition = Transition 14 | 15 | def push(self, *args): 16 | """Save a transition""" 17 | self.memory.append(self.Transition(*args)) 18 | 19 | def sample(self, batch_size): 20 | return random.sample(self.memory, batch_size) 21 | 22 | def __len__(self): 23 | return len(self.memory) 24 | 25 | class DQN(nn.Module): 26 | 27 | def __init__(self, n_observations, n_actions, epsilon): 28 | super(DQN, self).__init__() 29 | self.layer1 = nn.Linear(n_observations, 128) 30 | self.layer2 = nn.Linear(128, 128) 31 | 32 | self.layer3 = nn.Linear(128, 256) 33 | self.layer4 = nn.Linear(256, 256) 34 | 35 | self.layer5 = nn.Linear(256, 512) 36 | self.layer6 = nn.Linear(512, n_actions) 37 | 38 | self.dropout = nn.Dropout(p=0.3) 39 | self.epsilon = epsilon 40 | 41 | 42 | def forward(self, x): 43 | x = self.layer1(x) 44 | x = F.relu(self.dropout(x)) 45 | x = F.relu(self.layer2(x) + x) 46 | 47 | x = self.layer3(x) 48 | x = F.relu(self.dropout(x)) 49 | x = F.relu(self.layer4(x) + x) 50 | 51 | x = self.layer5(x) 52 | x = F.relu(self.dropout(x)) 53 | return self.layer6(x) 54 | 55 | 56 | 57 | def optimize_model(Transition, memory, policy_net, target_net, gamma, batch_size, device): 58 | if len(memory) < batch_size: 59 | return policy_net, target_net, None 60 | 61 | transitions = memory.sample(batch_size) 62 | # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for 63 | # detailed explanation). This converts batch-array of Transitions 64 | # to Transition of batch-arrays. 65 | batch = Transition(*zip(*transitions)) 66 | 67 | # Compute a mask of non-final states and concatenate the batch elements 68 | # (a final state would've been the one after which simulation ended) 69 | non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=device, dtype=torch.bool) 70 | non_final_next_states = torch.cat([s for s in batch.next_state if s is not None]) 71 | state_batch = torch.cat(batch.state) 72 | action_batch = torch.cat(batch.action) 73 | reward_batch = torch.cat(batch.reward) 74 | 75 | # Compute Q(s_t, a) - the model computes Q(s_t), then we select the 76 | # columns of actions taken. These are the actions which would've been taken 77 | # for each batch state according to policy_net 78 | state_action_values = policy_net(state_batch).gather(1, action_batch) 79 | 80 | # Compute V(s_{t+1}) for all next states. 81 | # Expected values of actions for non_final_next_states are computed based 82 | # on the "older" target_net; selecting their best reward with max(1)[0]. 
83 | # This is merged based on the mask, such that we'll have either the expected 84 | # state value or 0 in case the state was final. 85 | next_state_values = torch.zeros(batch_size, device=device) 86 | with torch.no_grad(): 87 | next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0] 88 | # Compute the expected Q values 89 | expected_state_action_values = (next_state_values * gamma) + reward_batch 90 | 91 | # Compute Huber loss 92 | criterion = nn.SmoothL1Loss() 93 | loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1)) 94 | 95 | return policy_net, target_net, loss 96 | 97 | 98 | def select_action(env, agent, state, steps_done, policy_net, device, epsilon_end, decay): 99 | sample = random.random() 100 | policy_net.epsilon = epsilon_end + (policy_net.epsilon - epsilon_end) * math.exp(-1. * steps_done * decay) 101 | 102 | if sample > policy_net.epsilon: 103 | with torch.no_grad(): 104 | # t.max(1) will return the largest column value of each row. 105 | # second column on max result is index of where max element was 106 | # found, so we pick action with the larger expected reward. 107 | return policy_net(state).max(1)[1].view(1, 1), policy_net 108 | else: 109 | return torch.tensor([[env.action_space(agent).sample()]], device=device, dtype=torch.long), policy_net -------------------------------------------------------------------------------- /slime_environments/agents/SA_QLearning/SA_QLearning.py: -------------------------------------------------------------------------------- 1 | #from environments.SlimeEnvSingleAgent import Slime 2 | 3 | import datetime 4 | import gym 5 | import json 6 | import numpy as np 7 | import random 8 | import slime_environments 9 | 10 | PARAMS_FILE = "single-agent-params.json" 11 | LEARNING_PARAMS_FILE = "sa-learning-params.json" 12 | with open(LEARNING_PARAMS_FILE) as f: 13 | l_params = json.load(f) 14 | OUTPUT_FILE = f"{l_params['OUTPUT_FILE']}-{datetime.datetime.now()}.csv" 15 | with open(PARAMS_FILE) as f: 16 | params = json.load(f) 17 | #env = Slime(render_mode="human", **params) 18 | env = gym.make("Slime-v0", **params) 19 | 20 | # Q-Learning 21 | alpha = l_params["alpha"] # DOC learning rate (0 learn nothing 1 learn suddenly) 22 | gamma = l_params["gamma"] # DOC discount factor (0 care only bout immediate rewards, 1 care only about future ones) 23 | epsilon = l_params["epsilon"] # DOC chance of random action 24 | decay = l_params["decay"] # DOC di quanto diminuisce epsilon ogni episode (e.g. 
1500 episodes => decay = 0.9995) 25 | TRAIN_EPISODES = l_params["train_episodes"] 26 | TEST_EPISODES = l_params["test_episodes"] 27 | TRAIN_LOG_EVERY = l_params["TRAIN_LOG_EVERY"] 28 | TEST_LOG_EVERY = l_params["TEST_LOG_EVERY"] 29 | 30 | with open(OUTPUT_FILE, 'w') as f: 31 | f.write(f"{json.dumps(params, indent=2)}\n") 32 | f.write("----------\n") 33 | f.write(f"TRAIN_EPISODES = {TRAIN_EPISODES}\n") 34 | f.write(f"TEST_EPISODES = {TEST_EPISODES}\n") 35 | f.write("----------\n") 36 | f.write(f"alpha = {alpha}\n") 37 | f.write(f"gamma = {gamma}\n") 38 | f.write(f"epsilon = {epsilon}\n") 39 | f.write(f"decay = {decay}\n") 40 | f.write("----------\n") 41 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, Avg reward X episode, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical 42 | f.write(f"Episode, Tick, Avg cluster size X tick, ") 43 | for a in l_params["actions"]: 44 | f.write(f"{a}, ") 45 | f.write("Avg reward X episode\n") 46 | 47 | q_table = np.zeros([4, env.action_space.n]) 48 | 49 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 50 | actions_dict = {str(ep): {str(ac): 0 for ac in range(3)} for ep in range(1, TRAIN_EPISODES+1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 51 | # DOC dict che tiene conto della reward per ogni episodio {episode: _} 52 | reward_dict = {str(ep): 0 for ep in range(1, TRAIN_EPISODES+1)} 53 | # DOC dict che tiene conto della dimensioni di ogni cluster per ogni episodio 54 | cluster_dict = {} 55 | 56 | 57 | def observation_to_int_map(obs): 58 | if sum(obs) == 0: # DOC [False, False] 59 | mapped = sum(obs) # 0 60 | elif sum(obs) == 2: # DOC [True, True] 61 | mapped = 3 62 | elif int(obs[0]) == 1 and int(obs[1]) == 0: # DOC [True, False] ==> si trova in un cluster ma non su una patch con feromone --> difficile succeda 63 | mapped = 1 64 | else: 65 | mapped = 2 # DOC [False, True] 66 | return mapped 67 | 68 | 69 | # TRAINING 70 | print("Start training...") 71 | for ep in range(1, TRAIN_EPISODES+1): 72 | observation = env.reset() 73 | obs = observation_to_int_map(observation) 74 | for tick in range(1, params['episode_ticks']+1): 75 | if random.uniform(0, 1) < epsilon: 76 | action = env.action_space.sample() # Explore action space 77 | else: 78 | action = np.argmax(q_table[obs]) # Exploit learned values 79 | 80 | next_observation, reward, _, _ = env.step(action) 81 | next_obs = observation_to_int_map(next_observation) 82 | 83 | old_value = q_table[obs][action] 84 | next_max = np.max(q_table[next_obs]) # QUESTION: was with [s] 85 | 86 | new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max) 87 | q_table[obs][action] = new_value 88 | 89 | obs = next_obs 90 | 91 | actions_dict[str(ep)][str(action)] += 1 92 | reward_dict[str(ep)] += round(reward, 2) 93 | 94 | env.render() 95 | epsilon *= decay 96 | cluster_dict[str(ep)] = -1 # round(env.avg_cluster(), 2) 97 | if ep % TRAIN_LOG_EVERY == 0: 98 | print(f"EPISODE: {ep}") 99 | print(f"\tepsilon: {epsilon}") 100 | print(f"\tq_table: {q_table}") 101 | with open(OUTPUT_FILE, 'a') as f: 102 | f.write( 103 | f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 104 | f.write(f"{reward_dict[str(ep)]}\n") 105 | 106 | #print(json.dumps(cluster_dict, indent=2)) 107 | print("Training finished!\n") 108 | 109 | # DOC Evaluate agent's performance after Q-learning 110 | 
cluster_dict = {} 111 | print("Start testing...") 112 | for ep in range(1, TEST_EPISODES+1): 113 | reward_episode = 0 114 | observation, _ = env.reset() 115 | obs = observation_to_int_map(observation) 116 | for tick in range(params['episode_ticks']): 117 | action = np.argmax(q_table[obs]) 118 | observation, reward, _, _, = env.step(action) 119 | obs = observation_to_int_map(observation) 120 | reward_episode += reward 121 | env.render() 122 | if ep % TEST_LOG_EVERY == 0: 123 | print(f"EPISODE: {ep}") 124 | print(f"\tepisode reward: {reward_episode}") 125 | #cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 126 | cluster_dict[str(ep)] = -1 127 | print(json.dumps(cluster_dict, indent=2)) 128 | print("Testing finished!\n") 129 | env.close() 130 | -------------------------------------------------------------------------------- /slime_environments/agents/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import math 4 | import datetime 5 | import matplotlib.pyplot as plt 6 | from typing import Optional 7 | from tqdm import tqdm 8 | import numpy as np 9 | import subprocess 10 | import cv2 11 | from typing import Optional 12 | 13 | def read_params(params_path:str, learning_params_path:str): 14 | params, l_params = dict(), dict() 15 | try: 16 | with open(learning_params_path) as f: 17 | l_params = json.load(f) 18 | except Exception as e: 19 | print(f"[ERROR] could not open learning params file: {e}") 20 | 21 | try: 22 | with open(params_path) as f: 23 | params = json.load(f) 24 | except Exception as e: 25 | print(f"[ERROR] could not open learning params file: {e}") 26 | 27 | return params, l_params 28 | 29 | 30 | def state_to_int_map(obs: list): 31 | if sum(obs) == 0: # DOC [False, False] 32 | mapped = sum(obs) # 0 33 | elif sum(obs) == 2: # DOC [True, True] 34 | mapped = 3 35 | elif int(obs[0]) == 1 and int(obs[1]) == 0: # DOC [True, False] ==> si trova in un cluster ma non su una patch con feromone --> difficile succeda 36 | mapped = 1 37 | else: 38 | mapped = 2 # DOC [False, True] 39 | return mapped 40 | 41 | 42 | def setup(is_train:bool, curdir:str, params:dict, l_params:dict): 43 | if not os.path.isdir(os.path.join(curdir, "runs")): 44 | os.makedirs(os.path.join(curdir, "runs")) 45 | 46 | filename = l_params['OUTPUT_FILE'].replace("-", "_") + "_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".csv" 47 | output_dir = os.path.join(curdir, "runs", "train" + "_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S")) 48 | if is_train: 49 | os.makedirs(output_dir) 50 | output_file = os.path.join(curdir, "runs", output_dir, filename) 51 | 52 | # Q-Learning 53 | alpha = l_params["alpha"] # DOC learning rate (0 learn nothing 1 learn suddenly) 54 | gamma = l_params["gamma"] # DOC discount factor (0 care only bout immediate rewards, 1 care only about future ones) 55 | epsilon = l_params["epsilon"] # DOC chance of random action 56 | decay = l_params["decay"] # DOC di quanto diminuisce epsilon ogni episode (e.g. 
1500 episodes => decay = 0.9995) 57 | train_episodes = l_params["train_episodes"] 58 | test_episodes = l_params["test_episodes"] 59 | train_log_every = l_params["TRAIN_LOG_EVERY"] 60 | test_log_every = l_params["TEST_LOG_EVERY"] 61 | 62 | if is_train: 63 | with open(output_file, 'w') as f: 64 | f.write(f"{json.dumps(params, indent=2)}\n") 65 | f.write("----------\n") 66 | f.write(f"TRAIN_EPISODES = {train_episodes}\n") 67 | f.write(f"TEST_EPISODES = {test_episodes}\n") 68 | f.write("----------\n") 69 | f.write(f"alpha = {alpha}\n") 70 | f.write(f"gamma = {gamma}\n") 71 | f.write(f"epsilon = {epsilon}\n") 72 | f.write(f"decay = {decay}\n") 73 | f.write("----------\n") 74 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, Avg reward X episode, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical 75 | f.write(f"Episode, Tick, Avg cluster size X tick, ") 76 | 77 | for a in l_params["actions"]: 78 | f.write(f"{a}, ") 79 | 80 | for l in range(params['population'], params['population'] + params['learner_population']): 81 | for a in l_params["actions"]: 82 | f.write(f"(learner {l})-{a}, ") 83 | f.write("Avg reward X episode, loss, learning rate\n") 84 | 85 | return output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every 86 | 87 | 88 | def calculate_epsilon(type:str, episodes:int, ticks:int, learners:int, epsilon: float, decay:float, epsilon_end:Optional[float]): 89 | indexes = [] 90 | values = [] 91 | 92 | pbar = tqdm(range(episodes*ticks)) 93 | for ep in range(1, episodes + 1): 94 | for tick in range(1, ticks + 1): 95 | for agent in range(learners): 96 | index = agent + tick * learners + ep * ticks * learners 97 | indexes.append(index) 98 | if ep == 1 and tick == 1: 99 | pass 100 | else: 101 | if type.lower() in "normal": 102 | epsilon *= decay 103 | elif type.lower() == "esponential": 104 | epsilon = epsilon_end + (epsilon - epsilon_end) * math.exp(-1. 
* ep * decay) 105 | 106 | values.append(epsilon) 107 | pbar.update(1) 108 | 109 | plt.plot(indexes, values, marker='o') 110 | plt.xlabel('Steps') 111 | plt.ylabel('epsilon value') 112 | plt.show() 113 | print(f"Final value: {epsilon}") 114 | 115 | 116 | def positional_encoding(sequence_length, d_model): 117 | positions = np.arange(sequence_length)[:, np.newaxis] 118 | angles = np.arange(d_model)[np.newaxis, :] / np.power(10000, 2 * (np.arange(d_model) // 2) / d_model) 119 | encoding = positions * angles 120 | 121 | encoding[:, 0::2] = np.sin(encoding[:, 0::2]) # Colonne pari: seno 122 | encoding[:, 1::2] = np.cos(encoding[:, 1::2]) # Colonne dispari: coseno 123 | 124 | return encoding 125 | 126 | 127 | def update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr): 128 | with open(output_file, 'a') as f: 129 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 130 | avg_rew = 0 131 | 132 | for l in range(params['population'], params['population'] + params['learner_population']): 133 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 134 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 135 | 136 | avg_rew /= params['learner_population'] 137 | f.write(f"{avg_rew}, {sum(losses)/len(losses)}, {cur_lr}\n") 138 | 139 | 140 | def calc_final_lr(base_lr, gamma, step_size, iterations, batch_size): 141 | print(base_lr * gamma ** ((iterations / batch_size) // step_size) ) 142 | 143 | 144 | def save_env_image(image, tick, output_dir, cur_ep_dir): 145 | assert image is not None, "Environment error: render image is None" 146 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 147 | image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) 148 | if not os.path.exists(os.path.join(output_dir, "images", cur_ep_dir)): 149 | os.makedirs(os.path.join(output_dir, "images", cur_ep_dir)) 150 | cv2.imwrite(os.path.join(output_dir, "images", cur_ep_dir, f"{tick}.jpg"), image) 151 | 152 | 153 | def video_from_images(output_dir, last_ep_dir): 154 | subprocess.run([ 155 | "ffmpeg", "-y", "-framerate", "30", "-i", os.path.join(output_dir, "images", last_ep_dir, "%d.jpg"), \ 156 | '-c:v', 'libx264', '-vf', 'fps=30', '-pix_fmt', 'yuv420p', os.path.join(output_dir, "images", last_ep_dir, "video.mp4") 157 | ], check=True) 158 | 159 | 160 | def calc_evaporation(learners, lay_amount, decay): 161 | x = 0 162 | for i in range(1000): 163 | x = x * decay + lay_amount * learners 164 | print(x) 165 | 166 | 167 | if __name__ == "__main__": 168 | # calc_final_lr(1e-3, .9945, 1, 51200, 128) 169 | # calculate_epsilon("esponential", 100, 512, 100, 0.9, 20e-9, 0.0) 170 | calc_evaporation(100, 1, 0.8) 171 | -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/MA_QLearning.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import read_params, state_to_int_map, setup 10 | 11 | import argparse 12 | 13 | import json 14 | import numpy as np 15 | import random 16 | 17 | 18 | def create_agent(params:dict, l_params:dict, train_episodes:int): 19 | 
n_actions = len(l_params["actions"]) 20 | population = params['population'] 21 | learner_population = params['learner_population'] 22 | 23 | # Q_table 24 | qtable = {i: np.zeros([4, n_actions]) for i in range(population, population + learner_population)} 25 | 26 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 27 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 28 | # DOC dict che tiene conto della frequenza di scelta delle action di ogni agent per ogni episodio {episode: {agent: {action: _, action: _, ...}}} 29 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 30 | # DOC dict che tiene conto della reward di ogni agente per ogni episodio {episode: {agent: _}} 31 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 32 | # DOC dict che tiene conto dela dimensioni di ogni cluster per ogni episodio 33 | cluster_dict = {} 34 | 35 | return qtable, actions_dict, action_dict, reward_dict, cluster_dict 36 | 37 | 38 | def train(env, 39 | params:dict, 40 | qtable, 41 | actions_dict:dict, 42 | action_dict:dict, 43 | reward_dict:dict, 44 | cluster_dict:dict, 45 | train_episodes:int, 46 | train_log_every, 47 | alpha:float, 48 | gamma:float, 49 | decay:float, 50 | epsilon:float, 51 | output_file): 52 | # TRAINING 53 | print("Start training...") 54 | 55 | old_s = {} # DOC old state for each agent {agent: old_state} 56 | for ep in range(1, train_episodes + 1): 57 | env.reset() 58 | 59 | for tick in range(1, params['episode_ticks'] + 1): 60 | for agent in env.agent_iter(max_iter=params['learner_population']): 61 | cur_state, reward, _, _ = env.last(agent) 62 | cur_s = state_to_int_map(cur_state.observe()) 63 | 64 | if ep == 1 and tick == 1: 65 | action = env.action_space(agent).sample() 66 | else: 67 | old_value = qtable[agent][old_s[agent]][action] 68 | next_max = np.max(qtable[agent][cur_s]) # QUESTION: was with [action] too 69 | new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max) 70 | qtable[agent][old_s[agent]][action] = new_value 71 | 72 | if random.uniform(0, 1) < epsilon: 73 | # action = np.random.randint(0, 2) 74 | action = env.action_space(agent).sample() 75 | else: 76 | action = np.argmax(qtable[agent][cur_s]) 77 | env.step(action) 78 | 79 | old_s[agent] = cur_s 80 | 81 | actions_dict[str(ep)][str(action)] += 1 82 | action_dict[str(ep)][str(agent)][str(action)] += 1 83 | reward_dict[str(ep)][str(agent)] += round(reward, 2) 84 | 85 | env.move() 86 | env._evaporate() 87 | env._diffuse() 88 | env.render() 89 | #print(json.dumps(action_dict, indent=2)) 90 | epsilon *= decay 91 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 92 | 93 | if ep % train_log_every == 0: 94 | print(f"EPISODE: {ep}") 95 | print(f"\tepsilon: {epsilon}") 96 | #print(f"\tepisode reward: {reward_episode}") 97 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, move-toward-chemical (2), random-walk (0), drop-chemical (1), (learner 0)-move-toward-chemical, ..., Avg reward X episode 98 | 99 | with open(output_file, 'a') as f: 100 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, 
") 101 | avg_rew = 0 102 | 103 | for l in range(params['population'], params['population'] + params['learner_population']): 104 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 105 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 106 | 107 | avg_rew /= params['learner_population'] 108 | f.write(f"{avg_rew}\n") 109 | 110 | #print(json.dumps(cluster_dict, indent=2)) 111 | print("Training finished!\n") 112 | 113 | return env, qtable 114 | 115 | 116 | def eval(env, 117 | params:dict, 118 | test_episodes:int, 119 | qtable, 120 | test_log_every:int, 121 | epsilon:float,): 122 | # DOC Evaluate agent's performance after Q-learning 123 | cluster_dict = {} 124 | print("Start testing...") 125 | 126 | for ep in range(1, test_episodes + 1): 127 | env.reset() 128 | for tick in range(1, params['episode_ticks']+1): 129 | for agent in env.agent_iter(max_iter=params['learner_population']): 130 | state, _, _, _ = env.last(agent) 131 | s = state_to_int_map(state.observe()) 132 | 133 | if random.uniform(0, 1) < epsilon: 134 | # action = np.random.randint(0, 2) 135 | action = env.action_space(agent).sample() 136 | else: 137 | action = np.argmax(qtable[agent][s]) 138 | 139 | env.step(action) 140 | env.move() 141 | env._evaporate() 142 | env._diffuse() 143 | env.render() 144 | 145 | if ep % test_log_every == 0: 146 | print(f"EPISODE: {ep}") 147 | print(f"\tepsilon: {epsilon}") 148 | # print(f"\tepisode reward: {reward_episode}") 149 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 150 | 151 | print(json.dumps(cluster_dict, indent=2)) 152 | print("Testing finished!\n") 153 | env.close() 154 | 155 | 156 | def main(args): 157 | random.seed(args.random_seed) 158 | np.random.seed(args.random_seed) 159 | curdir = os.path.dirname(os.path.abspath(__file__)) 160 | 161 | params, l_params = read_params(args.params_path, args.learning_params_path) 162 | 163 | env = Slime(render_mode="human", **params) 164 | 165 | output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(curdir, params, l_params) 166 | 167 | qtable, actions_dict, action_dict, reward_dict, cluster_dict = create_agent(params, l_params,train_episodes) 168 | 169 | env, qtable = train(env, params, qtable, actions_dict, action_dict, reward_dict, cluster_dict, train_episodes, train_log_every, alpha, gamma, decay, epsilon, output_file) 170 | 171 | eval(env, params, test_episodes, qtable, test_log_every, epsilon) 172 | 173 | 174 | if __name__ == "__main__": 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument("params_path", type=str) 177 | parser.add_argument("learning_params_path", type=str) 178 | 179 | args = parser.parse_args() 180 | 181 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 182 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 183 | 184 | main(args) 185 | -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/MA_SARSA.py: -------------------------------------------------------------------------------- 1 | import math 2 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 3 | 4 | import sys 5 | import os 6 | 7 | 
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.append(parent_dir) 9 | 10 | from utils.utils import read_params, save_env_image, state_to_int_map, setup, video_from_images 11 | 12 | import argparse 13 | 14 | import os 15 | import json 16 | import numpy as np 17 | import random 18 | from tqdm import tqdm 19 | 20 | def create_agent(params:dict, l_params:dict, train_episodes:int): 21 | n_actions = len(l_params["actions"]) 22 | population = params['population'] 23 | learner_population = params['learner_population'] 24 | 25 | # Q_table 26 | qtable = {i: np.zeros([4, n_actions]) for i in range(population, population + learner_population)} 27 | 28 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 29 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 30 | # DOC dict che tiene conto della frequenza di scelta delle action di ogni agent per ogni episodio {episode: {agent: {action: _, action: _, ...}}} 31 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 32 | # DOC dict che tiene conto della reward di ogni agente per ogni episodio {episode: {agent: _}} 33 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 34 | # DOC dict che tiene conto dela dimensioni di ogni cluster per ogni episodio 35 | cluster_dict = {} 36 | 37 | return qtable, actions_dict, action_dict, reward_dict, cluster_dict 38 | 39 | 40 | def train(env, 41 | params:dict, 42 | l_params:dict, 43 | qtable:dict, 44 | actions_dict:dict, 45 | action_dict:dict, 46 | reward_dict:dict, 47 | cluster_dict:dict, 48 | train_episodes:int, 49 | train_log_every:int, 50 | alpha:float, 51 | gamma:float, 52 | decay:float, 53 | epsilon:float, 54 | output_file:str, 55 | output_dir:str): 56 | # TRAINING 57 | print("Start training...") 58 | 59 | old_s = {} # DOC old state for each agent {agent: old_state} 60 | for ep in range(1, train_episodes + 1): 61 | env.reset() 62 | 63 | for tick in tqdm(range(1, params['episode_ticks'] + 1)): 64 | for agent in env.agent_iter(max_iter=params['learner_population']): 65 | cur_state, reward, _, _ = env.last(agent) 66 | cur_s = state_to_int_map(cur_state.observe()) 67 | 68 | if ep == 1 and tick == 1: 69 | action = env.action_space(agent).sample() 70 | else: 71 | old_value = qtable[agent][old_s[agent]][action] 72 | next_action = None 73 | 74 | if random.uniform(0, 1) < epsilon: 75 | # action = np.random.randint(0, 2) 76 | next_action = env.action_space(agent).sample() 77 | else: 78 | next_action = np.argmax(qtable[agent][cur_s]) 79 | 80 | next_value = qtable[agent][cur_s][next_action] 81 | new_value = old_value + alpha * (reward + gamma * next_value - old_value) 82 | qtable[agent][old_s[agent]][action] = new_value 83 | 84 | action = next_action 85 | 86 | env.step(action) 87 | epsilon = epsilon_end + (epsilon - epsilon_end) * math.exp(-1. 
* ep * decay) 88 | old_s[agent] = cur_s 89 | 90 | actions_dict[str(ep)][str(action)] += 1 91 | action_dict[str(ep)][str(agent)][str(action)] += 1 92 | reward_dict[str(ep)][str(agent)] += round(reward, 2) 93 | 94 | env.move() 95 | env._evaporate() 96 | env._diffuse() 97 | image = env.render() 98 | #print(json.dumps(action_dict, indent=2)) 99 | 100 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 101 | if not os.path.exists(os.path.join(output_dir, "images")): 102 | os.makedirs(os.path.join(output_dir, "images")) 103 | 104 | if ep == int(l_params["fist_saveimages_episode"]): 105 | save_env_image(image, tick, output_dir, "first_episode") 106 | elif ep == int(l_params["middle_saveimages_episode"]): 107 | save_env_image(image, tick, output_dir, "middle_episode") 108 | elif ep == int(l_params["last_saveimages_episode"]): 109 | save_env_image(image, tick, output_dir, "last_episode") 110 | 111 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 112 | video_from_images(output_dir, "first_episode") 113 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 114 | video_from_images(output_dir, "middle_episode") 115 | 116 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 117 | 118 | if ep % train_log_every == 0: 119 | print("EPISODE: {}\tepsilon: {:.5f}".format(ep, epsilon)) 120 | 121 | with open(output_file, 'a') as f: 122 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 123 | avg_rew = 0 124 | 125 | for l in range(params['population'], params['population'] + params['learner_population']): 126 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 127 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 128 | 129 | avg_rew /= params['learner_population'] 130 | f.write(f"{avg_rew}\n") 131 | 132 | print(json.dumps(cluster_dict, indent=2)) 133 | print("Training finished!\n") 134 | 135 | return env, qtable, epsilon 136 | 137 | 138 | def eval(env, 139 | params:dict, 140 | test_episodes:int, 141 | qtable, 142 | test_log_every:int, 143 | epsilon:float,): 144 | # DOC Evaluate agent's performance after SARSA 145 | cluster_dict = {} 146 | print("Start testing...") 147 | 148 | for ep in range(1, test_episodes + 1): 149 | env.reset() 150 | for tick in range(1, params['episode_ticks']+1): 151 | for agent in env.agent_iter(max_iter=params['learner_population']): 152 | state, _, _, _ = env.last(agent) 153 | s = state_to_int_map(state.observe()) 154 | 155 | if random.uniform(0, 1) < epsilon: 156 | # action = np.random.randint(0, 2) 157 | action = env.action_space(agent).sample() 158 | else: 159 | action = np.argmax(qtable[agent][s]) 160 | 161 | env.step(action) 162 | env.move() 163 | env._evaporate() 164 | env._diffuse() 165 | env.render() 166 | 167 | if ep % test_log_every == 0: 168 | print(f"EPISODE: {ep}") 169 | print(f"\tepsilon: {epsilon}") 170 | # print(f"\tepisode reward: {reward_episode}") 171 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 172 | 173 | print(json.dumps(cluster_dict, indent=2)) 174 | print("Testing finished!\n") 175 | env.close() 176 | 177 | 178 | def main(args): 179 | random.seed(args.random_seed) 180 | np.random.seed(args.random_seed) 181 | curdir = os.path.dirname(os.path.abspath(__file__)) 182 | 183 | params, l_params = read_params(args.params_path, 
args.learning_params_path) 184 | epsilon_end = l_params["epsilon_end"] 185 | 186 | env = Slime(render_mode="human", **params) 187 | 188 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(True, curdir, params, l_params) 189 | 190 | qtable, actions_dict, action_dict, reward_dict, cluster_dict = create_agent(params, l_params,train_episodes) 191 | 192 | env, qtable, epsilon = train(env, params, l_params, qtable, actions_dict, action_dict, reward_dict, \ 193 | cluster_dict, train_episodes, train_log_every, alpha, gamma, decay, epsilon, epsilon_end, output_file, output_dir) 194 | 195 | eval(env, params, test_episodes, qtable, test_log_every, epsilon) 196 | 197 | if __name__ == "__main__": 198 | parser = argparse.ArgumentParser() 199 | parser.add_argument("params_path", type=str) 200 | parser.add_argument("learning_params_path", type=str) 201 | parser.add_argument("--random-seed", type=int, default=0) 202 | 203 | args = parser.parse_args() 204 | 205 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 206 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 207 | 208 | main(args) 209 | 210 | -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/Centralized.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import positional_encoding, read_params, save_env_image, setup, update_summary, video_from_images 10 | from utils.DQN import DQN, ReplayMemory, optimize_model, select_action 11 | 12 | import argparse 13 | 14 | import os 15 | import math 16 | import json 17 | import random 18 | import datetime 19 | from collections import namedtuple 20 | from tqdm import tqdm 21 | 22 | import torch 23 | import torch.optim as optim 24 | from torch.optim.lr_scheduler import StepLR 25 | 26 | 27 | def train(env, 28 | params, 29 | l_params, 30 | device, 31 | policy_net, 32 | target_net, 33 | train_episodes, 34 | train_log_every, 35 | output_file, 36 | output_dir, 37 | normalize, 38 | positional_encoding): 39 | Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward')) 40 | 41 | batch_size = l_params["batch_size"] 42 | learning_rate = l_params["lr"] 43 | epsilon_end = l_params["epsilon_end"] 44 | alpha = l_params["alpha"] 45 | gamma = l_params["gamma"] 46 | decay = l_params["decay"] 47 | n_actions = len(l_params["actions"]) 48 | population = params['population'] 49 | learner_population = params['learner_population'] 50 | learner_population = params['learner_population'] 51 | update_net_every = l_params['update_net_every'] 52 | memory_capacity = l_params["memory_capacity"] 53 | 54 | optimizer = optim.AdamW(policy_net.parameters(), lr=learning_rate, amsgrad=True) 55 | scheduler = StepLR(optimizer, step_size=1, gamma=l_params["step_lr"]) 56 | memory = ReplayMemory(Transition, memory_capacity) 57 | 58 | old_s = {} 59 | old_a = {} 60 | cluster_dict = {} 61 | 62 | actions_dict = {str(ep): {str(ac): 
0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 63 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 64 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 65 | 66 | if not os.path.exists(output_dir): 67 | os.makedirs(output_dir) 68 | 69 | max_possible_reward = (((params['episode_ticks'] - 150)/params['episode_ticks']) * params['rew']) + \ 70 | ((params['learner_population'] / params["cluster_threshold"]) * (params['rew'] ** 2)) 71 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 72 | 73 | for ep in range(1, train_episodes + 1): 74 | env.reset() 75 | losses = [] 76 | 77 | # Initialize the environment and get it's state 78 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 79 | for agent in env.agent_iter(max_iter=params['learner_population']): 80 | next_state, reward, _, _ = env.last(agent) 81 | next_state = torch.tensor(next_state.observe(), dtype=torch.float32, device=device) 82 | 83 | if positional_encoding: 84 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 85 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 86 | new_pherormone = pos_encoding + new_pherormone 87 | else: 88 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 89 | 90 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 91 | if normalize: 92 | new_pherormone /= max_possible_pherormone 93 | 94 | next_state = torch.cat((torch.flatten(new_pherormone), next_state)).unsqueeze(0) 95 | 96 | if ep == 1 and tick == 1: 97 | next_action = env.action_space(agent).sample() 98 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 99 | else: 100 | state = old_s[agent] 101 | action = old_a[agent] 102 | next_action, policy_net = select_action(env, agent, next_state, ep, policy_net, device, epsilon_end, decay) 103 | #normalization is done considering the max reward a single agent can receive 104 | reward = torch.tensor([reward], device=device) if not normalize \ 105 | else torch.tensor([reward / max_possible_reward], device=device) 106 | 107 | 108 | # Store the transition in memory 109 | memory.push(state, action, next_state, reward) 110 | 111 | # Perform one step of the optimization (on the policy network) 112 | policy_net, target_net, loss_single = optimize_model(Transition, memory, policy_net, target_net, gamma, batch_size, device) 113 | if loss_single is not None: 114 | # Optimize the model 115 | optimizer.zero_grad() 116 | loss_single.backward() 117 | losses.append(torch.Tensor.clone(loss_single.detach())) 118 | 119 | # In-place gradient clipping 120 | torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100) 121 | optimizer.step() 122 | scheduler.step() 123 | 124 | # Soft update of the target network's weights 125 | # θ′ ← τ θ + (1 −τ )θ′ 126 | if (agent + tick * learner_population + ep * params['episode_ticks'] * learner_population) % update_net_every == 0: 127 | target_net_state_dict = target_net.state_dict() 128 | policy_net_state_dict = 
policy_net.state_dict() 129 | for key in policy_net_state_dict: 130 | target_net_state_dict[key] = policy_net_state_dict[key] * alpha + target_net_state_dict[key] * (1 - alpha) 131 | target_net.load_state_dict(target_net_state_dict) 132 | 133 | env.step(next_action.item()) 134 | old_s[agent] = next_state 135 | old_a[agent] = next_action 136 | 137 | policy_net.epsilon = epsilon_end + (policy_net.epsilon - epsilon_end) * math.exp(-1. * ep * decay) 138 | 139 | actions_dict[str(ep)][str(next_action.item())] += 1 140 | action_dict[str(ep)][str(agent)][str(next_action.item())] += 1 141 | reward_dict[str(ep)][str(agent)] += round(reward.item(), 2) if isinstance(reward, torch.Tensor) else round(reward, 2) 142 | 143 | env.move() 144 | env._evaporate() 145 | env._diffuse() 146 | image = env.render() 147 | 148 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 149 | if not os.path.exists(os.path.join(output_dir, "images")): 150 | os.makedirs(os.path.join(output_dir, "images")) 151 | 152 | if ep == int(l_params["fist_saveimages_episode"]): 153 | save_env_image(image, tick, output_dir, "first_episode") 154 | elif ep == int(l_params["middle_saveimages_episode"]): 155 | save_env_image(image, tick, output_dir, "middle_episode") 156 | elif ep == int(l_params["last_saveimages_episode"]): 157 | save_env_image(image, tick, output_dir, "last_episode") 158 | 159 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 160 | video_from_images(output_dir, "first_episode") 161 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 162 | video_from_images(output_dir, "middle_episode") 163 | 164 | 165 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 166 | if ep % train_log_every == 0: 167 | cur_lr = optimizer.param_groups[0]['lr'] 168 | print("EPISODE: {}\tepsilon: {:.5f}\tavg loss: {:.8f}\tlearning rate {:.10f}".format(ep, policy_net.epsilon, sum(losses)/len(losses), cur_lr)) 169 | update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr) 170 | 171 | #print(json.dumps(cluster_dict, indent=2)) 172 | print("Training finished!\n") 173 | video_from_images(output_dir, "last_episode") 174 | 175 | policy_model_name = "policy_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".pth" 176 | target_model_name = "target_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".pth" 177 | torch.save(policy_net.state_dict(), os.path.join(output_dir, "models", policy_model_name)) 178 | torch.save(target_net.state_dict(), os.path.join(output_dir, "models", target_model_name)) 179 | 180 | return policy_net, env 181 | 182 | 183 | def test(env, params, l_params, policy_net, test_episodes, test_log_every, device, normalize, pos_enc): 184 | cluster_dict = {} 185 | print("[INFO] Start testing...") 186 | 187 | epsilon_end = l_params["epsilon_end"] 188 | policy_net.epsilon = epsilon_test = l_params["epsilon_test"] 189 | decay = l_params["decay"] 190 | 191 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 192 | for ep in range(1, test_episodes + 1): 193 | env.reset() 194 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 195 | for agent in env.agent_iter(max_iter=params['learner_population']): 196 | if ep == 1 and tick == 1: 197 | next_action = env.action_space(agent).sample() 198 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 199 | else: 200 | 
state, reward, _, _ = env.last(agent) 201 | state = torch.tensor(state.observe(), dtype=torch.float32, device=device) 202 | 203 | if pos_enc: 204 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 205 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 206 | new_pherormone = pos_encoding + new_pherormone 207 | else: 208 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 209 | 210 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 211 | if normalize: 212 | new_pherormone /= max_possible_pherormone 213 | 214 | state = torch.cat((torch.flatten(new_pherormone), state)).unsqueeze(0) 215 | action, policy_net = select_action(env, agent, state, ep, policy_net, device, epsilon_end, decay) 216 | env.step(action) 217 | 218 | env.move() 219 | env._evaporate() 220 | env._diffuse() 221 | env.render() 222 | 223 | if ep % test_log_every == 0: 224 | print(f"EPISODE: {ep}") 225 | print(f"\tepsilon: {policy_net.epsilon}") 226 | # print(f"\tepisode reward: {reward_episode}") 227 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 228 | 229 | print(json.dumps(cluster_dict, indent=2)) 230 | print("Testing finished!\n") 231 | 232 | 233 | def main(args): 234 | random.seed(args.random_seed) 235 | torch.manual_seed(args.random_seed) 236 | 237 | params, l_params = read_params(args.params_path, args.learning_params_path) 238 | curdir = os.path.dirname(os.path.abspath(__file__)) 239 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(args.train, curdir, params, l_params) 240 | env = Slime(render_mode="human", **params) 241 | 242 | if not os.path.isdir(os.path.join(output_dir, "models")) and args.train: 243 | os.makedirs(os.path.join(output_dir, "models")) 244 | 245 | n_actions = len(l_params["actions"]) 246 | if args.positional_encoding: 247 | n_observations = 100 248 | else: 249 | n_observations = 51 250 | 251 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 252 | print(f"[INFO] Device selected: {device}") 253 | 254 | policy_net = DQN(n_observations, n_actions, epsilon).to(device) 255 | target_net = DQN(n_observations, n_actions, epsilon).to(device) 256 | 257 | if args.models_path == "" or args.train: 258 | args.model_path = output_dir 259 | 260 | if args.resume or args.test: 261 | if os.path.isfile(os.path.join(args.model_path, "models", args.policy_model_name)) and \ 262 | os.path.isfile(os.path.join(args.model_path, "models", args.target_model_name)): 263 | policy_model_path = os.path.join(args.model_path, "models", args.policy_model_name) 264 | target_model_path = os.path.join(args.model_path, "models", args.target_model_name) 265 | policy_net.load_state_dict(torch.load(policy_model_path), strict=False) 266 | target_net.load_state_dict(torch.load(target_model_path), strict=False) 267 | else: 268 | target_net.load_state_dict(policy_net.state_dict()) 269 | 270 | if args.train: 271 | policy_net, env = train(env, params, l_params, device, policy_net, target_net, train_episodes, train_log_every, output_file, output_dir, args.normalize_input, args.positional_encoding) 272 | 273 | if args.test: 274 | test(env, params, l_params, policy_net, test_episodes, test_log_every, device, args.normalize_input, args.positional_encoding) 275 | 276 | 
env.close() 277 | 278 | 279 | if __name__ == "__main__": 280 | parser = argparse.ArgumentParser() 281 | parser.add_argument("params_path", type=str) 282 | parser.add_argument("learning_params_path", type=str) 283 | parser.add_argument("--policy-model-name", type=str, default="") 284 | parser.add_argument("--target-model-name", type=str, default="") 285 | parser.add_argument("--models-path", type=str, default="") 286 | parser.add_argument("--normalize-input", action="store_true") 287 | parser.add_argument("--positional-encoding", action="store_true") 288 | parser.add_argument("--train", action="store_true") 289 | parser.add_argument("--test", action="store_true") 290 | parser.add_argument("--resume", action="store_true") 291 | parser.add_argument("--random-seed", type=int, default=0) 292 | 293 | args = parser.parse_args() 294 | 295 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 296 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 297 | 298 | main(args) -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/Decentralized.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import read_params, save_env_image, setup, positional_encoding, update_summary, video_from_images 10 | from utils.DQN import DQN, ReplayMemory, optimize_model, select_action 11 | 12 | import argparse 13 | 14 | import os 15 | import json 16 | import random 17 | import datetime 18 | from collections import namedtuple 19 | from tqdm import tqdm 20 | 21 | import torch 22 | import torch.optim as optim 23 | from torch.optim.lr_scheduler import StepLR 24 | 25 | 26 | def train(env, 27 | params, 28 | l_params, 29 | device, 30 | policy_nets, 31 | target_nets, 32 | train_episodes, 33 | train_log_every, 34 | output_file, 35 | output_dir, 36 | normalize, 37 | pos_enc): 38 | Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward')) 39 | 40 | batch_size = l_params["batch_size"] 41 | learning_rate = l_params["lr"] 42 | epsilon_end = l_params["epsilon_end"] 43 | alpha = l_params["alpha"] 44 | gamma = l_params["gamma"] 45 | decay = l_params["decay"] 46 | n_actions = len(l_params["actions"]) 47 | population = params['population'] 48 | learner_population = params['learner_population'] 49 | update_net_every = l_params['update_net_every'] 50 | memory_capacity = l_params["memory_capacity"] 51 | 52 | optimizers = {i: optim.AdamW(policy_nets[i].parameters(), lr=learning_rate, amsgrad=True) for i in range(params['learner_population'])} 53 | schedulers = {i: StepLR(optimizers[i], step_size=1, gamma=l_params["step_lr"]) for i in range(params['learner_population'])} 54 | memory = {i: ReplayMemory(Transition, memory_capacity) for i in range(params['learner_population'])} 55 | 56 | old_s = {} 57 | old_a = {} 58 | cluster_dict = {} 59 | 60 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = 
follow_pheromone 61 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 62 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 63 | epsilon = 0 64 | cur_lr = 0 65 | 66 | max_possible_reward = (((params['episode_ticks'] - 150)/params['episode_ticks']) * params['rew']) + \ 67 | ((params['learner_population'] / params["cluster_threshold"]) * (params['rew'] ** 2)) 68 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 69 | 70 | for ep in range(1, train_episodes + 1): 71 | env.reset() 72 | losses = [] 73 | 74 | # Initialize the environment and get it's state 75 | for tick in tqdm(range(1, params['episode_ticks'] + 1)): 76 | for agent in env.agent_iter(max_iter=params['learner_population']): 77 | next_state, reward, _, _ = env.last(agent) 78 | next_state = torch.tensor(next_state.observe(), dtype=torch.float32, device=device) 79 | 80 | if pos_enc: 81 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 82 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 83 | new_pherormone = pos_encoding + new_pherormone 84 | else: 85 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 86 | 87 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 88 | if normalize: 89 | new_pherormone /= max_possible_pherormone 90 | 91 | next_state = torch.cat((torch.flatten(new_pherormone), next_state)).unsqueeze(0) 92 | 93 | if ep == 1 and tick == 1: 94 | next_action = env.action_space(agent).sample() 95 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 96 | else: 97 | state = old_s[agent] 98 | action = old_a[agent] 99 | next_action, policy_nets[agent] = select_action(env, agent, next_state, ep, policy_nets[agent], device, epsilon_end, decay) 100 | 101 | #normalization is done considering the max reward a single agent can receive 102 | reward = torch.tensor([reward], device=device) if not normalize \ 103 | else torch.tensor([reward / max_possible_reward], device=device) 104 | 105 | # Store the transition in memory 106 | memory[agent].push(state, action, next_state, reward) 107 | 108 | # Perform one step of the optimization (on the policy network) 109 | policy_nets[agent], target_nets[agent], loss_single = optimize_model(Transition, memory[agent], policy_nets[agent], target_nets[agent], gamma, batch_size, device) 110 | if loss_single is not None: 111 | # Optimize the model 112 | optimizers[agent].zero_grad() 113 | loss_single.backward() 114 | losses.append(torch.Tensor.clone(loss_single.detach())) 115 | 116 | # In-place gradient clipping 117 | torch.nn.utils.clip_grad_value_(policy_nets[agent].parameters(), 100) 118 | optimizers[agent].step() 119 | schedulers[agent].step() 120 | 121 | # Soft update of the target network's weights 122 | # θ′ ← τ θ + (1 −τ )θ′ 123 | if (agent + tick * learner_population + ep * params['episode_ticks'] * learner_population) % update_net_every == 0: 124 | target_net_state_dict = target_nets[agent].state_dict() 125 | policy_net_state_dict = policy_nets[agent].state_dict() 126 | for key in policy_net_state_dict: 127 | target_net_state_dict[key] = 
policy_net_state_dict[key] * alpha + target_net_state_dict[key] * (1 - alpha) 128 | target_nets[agent].load_state_dict(target_net_state_dict) 129 | 130 | epsilon = policy_nets[agent].epsilon 131 | cur_lr = optimizers[agent].param_groups[0]['lr'] 132 | 133 | env.step(next_action.item()) 134 | old_s[agent] = next_state 135 | old_a[agent] = next_action 136 | 137 | actions_dict[str(ep)][str(next_action.item())] += 1 138 | action_dict[str(ep)][str(agent)][str(next_action.item())] += 1 139 | reward_dict[str(ep)][str(agent)] += round(reward.item(), 2) if isinstance(reward, torch.Tensor) else round(reward, 2) 140 | 141 | env.move() 142 | env._evaporate() 143 | env._diffuse() 144 | image = env.render() 145 | 146 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 147 | if not os.path.exists(os.path.join(output_dir, "images")): 148 | os.makedirs(os.path.join(output_dir, "images")) 149 | 150 | if ep == int(l_params["fist_saveimages_episode"]): 151 | save_env_image(image, tick, output_dir, "first_episode") 152 | elif ep == int(l_params["middle_saveimages_episode"]): 153 | save_env_image(image, tick, output_dir, "middle_episode") 154 | elif ep == int(l_params["last_saveimages_episode"]): 155 | save_env_image(image, tick, output_dir, "last_episode") 156 | 157 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 158 | video_from_images(output_dir, "first_episode") 159 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 160 | video_from_images(output_dir, "middle_episode") 161 | 162 | 163 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 164 | if ep % train_log_every == 0: 165 | print("EPISODE: {}\tepsilon: {:.5f}\tavg loss: {:.8f}\tlearning rate {:.10f}".format(ep, epsilon, sum(losses)/len(losses), cur_lr)) 166 | update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr) 167 | 168 | 169 | #print(json.dumps(cluster_dict, indent=2)) 170 | print("Training finished!\n") 171 | video_from_images(output_dir, "last_episode") 172 | 173 | env.reset() 174 | now = datetime.datetime.now() 175 | for agent in range(params['learner_population']): 176 | policy_model_name = os.path.join(f"policy_{agent}_" + now.strftime("%m_%d_%Y__%H_%M_%S") + ".pth") 177 | target_model_name = os.path.join(f"target_{agent}_" + now.strftime("%m_%d_%Y__%H_%M_%S") + ".pth") 178 | torch.save(policy_nets[agent].state_dict(), os.path.join(output_dir, "models", "policies", policy_model_name)) 179 | torch.save(target_nets[agent].state_dict(), os.path.join(output_dir, "models", "targets", target_model_name)) 180 | 181 | return policy_nets, env 182 | 183 | 184 | def test(env, params, l_params, policy_nets, test_episodes, test_log_every, device, normalize, pos_enc): 185 | cluster_dict = {} 186 | print("[INFO] Start testing...") 187 | 188 | epsilon_end = l_params["epsilon_end"] 189 | epsilon_test = l_params["epsilon_test"] 190 | decay = l_params["decay"] 191 | 192 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 193 | for ep in range(1, test_episodes + 1): 194 | env.reset() 195 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 196 | for agent in env.agent_iter(max_iter=params['learner_population']): 197 | if ep == 1 and tick == 1: 198 | policy_nets[agent].epsilon = epsilon_test 199 | state, reward, _, _ = env.last(agent) 200 | state = torch.tensor(state.observe(), dtype=torch.float32, device=device) 201 | 
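The `positional_encoding` helper imported from `utils.utils` is not included in this listing; it is only known here through the call `positional_encoding(new_pherormone.numel(), 2)`, i.e. a (length, depth) pair, and the observation sizes chosen in `main()` (51 without the encoding, 100 with it) appear consistent with a 49-patch pheromone neighbourhood whose two encoding channels get flattened into the state. As a rough sketch only, assuming a standard sinusoidal form that the repository does not confirm, it could look like:

    import numpy as np

    def positional_encoding(length: int, depth: int) -> np.ndarray:
        """Sinusoidal positional encoding of shape (length, depth) -- illustrative sketch only."""
        positions = np.arange(length)[:, np.newaxis]                  # (length, 1)
        dims = np.arange(depth)[np.newaxis, :]                        # (1, depth)
        angle_rates = 1.0 / np.power(10000.0, (2 * (dims // 2)) / depth)
        angles = positions * angle_rates                              # (length, depth)
        encoding = np.zeros((length, depth), dtype=np.float32)
        encoding[:, 0::2] = np.sin(angles[:, 0::2])                   # even channels: sine
        encoding[:, 1::2] = np.cos(angles[:, 1::2])                   # odd channels: cosine
        return encoding

    pe = positional_encoding(49, 2)  # same (numel, 2) call shape as used in the train/test loops

Under this reading, adding the (1, 49, 2) encoding to the (1, 49, 1) pheromone tensor broadcasts to (1, 49, 2), which flattens to 98 values and, with the 2-bit base observation, matches n_observations = 100.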
202 | if pos_enc: 203 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 204 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 205 | new_pherormone = pos_encoding + new_pherormone 206 | else: 207 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 208 | 209 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 210 | if normalize: 211 | new_pherormone /= max_possible_pherormone 212 | 213 | state = torch.cat((torch.flatten(new_pherormone), state)).unsqueeze(0) 214 | 215 | action, policy_net = select_action(env, agent, state, ep, policy_nets[agent], device, epsilon_end, decay) 216 | env.step(action) 217 | 218 | env.move() 219 | env._evaporate() 220 | env._diffuse() 221 | env.render() 222 | 223 | if ep % test_log_every == 0: 224 | print(f"EPISODE: {ep}") 225 | print(f"\tepsilon: {policy_net.epsilon}") 226 | # print(f"\tepisode reward: {reward_episode}") 227 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 228 | 229 | print(json.dumps(cluster_dict, indent=2)) 230 | print("Testing finished!\n") 231 | 232 | 233 | def main(args): 234 | random.seed(args.random_seed) 235 | torch.manual_seed(args.random_seed) 236 | 237 | params, l_params = read_params(args.params_path, args.learning_params_path) 238 | curdir = os.path.dirname(os.path.abspath(__file__)) 239 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(args.train, curdir, params, l_params) 240 | env = Slime(render_mode="human", **params) 241 | 242 | if not os.path.isdir(os.path.join(output_dir, "models")) and args.train: 243 | os.makedirs(os.path.join(output_dir, "models")) 244 | 245 | if not os.path.isdir(os.path.join(output_dir, "models", "policies")) and args.train: 246 | os.makedirs(os.path.join(output_dir, "models", "policies")) 247 | 248 | if not os.path.isdir(os.path.join(output_dir, "models", "targets")) and args.train: 249 | os.makedirs(os.path.join(output_dir, "models", "targets")) 250 | 251 | n_actions = len(l_params["actions"]) 252 | if args.positional_encoding: 253 | n_observations = 100 254 | else: 255 | n_observations = 51 256 | 257 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 258 | print(f"[INFO] Device selected: {device}") 259 | 260 | population = params['population'] 261 | learner_population = params['learner_population'] 262 | policy_nets = {ag: DQN(n_observations, n_actions, epsilon).to(device) for ag in range(population, population + learner_population)} 263 | target_nets = {ag: DQN(n_observations, n_actions, epsilon).to(device) for ag in range(population, population + learner_population)} 264 | 265 | if args.models_path == "" or args.train: 266 | args.model_path = output_dir 267 | 268 | policies_path = os.path.join(args.models_path, "models", "policies") 269 | targets_path = os.path.join(args.models_path, "models", "targets") 270 | if args.resume or args.test: 271 | if os.path.exists(policies_path) and os.path.exists(targets_path): 272 | policies = [os.path.join(root, file) for root, dirs, files in os.walk(policies_path) for file in files if os.path.isfile(os.path.join(root, file))] 273 | targets = [os.path.join(root, file) for root, dirs, files in os.walk(targets_path) for file in files if os.path.isfile(os.path.join(root, file))] 274 | 
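Nothing in `os.walk` guarantees the order in which checkpoint files are returned, and the `enumerate`-based loading just below indexes `policy_nets`/`target_nets` from 0 even though those dictionaries are keyed from `population` to `population + learner_population - 1` (the two only coincide when `population` is 0). A sketch of one way to make the checkpoint-to-agent pairing explicit, assuming the `policy_{agent}_<timestamp>.pth` / `target_{agent}_<timestamp>.pth` naming produced by `train()` above (the helper name here is illustrative, not part of the repository):

    import re  # os, torch, policy_nets, target_nets and population are already in scope above

    def agent_index_from_checkpoint(path: str) -> int:
        # train() saves files as f"policy_{agent}_<timestamp>.pth"; recover the agent index.
        match = re.search(r"_(\d+)_", os.path.basename(path))
        assert match is not None, f"unexpected checkpoint name: {path}"
        return int(match.group(1))

    # Map each file to the network keyed for the agent that produced it,
    # independent of the order os.walk happened to return the files in.
    for path in policies:
        policy_nets[population + agent_index_from_checkpoint(path)].load_state_dict(
            torch.load(path), strict=False)
    for path in targets:
        target_nets[population + agent_index_from_checkpoint(path)].load_state_dict(
            torch.load(path), strict=False)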
275 | assert len(policies) == params['learner_population'], f"policies weights {len(policies)} and learner population {params['learner_population']} are different!" 276 | assert len(targets) == params['learner_population'], f"targets weights {len(targets)} and learner population {params['learner_population']} are different!" 277 | 278 | for i, file in enumerate(policies): 279 | policy_nets[i].load_state_dict(torch.load(file), strict=False) 280 | 281 | for i, file in enumerate(targets): 282 | target_nets[i].load_state_dict(torch.load(file), strict=False) 283 | else: 284 | for ag in range(population, population + learner_population): 285 | target_nets[ag].load_state_dict(policy_nets[ag].state_dict()) 286 | 287 | if args.train: 288 | policy_nets, env = train(env, params, l_params, device, policy_nets, target_nets, train_episodes, train_log_every, output_file, output_dir, args.normalize_input, args.positional_encoding) 289 | 290 | if args.test: 291 | test(env, params, l_params, policy_nets, test_episodes, test_log_every, device, args.normalize_input, args.positional_encoding) 292 | 293 | env.close() 294 | 295 | 296 | if __name__ == "__main__": 297 | parser = argparse.ArgumentParser() 298 | parser.add_argument("params_path", type=str) 299 | parser.add_argument("learning_params_path", type=str) 300 | parser.add_argument("--policy-model-name", type=str, default="") 301 | parser.add_argument("--target-model-name", type=str, default="") 302 | parser.add_argument("--models-path", type=str, default="") 303 | parser.add_argument("--normalize-input", action="store_true") 304 | parser.add_argument("--positional-encoding", action="store_true") 305 | parser.add_argument("--train", action="store_true") 306 | parser.add_argument("--test", action="store_true") 307 | parser.add_argument("--resume", action="store_true") 308 | parser.add_argument("--random-seed", type=int, default=0) 309 | 310 | args = parser.parse_args() 311 | 312 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 313 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 314 | 315 | main(args) -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/runs/multi_test_01_06_10_2023__21_12_57.csv: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.9, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.9, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 500, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } 28 | ---------- 29 | TRAIN_EPISODES = 100 30 | TEST_EPISODES = 10 31 | ---------- 32 | alpha = 0.2 33 | gamma = 0.8 34 | epsilon = 0.9 35 | decay = 0.9995 36 | ---------- 37 | Episode, Tick, Avg cluster size X tick, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical, (learner 0)-random-walk, (learner 
0)-drop-chemical, (learner 1)-move-toward-chemical, (learner 1)-random-walk, (learner 1)-drop-chemical, (learner 2)-move-toward-chemical, (learner 2)-random-walk, (learner 2)-drop-chemical, (learner 3)-move-toward-chemical, (learner 3)-random-walk, (learner 3)-drop-chemical, (learner 4)-move-toward-chemical, (learner 4)-random-walk, (learner 4)-drop-chemical, (learner 5)-move-toward-chemical, (learner 5)-random-walk, (learner 5)-drop-chemical, (learner 6)-move-toward-chemical, (learner 6)-random-walk, (learner 6)-drop-chemical, (learner 7)-move-toward-chemical, (learner 7)-random-walk, (learner 7)-drop-chemical, (learner 8)-move-toward-chemical, (learner 8)-random-walk, (learner 8)-drop-chemical, (learner 9)-move-toward-chemical, (learner 9)-random-walk, (learner 9)-drop-chemical, (learner 10)-move-toward-chemical, (learner 10)-random-walk, (learner 10)-drop-chemical, (learner 11)-move-toward-chemical, (learner 11)-random-walk, (learner 11)-drop-chemical, (learner 12)-move-toward-chemical, (learner 12)-random-walk, (learner 12)-drop-chemical, (learner 13)-move-toward-chemical, (learner 13)-random-walk, (learner 13)-drop-chemical, (learner 14)-move-toward-chemical, (learner 14)-random-walk, (learner 14)-drop-chemical, (learner 15)-move-toward-chemical, (learner 15)-random-walk, (learner 15)-drop-chemical, (learner 16)-move-toward-chemical, (learner 16)-random-walk, (learner 16)-drop-chemical, (learner 17)-move-toward-chemical, (learner 17)-random-walk, (learner 17)-drop-chemical, (learner 18)-move-toward-chemical, (learner 18)-random-walk, (learner 18)-drop-chemical, (learner 19)-move-toward-chemical, (learner 19)-random-walk, (learner 19)-drop-chemical, (learner 20)-move-toward-chemical, (learner 20)-random-walk, (learner 20)-drop-chemical, (learner 21)-move-toward-chemical, (learner 21)-random-walk, (learner 21)-drop-chemical, (learner 22)-move-toward-chemical, (learner 22)-random-walk, (learner 22)-drop-chemical, (learner 23)-move-toward-chemical, (learner 23)-random-walk, (learner 23)-drop-chemical, (learner 24)-move-toward-chemical, (learner 24)-random-walk, (learner 24)-drop-chemical, (learner 25)-move-toward-chemical, (learner 25)-random-walk, (learner 25)-drop-chemical, (learner 26)-move-toward-chemical, (learner 26)-random-walk, (learner 26)-drop-chemical, (learner 27)-move-toward-chemical, (learner 27)-random-walk, (learner 27)-drop-chemical, (learner 28)-move-toward-chemical, (learner 28)-random-walk, (learner 28)-drop-chemical, (learner 29)-move-toward-chemical, (learner 29)-random-walk, (learner 29)-drop-chemical, (learner 30)-move-toward-chemical, (learner 30)-random-walk, (learner 30)-drop-chemical, (learner 31)-move-toward-chemical, (learner 31)-random-walk, (learner 31)-drop-chemical, (learner 32)-move-toward-chemical, (learner 32)-random-walk, (learner 32)-drop-chemical, (learner 33)-move-toward-chemical, (learner 33)-random-walk, (learner 33)-drop-chemical, (learner 34)-move-toward-chemical, (learner 34)-random-walk, (learner 34)-drop-chemical, (learner 35)-move-toward-chemical, (learner 35)-random-walk, (learner 35)-drop-chemical, (learner 36)-move-toward-chemical, (learner 36)-random-walk, (learner 36)-drop-chemical, (learner 37)-move-toward-chemical, (learner 37)-random-walk, (learner 37)-drop-chemical, (learner 38)-move-toward-chemical, (learner 38)-random-walk, (learner 38)-drop-chemical, (learner 39)-move-toward-chemical, (learner 39)-random-walk, (learner 39)-drop-chemical, (learner 40)-move-toward-chemical, (learner 40)-random-walk, (learner 40)-drop-chemical, 
(learner 41)-move-toward-chemical, (learner 41)-random-walk, (learner 41)-drop-chemical, (learner 42)-move-toward-chemical, (learner 42)-random-walk, (learner 42)-drop-chemical, (learner 43)-move-toward-chemical, (learner 43)-random-walk, (learner 43)-drop-chemical, (learner 44)-move-toward-chemical, (learner 44)-random-walk, (learner 44)-drop-chemical, (learner 45)-move-toward-chemical, (learner 45)-random-walk, (learner 45)-drop-chemical, (learner 46)-move-toward-chemical, (learner 46)-random-walk, (learner 46)-drop-chemical, (learner 47)-move-toward-chemical, (learner 47)-random-walk, (learner 47)-drop-chemical, (learner 48)-move-toward-chemical, (learner 48)-random-walk, (learner 48)-drop-chemical, (learner 49)-move-toward-chemical, (learner 49)-random-walk, (learner 49)-drop-chemical, (learner 50)-move-toward-chemical, (learner 50)-random-walk, (learner 50)-drop-chemical, (learner 51)-move-toward-chemical, (learner 51)-random-walk, (learner 51)-drop-chemical, (learner 52)-move-toward-chemical, (learner 52)-random-walk, (learner 52)-drop-chemical, (learner 53)-move-toward-chemical, (learner 53)-random-walk, (learner 53)-drop-chemical, (learner 54)-move-toward-chemical, (learner 54)-random-walk, (learner 54)-drop-chemical, (learner 55)-move-toward-chemical, (learner 55)-random-walk, (learner 55)-drop-chemical, (learner 56)-move-toward-chemical, (learner 56)-random-walk, (learner 56)-drop-chemical, (learner 57)-move-toward-chemical, (learner 57)-random-walk, (learner 57)-drop-chemical, (learner 58)-move-toward-chemical, (learner 58)-random-walk, (learner 58)-drop-chemical, (learner 59)-move-toward-chemical, (learner 59)-random-walk, (learner 59)-drop-chemical, (learner 60)-move-toward-chemical, (learner 60)-random-walk, (learner 60)-drop-chemical, (learner 61)-move-toward-chemical, (learner 61)-random-walk, (learner 61)-drop-chemical, (learner 62)-move-toward-chemical, (learner 62)-random-walk, (learner 62)-drop-chemical, (learner 63)-move-toward-chemical, (learner 63)-random-walk, (learner 63)-drop-chemical, (learner 64)-move-toward-chemical, (learner 64)-random-walk, (learner 64)-drop-chemical, (learner 65)-move-toward-chemical, (learner 65)-random-walk, (learner 65)-drop-chemical, (learner 66)-move-toward-chemical, (learner 66)-random-walk, (learner 66)-drop-chemical, (learner 67)-move-toward-chemical, (learner 67)-random-walk, (learner 67)-drop-chemical, (learner 68)-move-toward-chemical, (learner 68)-random-walk, (learner 68)-drop-chemical, (learner 69)-move-toward-chemical, (learner 69)-random-walk, (learner 69)-drop-chemical, (learner 70)-move-toward-chemical, (learner 70)-random-walk, (learner 70)-drop-chemical, (learner 71)-move-toward-chemical, (learner 71)-random-walk, (learner 71)-drop-chemical, (learner 72)-move-toward-chemical, (learner 72)-random-walk, (learner 72)-drop-chemical, (learner 73)-move-toward-chemical, (learner 73)-random-walk, (learner 73)-drop-chemical, (learner 74)-move-toward-chemical, (learner 74)-random-walk, (learner 74)-drop-chemical, (learner 75)-move-toward-chemical, (learner 75)-random-walk, (learner 75)-drop-chemical, (learner 76)-move-toward-chemical, (learner 76)-random-walk, (learner 76)-drop-chemical, (learner 77)-move-toward-chemical, (learner 77)-random-walk, (learner 77)-drop-chemical, (learner 78)-move-toward-chemical, (learner 78)-random-walk, (learner 78)-drop-chemical, (learner 79)-move-toward-chemical, (learner 79)-random-walk, (learner 79)-drop-chemical, (learner 80)-move-toward-chemical, (learner 80)-random-walk, (learner 
80)-drop-chemical, (learner 81)-move-toward-chemical, (learner 81)-random-walk, (learner 81)-drop-chemical, (learner 82)-move-toward-chemical, (learner 82)-random-walk, (learner 82)-drop-chemical, (learner 83)-move-toward-chemical, (learner 83)-random-walk, (learner 83)-drop-chemical, (learner 84)-move-toward-chemical, (learner 84)-random-walk, (learner 84)-drop-chemical, (learner 85)-move-toward-chemical, (learner 85)-random-walk, (learner 85)-drop-chemical, (learner 86)-move-toward-chemical, (learner 86)-random-walk, (learner 86)-drop-chemical, (learner 87)-move-toward-chemical, (learner 87)-random-walk, (learner 87)-drop-chemical, (learner 88)-move-toward-chemical, (learner 88)-random-walk, (learner 88)-drop-chemical, (learner 89)-move-toward-chemical, (learner 89)-random-walk, (learner 89)-drop-chemical, (learner 90)-move-toward-chemical, (learner 90)-random-walk, (learner 90)-drop-chemical, (learner 91)-move-toward-chemical, (learner 91)-random-walk, (learner 91)-drop-chemical, (learner 92)-move-toward-chemical, (learner 92)-random-walk, (learner 92)-drop-chemical, (learner 93)-move-toward-chemical, (learner 93)-random-walk, (learner 93)-drop-chemical, (learner 94)-move-toward-chemical, (learner 94)-random-walk, (learner 94)-drop-chemical, (learner 95)-move-toward-chemical, (learner 95)-random-walk, (learner 95)-drop-chemical, (learner 96)-move-toward-chemical, (learner 96)-random-walk, (learner 96)-drop-chemical, (learner 97)-move-toward-chemical, (learner 97)-random-walk, (learner 97)-drop-chemical, (learner 98)-move-toward-chemical, (learner 98)-random-walk, (learner 98)-drop-chemical, (learner 99)-move-toward-chemical, (learner 99)-random-walk, (learner 99)-drop-chemical, Avg reward X episode 38 | 10, 5000, 9.71, 16763, 16714, 16523, 184, 158, 158, 172, 169, 159, 163, 163, 174, 155, 178, 167, 160, 174, 166, 151, 185, 164, 179, 173, 148, 159, 167, 174, 147, 157, 196, 181, 153, 166, 175, 164, 161, 152, 164, 184, 172, 185, 143, 179, 163, 158, 172, 169, 159, 155, 170, 175, 152, 177, 171, 164, 193, 143, 182, 163, 155, 171, 166, 163, 173, 148, 179, 156, 174, 170, 165, 163, 172, 174, 156, 170, 164, 164, 172, 163, 163, 174, 189, 158, 153, 179, 159, 162, 157, 172, 171, 152, 168, 180, 180, 172, 148, 158, 170, 172, 179, 167, 154, 176, 163, 161, 160, 168, 172, 152, 186, 162, 153, 184, 163, 178, 159, 163, 188, 159, 153, 182, 174, 144, 178, 153, 169, 154, 183, 163, 155, 166, 179, 153, 166, 181, 177, 150, 173, 171, 161, 168, 169, 178, 153, 163, 175, 162, 167, 180, 153, 179, 158, 163, 169, 172, 159, 173, 162, 165, 174, 165, 161, 160, 173, 167, 164, 182, 154, 172, 158, 170, 158, 163, 179, 178, 151, 171, 159, 180, 161, 154, 185, 161, 163, 175, 162, 190, 156, 154, 184, 158, 158, 172, 167, 161, 164, 166, 170, 163, 159, 178, 181, 191, 128, 169, 168, 163, 157, 149, 194, 178, 167, 155, 148, 169, 183, 171, 175, 154, 175, 148, 177, 166, 183, 151, 177, 158, 165, 167, 157, 176, 175, 172, 153, 173, 178, 149, 150, 181, 169, 157, 169, 174, 157, 181, 162, 180, 162, 158, 169, 159, 172, 169, 151, 180, 172, 172, 156, 171, 164, 165, 178, 150, 172, 153, 184, 163, 156, 170, 174, 170, 172, 158, 145, 178, 177, 172, 152, 176, 177, 159, 164, 178, 151, 171, 173, 180, 147, 159, 157, 184, 180, 147, 173, 153, 173, 174, 164, 168, 168, 177, 159, 164, 3342.5985687999996 39 | 20, 10000, 7.02, 16622, 16673, 16705, 158, 163, 179, 157, 171, 172, 180, 171, 149, 167, 167, 166, 152, 156, 192, 173, 161, 166, 166, 170, 164, 160, 159, 181, 178, 164, 158, 176, 170, 154, 173, 154, 173, 168, 161, 171, 161, 166, 173, 157, 176, 167, 149, 177, 
174, 174, 158, 168, 166, 173, 161, 182, 151, 167, 169, 172, 159, 165, 182, 153, 166, 157, 177, 195, 157, 148, 165, 166, 169, 181, 159, 160, 188, 147, 165, 172, 162, 166, 168, 158, 174, 158, 178, 164, 181, 152, 167, 185, 156, 159, 166, 163, 171, 148, 179, 173, 182, 147, 171, 171, 163, 166, 170, 171, 159, 147, 168, 185, 177, 149, 174, 156, 170, 174, 169, 196, 135, 135, 186, 179, 174, 165, 161, 153, 169, 178, 190, 152, 158, 152, 176, 172, 169, 169, 162, 179, 160, 161, 157, 162, 181, 163, 191, 146, 156, 137, 207, 173, 171, 156, 164, 153, 183, 147, 182, 171, 166, 176, 158, 164, 163, 173, 156, 184, 160, 162, 166, 172, 183, 150, 167, 164, 178, 158, 172, 153, 175, 163, 145, 192, 152, 160, 188, 169, 161, 170, 155, 162, 183, 145, 169, 186, 154, 176, 170, 159, 140, 201, 138, 182, 180, 188, 155, 157, 145, 182, 173, 165, 165, 170, 163, 182, 155, 181, 164, 155, 169, 155, 176, 170, 174, 156, 166, 172, 162, 163, 158, 179, 174, 165, 161, 165, 178, 157, 176, 161, 163, 150, 184, 166, 163, 186, 151, 176, 168, 156, 164, 163, 173, 169, 167, 164, 159, 159, 182, 151, 190, 159, 167, 158, 175, 167, 154, 179, 161, 190, 149, 172, 182, 146, 153, 165, 182, 171, 167, 162, 197, 145, 158, 173, 176, 151, 167, 184, 149, 169, 182, 149, 158, 168, 174, 172, 171, 157, 175, 174, 151, 173, 171, 156, 3057.8800402000006 40 | 30, 15000, 9.1, 16844, 16722, 16434, 150, 174, 176, 168, 187, 145, 154, 180, 166, 177, 173, 150, 174, 171, 155, 185, 154, 161, 160, 181, 159, 170, 170, 160, 142, 177, 181, 166, 163, 171, 174, 164, 162, 157, 176, 167, 159, 174, 167, 153, 180, 167, 142, 173, 185, 160, 175, 165, 160, 156, 184, 154, 169, 177, 160, 144, 196, 164, 177, 159, 175, 163, 162, 166, 181, 153, 174, 156, 170, 163, 158, 179, 185, 158, 157, 179, 154, 167, 151, 167, 182, 169, 172, 159, 186, 175, 139, 194, 156, 150, 170, 169, 161, 151, 183, 166, 179, 168, 153, 183, 164, 153, 177, 170, 153, 161, 175, 164, 169, 171, 160, 176, 167, 157, 154, 175, 171, 175, 163, 162, 168, 167, 165, 173, 169, 158, 174, 170, 156, 176, 152, 172, 166, 187, 147, 151, 160, 189, 174, 145, 181, 160, 180, 160, 171, 158, 171, 150, 180, 170, 187, 148, 165, 161, 183, 156, 175, 168, 157, 170, 175, 155, 185, 138, 177, 170, 166, 164, 178, 156, 166, 176, 148, 176, 153, 173, 174, 165, 161, 174, 166, 158, 176, 167, 148, 185, 162, 188, 150, 169, 164, 167, 154, 190, 156, 177, 168, 155, 158, 187, 155, 168, 155, 177, 178, 175, 147, 162, 184, 154, 184, 149, 167, 150, 187, 163, 165, 167, 168, 188, 153, 159, 177, 171, 152, 172, 154, 174, 144, 169, 187, 185, 165, 150, 177, 164, 159, 173, 161, 166, 175, 156, 169, 148, 179, 173, 166, 177, 157, 165, 167, 168, 163, 179, 158, 172, 162, 166, 174, 178, 148, 178, 174, 148, 163, 161, 176, 171, 160, 169, 182, 155, 163, 179, 171, 150, 187, 155, 158, 155, 171, 174, 178, 166, 156, 172, 171, 157, 179, 154, 167, 157, 161, 182, 175, 157, 168, 180, 164, 156, 2968.114642999999 41 | 40, 20000, 7.43, 16595, 16744, 16661, 163, 162, 175, 175, 159, 166, 170, 169, 161, 154, 161, 185, 163, 189, 148, 147, 195, 158, 179, 156, 165, 141, 182, 177, 177, 159, 164, 177, 180, 143, 157, 189, 154, 155, 168, 177, 178, 159, 163, 176, 157, 167, 148, 174, 178, 157, 157, 186, 170, 166, 164, 190, 149, 161, 159, 163, 178, 170, 185, 145, 164, 175, 161, 182, 161, 157, 183, 151, 166, 141, 180, 179, 175, 152, 173, 165, 171, 164, 177, 160, 163, 147, 170, 183, 166, 163, 171, 182, 153, 165, 179, 153, 168, 172, 165, 163, 147, 184, 169, 169, 149, 182, 172, 151, 177, 169, 173, 158, 167, 178, 155, 158, 158, 184, 155, 177, 168, 175, 170, 155, 176, 165, 159, 170, 162, 168, 157, 181, 162, 160, 
171, 169, 161, 177, 162, 164, 177, 159, 160, 159, 181, 153, 165, 182, 156, 171, 173, 163, 180, 157, 156, 163, 181, 190, 150, 160, 152, 174, 174, 165, 167, 168, 157, 179, 164, 171, 170, 159, 168, 165, 167, 158, 187, 155, 141, 188, 171, 181, 152, 167, 160, 154, 186, 157, 150, 193, 180, 155, 165, 191, 167, 142, 154, 168, 178, 170, 166, 164, 176, 171, 153, 168, 161, 171, 169, 164, 167, 168, 165, 167, 182, 155, 163, 144, 178, 178, 157, 178, 165, 174, 146, 180, 158, 176, 166, 161, 168, 171, 165, 196, 139, 163, 179, 158, 167, 170, 163, 155, 176, 169, 171, 170, 159, 200, 147, 153, 172, 171, 157, 159, 168, 173, 186, 154, 160, 180, 163, 157, 173, 170, 157, 157, 181, 162, 142, 168, 190, 164, 176, 160, 161, 161, 178, 168, 152, 180, 171, 146, 183, 153, 166, 181, 208, 156, 136, 149, 186, 165, 167, 173, 160, 157, 174, 169, 164, 166, 170, 164, 177, 159, 3241.0066775999985 42 | 50, 25000, 10.23, 16754, 16729, 16517, 161, 178, 161, 163, 167, 170, 152, 162, 186, 161, 159, 180, 160, 164, 176, 160, 165, 175, 187, 125, 188, 175, 166, 159, 161, 159, 180, 170, 158, 172, 159, 169, 172, 181, 152, 167, 158, 188, 154, 169, 164, 167, 152, 177, 171, 176, 158, 166, 182, 151, 167, 169, 162, 169, 179, 159, 162, 166, 180, 154, 158, 179, 163, 177, 175, 148, 129, 172, 199, 156, 181, 163, 170, 157, 173, 183, 132, 185, 154, 191, 155, 166, 189, 145, 169, 177, 154, 194, 165, 141, 172, 159, 169, 183, 154, 163, 179, 161, 160, 149, 181, 170, 174, 175, 151, 170, 168, 162, 170, 161, 169, 150, 198, 152, 165, 168, 167, 177, 158, 165, 155, 169, 176, 178, 142, 180, 165, 163, 172, 179, 154, 167, 176, 159, 165, 182, 173, 145, 163, 173, 164, 157, 176, 167, 171, 176, 153, 148, 188, 164, 159, 171, 170, 163, 158, 179, 180, 154, 166, 192, 140, 168, 170, 165, 165, 172, 167, 161, 183, 170, 147, 171, 157, 172, 169, 188, 143, 165, 165, 170, 155, 168, 177, 161, 169, 170, 165, 157, 178, 157, 174, 169, 164, 172, 164, 165, 167, 168, 164, 181, 155, 199, 148, 153, 181, 165, 154, 168, 178, 154, 152, 179, 169, 149, 157, 194, 147, 168, 185, 175, 168, 157, 160, 177, 163, 161, 176, 163, 157, 157, 186, 178, 162, 160, 158, 180, 162, 153, 172, 175, 182, 156, 162, 163, 157, 180, 178, 177, 145, 164, 178, 158, 158, 179, 163, 156, 178, 166, 176, 162, 162, 156, 191, 153, 175, 162, 163, 177, 162, 161, 203, 152, 145, 177, 145, 178, 178, 163, 159, 162, 191, 147, 173, 180, 147, 165, 171, 164, 160, 155, 185, 159, 179, 162, 174, 172, 154, 165, 172, 163, 3185.480032000001 43 | 60, 30000, 7.98, 16676, 16745, 16579, 147, 164, 189, 174, 148, 178, 171, 174, 155, 194, 153, 153, 166, 177, 157, 198, 160, 142, 154, 183, 163, 158, 183, 159, 159, 168, 173, 151, 193, 156, 203, 150, 147, 156, 174, 170, 177, 176, 147, 159, 177, 164, 153, 163, 184, 166, 157, 177, 154, 166, 180, 192, 147, 161, 181, 166, 153, 173, 164, 163, 177, 166, 157, 140, 168, 192, 178, 156, 166, 173, 170, 157, 160, 142, 198, 172, 184, 144, 189, 168, 143, 175, 171, 154, 172, 163, 165, 169, 159, 172, 184, 157, 159, 178, 173, 149, 172, 164, 164, 145, 191, 164, 156, 170, 174, 170, 156, 174, 159, 191, 150, 184, 155, 161, 161, 171, 168, 178, 149, 173, 151, 188, 161, 160, 182, 158, 169, 164, 167, 172, 172, 156, 156, 162, 182, 151, 184, 165, 161, 182, 157, 174, 157, 169, 156, 181, 163, 151, 176, 173, 156, 174, 170, 172, 152, 176, 180, 136, 184, 173, 173, 154, 179, 157, 164, 177, 165, 158, 154, 182, 164, 155, 176, 169, 163, 175, 162, 161, 162, 177, 176, 167, 157, 179, 165, 156, 143, 176, 181, 168, 181, 151, 164, 171, 165, 163, 157, 180, 171, 174, 155, 172, 158, 170, 154, 163, 183, 175, 179, 146, 182, 165, 153, 164, 169, 167, 
182, 145, 173, 168, 172, 160, 159, 179, 162, 161, 161, 178, 160, 155, 185, 179, 151, 170, 145, 173, 182, 172, 165, 163, 198, 153, 149, 155, 159, 186, 142, 186, 172, 175, 178, 147, 167, 167, 166, 150, 161, 189, 162, 183, 155, 159, 169, 172, 152, 169, 179, 131, 185, 184, 162, 176, 162, 191, 144, 165, 170, 163, 167, 161, 157, 182, 152, 167, 181, 163, 169, 168, 181, 147, 172, 156, 179, 165, 180, 167, 153, 183, 173, 144, 3104.9011563999998 44 | 70, 35000, 6.53, 16537, 16650, 16813, 161, 161, 178, 185, 172, 143, 153, 160, 187, 167, 193, 140, 154, 190, 156, 171, 167, 162, 159, 181, 160, 168, 165, 167, 161, 162, 177, 178, 178, 144, 166, 149, 185, 164, 171, 165, 161, 155, 184, 191, 169, 140, 173, 147, 180, 158, 171, 171, 157, 168, 175, 151, 171, 178, 163, 180, 157, 157, 160, 183, 153, 173, 174, 182, 158, 160, 183, 156, 161, 169, 149, 182, 166, 173, 161, 170, 161, 169, 166, 167, 167, 165, 171, 164, 181, 155, 164, 161, 161, 178, 172, 167, 161, 170, 155, 175, 156, 181, 163, 160, 160, 180, 167, 167, 166, 145, 188, 167, 169, 166, 165, 186, 152, 162, 162, 199, 139, 168, 165, 167, 166, 187, 147, 141, 174, 185, 153, 164, 183, 159, 164, 177, 170, 155, 175, 139, 174, 187, 158, 161, 181, 164, 165, 171, 168, 174, 158, 160, 157, 183, 165, 180, 155, 182, 165, 153, 156, 174, 170, 167, 167, 166, 144, 158, 198, 164, 162, 174, 173, 175, 152, 168, 161, 171, 169, 168, 163, 162, 187, 151, 169, 160, 171, 157, 147, 196, 160, 172, 168, 152, 161, 187, 163, 151, 186, 178, 162, 160, 152, 197, 151, 176, 163, 161, 175, 149, 176, 181, 166, 153, 151, 175, 174, 188, 161, 151, 136, 171, 193, 154, 174, 172, 165, 180, 155, 169, 158, 173, 187, 155, 158, 185, 133, 182, 149, 199, 152, 187, 145, 168, 169, 195, 136, 180, 153, 167, 175, 156, 169, 147, 171, 182, 155, 149, 196, 187, 143, 170, 182, 158, 160, 146, 162, 192, 167, 169, 164, 155, 158, 187, 186, 146, 168, 143, 197, 160, 157, 180, 163, 162, 163, 175, 169, 178, 153, 185, 145, 170, 159, 188, 153, 174, 183, 143, 156, 154, 190, 172, 157, 171, 2945.1333222000003 45 | 80, 40000, 10.3, 16641, 16639, 16720, 171, 175, 154, 161, 169, 170, 164, 165, 171, 167, 154, 179, 169, 179, 152, 181, 157, 162, 162, 163, 175, 150, 178, 172, 151, 172, 177, 164, 160, 176, 168, 157, 175, 162, 166, 172, 189, 152, 159, 172, 158, 170, 177, 176, 147, 174, 158, 168, 199, 150, 151, 162, 158, 180, 172, 173, 155, 155, 178, 167, 147, 192, 161, 166, 175, 159, 171, 181, 148, 167, 171, 162, 178, 171, 151, 184, 154, 162, 163, 165, 172, 172, 149, 179, 161, 171, 168, 162, 175, 163, 175, 161, 164, 156, 182, 162, 162, 168, 170, 176, 155, 169, 162, 172, 166, 148, 157, 195, 149, 182, 169, 177, 180, 143, 176, 169, 155, 169, 179, 152, 180, 165, 155, 154, 188, 158, 165, 179, 156, 159, 162, 179, 165, 165, 170, 159, 176, 165, 137, 172, 191, 161, 151, 188, 161, 167, 172, 173, 158, 169, 171, 154, 175, 161, 167, 172, 157, 168, 175, 163, 152, 185, 169, 166, 165, 187, 152, 161, 188, 131, 181, 173, 167, 160, 185, 160, 155, 155, 195, 150, 171, 178, 151, 184, 167, 149, 149, 177, 174, 166, 176, 158, 171, 156, 173, 141, 163, 196, 150, 159, 191, 171, 176, 153, 168, 173, 159, 163, 169, 168, 177, 162, 161, 150, 182, 168, 188, 163, 149, 186, 146, 168, 185, 162, 153, 147, 168, 185, 156, 165, 179, 166, 169, 165, 164, 178, 158, 162, 144, 194, 159, 175, 166, 173, 153, 174, 161, 184, 155, 152, 168, 180, 176, 144, 180, 158, 172, 170, 160, 157, 183, 178, 147, 175, 169, 167, 164, 163, 173, 164, 174, 160, 166, 167, 159, 174, 150, 184, 166, 158, 173, 169, 169, 158, 173, 171, 158, 171, 172, 169, 159, 162, 197, 141, 170, 161, 169, 170, 145, 185, 
3405.7440263999997 46 | 90, 45000, 8.04, 16675, 16806, 16519, 169, 163, 168, 167, 165, 168, 168, 156, 176, 176, 169, 155, 168, 168, 164, 159, 155, 186, 176, 152, 172, 142, 174, 184, 172, 172, 156, 182, 164, 154, 162, 167, 171, 158, 185, 157, 144, 173, 183, 173, 171, 156, 184, 182, 134, 157, 160, 183, 159, 186, 155, 163, 173, 164, 158, 186, 156, 167, 159, 174, 174, 170, 156, 180, 156, 164, 191, 147, 162, 190, 163, 147, 162, 153, 185, 166, 168, 166, 159, 166, 175, 153, 158, 189, 164, 173, 163, 161, 187, 152, 190, 152, 158, 172, 177, 151, 177, 172, 151, 199, 156, 145, 160, 200, 140, 163, 176, 161, 161, 174, 165, 176, 169, 155, 172, 158, 170, 175, 174, 151, 174, 167, 159, 166, 166, 168, 190, 153, 157, 161, 172, 167, 166, 181, 153, 160, 170, 170, 164, 183, 153, 165, 160, 175, 170, 167, 163, 164, 157, 179, 166, 155, 179, 161, 172, 167, 164, 174, 162, 158, 179, 163, 176, 147, 177, 146, 177, 177, 170, 150, 180, 153, 175, 172, 167, 169, 164, 167, 161, 172, 147, 171, 182, 169, 179, 152, 149, 179, 172, 167, 163, 170, 170, 172, 158, 164, 165, 171, 178, 166, 156, 167, 168, 165, 164, 153, 183, 175, 155, 170, 159, 171, 170, 157, 163, 180, 163, 161, 176, 164, 184, 152, 170, 191, 139, 163, 158, 179, 164, 176, 160, 172, 168, 160, 155, 162, 183, 175, 177, 148, 170, 181, 149, 179, 180, 141, 156, 168, 176, 162, 166, 172, 145, 161, 194, 188, 166, 146, 164, 157, 179, 143, 181, 176, 152, 183, 165, 164, 170, 166, 168, 153, 179, 161, 172, 167, 171, 156, 173, 181, 158, 161, 168, 161, 171, 179, 168, 153, 167, 186, 147, 167, 174, 159, 175, 160, 165, 166, 159, 175, 3175.9600239999986 47 | 100, 50000, 7.56, 16486, 16666, 16848, 144, 165, 191, 145, 180, 175, 196, 129, 175, 151, 186, 163, 170, 166, 164, 172, 172, 156, 189, 142, 169, 171, 171, 158, 148, 189, 163, 159, 172, 169, 130, 188, 182, 168, 157, 175, 166, 162, 172, 165, 176, 159, 147, 156, 197, 171, 158, 171, 160, 170, 170, 167, 175, 158, 158, 168, 174, 183, 143, 174, 167, 156, 177, 166, 157, 177, 189, 159, 152, 147, 172, 181, 185, 160, 155, 162, 135, 203, 157, 175, 168, 171, 153, 176, 153, 175, 172, 167, 167, 166, 163, 154, 183, 155, 176, 169, 173, 154, 173, 164, 170, 166, 172, 157, 171, 149, 162, 189, 171, 174, 155, 163, 162, 175, 153, 179, 168, 145, 178, 177, 171, 170, 159, 140, 183, 177, 189, 143, 168, 144, 190, 166, 156, 174, 170, 191, 171, 138, 158, 138, 204, 168, 167, 165, 150, 177, 173, 174, 145, 181, 157, 172, 171, 173, 158, 169, 159, 189, 152, 181, 154, 165, 171, 151, 178, 151, 162, 187, 173, 154, 173, 193, 169, 138, 177, 153, 170, 181, 167, 152, 180, 187, 133, 178, 171, 151, 158, 157, 185, 156, 186, 158, 138, 180, 182, 179, 161, 160, 155, 168, 177, 169, 176, 155, 173, 150, 177, 178, 162, 160, 161, 178, 161, 158, 175, 167, 153, 161, 186, 135, 179, 186, 162, 167, 171, 148, 171, 181, 194, 166, 140, 173, 162, 165, 174, 150, 176, 146, 188, 166, 176, 172, 152, 162, 163, 175, 191, 168, 141, 144, 184, 172, 163, 184, 153, 167, 172, 161, 143, 178, 179, 180, 152, 168, 150, 179, 171, 178, 153, 169, 160, 170, 170, 169, 178, 153, 172, 148, 180, 179, 156, 165, 153, 174, 173, 185, 167, 148, 158, 167, 175, 175, 171, 154, 158, 167, 175, 166, 181, 153, 3228.1533041999983 48 | 49 | test 50 | "1": 7.83, 51 | "2": 8.71, 52 | "3": 7.21, 53 | "4": 9.76, 54 | "5": 10.4, 55 | "6": 9.55, 56 | "7": 10.2, 57 | "8": 9.03, 58 | "9": 9.02, 59 | "10": 8.0 -------------------------------------------------------------------------------- /slime_environments/environments/SlimeEnvSingleAgent.py: -------------------------------------------------------------------------------- 1 | import json 2 
| import random 3 | import sys 4 | from typing import Optional 5 | from itertools import product 6 | 7 | import gym 8 | import numpy as np 9 | import pygame 10 | from gym import spaces 11 | from gym.spaces import MultiBinary 12 | 13 | BLACK = (0, 0, 0) 14 | BLUE = (0, 0, 255) 15 | WHITE = (255, 255, 255) 16 | RED = (190, 0, 0) 17 | GREEN = (0, 190, 0) 18 | 19 | 20 | class BooleanSpace(gym.Space): 21 | @property 22 | def is_np_flattenable(self): 23 | return True 24 | 25 | def __init__(self, size=None): 26 | """ 27 | A space of boolean values 28 | :param size: how many boolean values the space is made of 29 | """ 30 | assert isinstance(size, int) and size > 0 31 | self.size = size 32 | self._values = list(product([True, False], repeat=self.size)) 33 | gym.Space.__init__(self, (2,), bool) 34 | 35 | def contains(self, x): 36 | return x in self._values 37 | 38 | def sample(self): 39 | return random.choice(self._values) 40 | # return self.values 41 | 42 | 43 | class Slime(gym.Env): 44 | metadata = {"render_modes": ["human"], "render_fps": 30} 45 | 46 | def __init__(self, 47 | render_mode: Optional[str] = None, 48 | **kwargs): 49 | """ 50 | :param population: Controls the number of non-learning slimes (= green turtles) 51 | :param sniff_threshold: Controls how sensitive slimes are to pheromone (higher values make slimes less 52 | sensitive to pheromone)—unclear effect on learning, could be negligible 53 | :param diffuse_area Controls the diffusion radius 54 | :param diffuse_mode Controls in which order patches with pheromone to diffuse are visited: 55 | 'simple' = Python-dependant (dict keys "ordering") 56 | 'rng' = random visiting 57 | 'sorted' = diffuse first the patches with more pheromone 58 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 59 | 'cascade' = step-by-step, incremental (recursive) diffusion within 'diffuse_area' 60 | :param follow_mode Controls how non-learning agents follow pheromone: 61 | 'det' = follow greatest pheromone 62 | 'prob' = follow greatest pheromone probabilistically (pheromone strength as weight) 63 | :param smell_area: Controls the radius of the square area sorrounding the turtle whithin which it smells pheromone 64 | :param lay_area: Controls the radius of the square area sorrounding the turtle where pheromone is laid 65 | :param lay_amount: Controls how much pheromone is laid 66 | :param evaporation: Controls how much pheromone evaporates at each step 67 | :param cluster_threshold: Controls the minimum number of slimes needed to consider an aggregate within 68 | cluster-radius a cluster (the higher the more difficult to consider an aggregate a 69 | cluster)—the higher the more difficult to obtain a positive reward for being within 70 | a cluster for learning slimes 71 | :param cluster_radius: Controls the range considered by slimes to count other slimes within a cluster (the 72 | higher the easier to form clusters, as turtles far apart are still counted together) 73 | —the higher the easier it is to obtain a positive reward for being within a cluster 74 | for learning slimes 75 | :param rew: Base reward for being in a cluster 76 | :param penalty: Base penalty for not being in a cluster 77 | :param episode_ticks: Number of ticks for episode termination 78 | :param W: Window width in # patches 79 | :param H: Window height in # patches 80 | :param PATCH_SIZE: Patch size in pixels 81 | :param TURTLE_SIZE: Turtle size in pixels 82 | :param FPS: Rendering FPS 83 | :param SHADE_STRENGTH: Strength of color shading for pheromone rendering 
(higher -> brighter color) 84 | :param SHOW_CHEM_TEXT: Whether to show pheromone amount on patches (when >= sniff-threshold) 85 | :param CLUSTER_FONT_SIZE: Font size of cluster number (for overlapping agents) 86 | :param CHEMICAL_FONT_SIZE: Font size of phermone amount (if SHOW_CHEM_TEXT is true) 87 | :param render_mode: 88 | """ 89 | assert render_mode is None or render_mode in self.metadata["render_modes"] 90 | 91 | self.population = kwargs['population'] 92 | self.sniff_threshold = kwargs['sniff_threshold'] 93 | self.diffuse_area = kwargs['diffuse_area'] 94 | self.smell_area = kwargs['smell_area'] 95 | self.lay_area = kwargs['lay_area'] 96 | self.lay_amount = kwargs['lay_amount'] 97 | self.evaporation = kwargs['evaporation'] 98 | self.diffuse_mode = kwargs['diffuse_mode'] 99 | self.follow_mode = kwargs['follow_mode'] 100 | self.cluster_threshold = kwargs['cluster_threshold'] 101 | self.cluster_radius = kwargs['cluster_radius'] 102 | self.reward = kwargs['rew'] 103 | self.penalty = kwargs['penalty'] 104 | self.episode_ticks = kwargs['episode_ticks'] 105 | 106 | self.W = kwargs['W'] 107 | self.H = kwargs['H'] 108 | self.patch_size = kwargs['PATCH_SIZE'] 109 | self.turtle_size = kwargs['TURTLE_SIZE'] 110 | self.fps = kwargs['FPS'] 111 | self.shade_strength = kwargs['SHADE_STRENGTH'] 112 | self.show_chem_text = kwargs['SHOW_CHEM_TEXT'] 113 | self.cluster_font_size = kwargs['CLUSTER_FONT_SIZE'] 114 | self.chemical_font_size = kwargs['CHEMICAL_FONT_SIZE'] 115 | 116 | self.coords = [] 117 | self.offset = self.patch_size // 2 118 | self.W_pixels = self.W * self.patch_size 119 | self.H_pixels = self.H * self.patch_size 120 | for x in range(self.offset, (self.W_pixels - self.offset) + 1, self.patch_size): 121 | for y in range(self.offset, (self.H_pixels - self.offset) + 1, self.patch_size): 122 | self.coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 123 | 124 | n_coords = len(self.coords) 125 | # create learner turtle 126 | self.learner = {"pos": self.coords[np.random.randint(n_coords)]} 127 | # create NON learner turtles 128 | self.turtles = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population)} 129 | 130 | # patches-own [chemical] - amount of pheromone in each patch 131 | self.patches = {self.coords[i]: {"id": i, 132 | 'chemical': 0.0, 133 | 'turtles': []} for i in range(n_coords)} 134 | self.patches[self.learner['pos']]['turtles'].append(-1) # DOC id of learner turtle 135 | for t in self.turtles: 136 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 137 | 138 | # pre-compute relevant structures to speed-up computation during rendering steps 139 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed smell area for each patch, including itself 140 | self.smell_patches = {} 141 | self._find_neighbours(self.smell_patches, self.smell_area) 142 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed lay area for each patch, including itself 143 | self.lay_patches = {} 144 | self._find_neighbours(self.lay_patches, self.lay_area) 145 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed diffusion area for each patch, including itself 146 | self.diffuse_patches = {} 147 | if self.diffuse_mode == 'cascade': 148 | self._find_neighbours_cascade(self.diffuse_patches, self.diffuse_area) 149 | else: 150 | self._find_neighbours(self.diffuse_patches, self.diffuse_area) 151 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed cluster-check for each patch, including itself 152 | self.cluster_patches = {} 153 | self._find_neighbours(self.cluster_patches, 
self.cluster_radius) 154 | 155 | self.action_space = spaces.Discrete(3) # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone TODO as dict 156 | self.observation_space = MultiBinary(2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch 157 | self._action_to_name = {0: "random-walk", 1: "drop-chemical", 2: "move-toward-chemical"} 158 | 159 | self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels)) 160 | self.clock = pygame.time.Clock() 161 | pygame.font.init() 162 | self.cluster_font = pygame.font.SysFont("arial", self.cluster_font_size) 163 | self.chemical_font = pygame.font.SysFont("arial", self.chemical_font_size) 164 | 165 | self.rewards = [] 166 | self.cluster_ticks = 0 167 | 168 | self.first_gui = True 169 | 170 | def _find_neighbours_cascade(self, neighbours: dict, area: int): 171 | """ 172 | For each patch, find neighbouring patches within square radius 'area', 1 step at a time 173 | (visiting first 1-hop patches, then 2-hops patches, and so on) 174 | 175 | :param neighbours: empty dictionary to fill 176 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 177 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 178 | :return: None (1st argument modified as side effect) 179 | """ 180 | for p in self.patches: 181 | neighbours[p] = [] 182 | for ring in range(area): 183 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 184 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 185 | #x, y = self._wrap(x, y) 186 | if (x, y) not in neighbours[p]: 187 | neighbours[p].append((x, y)) 188 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 189 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 190 | #x, y = self._wrap(x, y) 191 | if (x, y) not in neighbours[p]: 192 | neighbours[p].append((x, y)) 193 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 194 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 195 | #x, y = self._wrap(x, y) 196 | if (x, y) not in neighbours[p]: 197 | neighbours[p].append((x, y)) 198 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 199 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 200 | #x, y = self._wrap(x, y) 201 | if (x, y) not in neighbours[p]: 202 | neighbours[p].append((x, y)) 203 | neighbours[p] = [self._wrap(x, y) for (x, y) in neighbours[p]] 204 | #neighbours[p] = list(set(neighbours[p])) 205 | 206 | def _find_neighbours(self, neighbours: dict, area: int): 207 | """ 208 | For each patch, find neighbouring patches within square radius 'area' 209 | 210 | :param neighbours: empty dictionary to fill 211 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 212 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 213 | :return: None (1st argument modified as side effect) 214 | """ 215 | for p in self.patches: 216 | neighbours[p] = [] 217 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 218 | for y in 
range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 219 | x, y = self._wrap(x, y) 220 | neighbours[p].append((x, y)) 221 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 222 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 223 | x, y = self._wrap(x, y) 224 | neighbours[p].append((x, y)) 225 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 226 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 227 | x, y = self._wrap(x, y) 228 | neighbours[p].append((x, y)) 229 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 230 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 231 | x, y = self._wrap(x, y) 232 | neighbours[p].append((x, y)) 233 | neighbours[p] = list(set(neighbours[p])) 234 | 235 | def _wrap(self, x: int, y: int): 236 | """ 237 | Wrap x,y coordinates around the torus 238 | 239 | :param x: the x coordinate to wrap 240 | :param y: the y coordinate to wrap 241 | :return: the wrapped x, y 242 | """ 243 | if x < 0: 244 | x = self.W_pixels + x 245 | elif x > self.W_pixels: 246 | x = x - self.W_pixels 247 | if y < 0: 248 | y = self.H_pixels + y 249 | elif y > self.H_pixels: 250 | y = y - self.H_pixels 251 | return x, y 252 | 253 | def step(self, action: int): 254 | """ 255 | OpenAI Gym env step function. Actions are: 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 256 | 257 | :param action: 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 258 | :return: current observation, current reward, episode done, info 259 | """ 260 | 261 | # non learners act 262 | for turtle in self.turtles: 263 | pos = self.turtles[turtle]['pos'] 264 | t = self.turtles[turtle] 265 | max_pheromone, max_coords = self._find_max_pheromone(pos) 266 | 267 | if max_pheromone >= self.sniff_threshold: 268 | self.follow_pheromone(max_coords, t, turtle) 269 | else: 270 | self.walk(t, turtle) 271 | 272 | self.lay_pheromone(self.turtles[turtle]['pos'], self.lay_amount) 273 | 274 | # learner acts 275 | if action == 0: # DOC walk 276 | self.walk(self.learner, -1) 277 | elif action == 1: # DOC lay_pheromone 278 | self.lay_pheromone(self.learner['pos'], self.lay_amount) 279 | elif action == 2: # DOC follow_pheromone 280 | max_pheromone, max_coords = self._find_max_pheromone(self.learner['pos']) 281 | if max_pheromone >= self.sniff_threshold: 282 | self.follow_pheromone(max_coords, self.learner, -1) 283 | else: 284 | self.walk(self.learner, -1) 285 | 286 | self._diffuse() 287 | self._evaporate() 288 | 289 | cur_reward = self.reward_cluster_and_time_punish_time() 290 | 291 | return self._get_obs(), cur_reward, False, {} # DOC Gym v26 has additional 'truncated' boolean 292 | 293 | def lay_pheromone(self, pos, amount: int): 294 | """ 295 | Lay 'amount' pheromone in square 'area' centred in 'pos' 296 | 297 | :param pos: the x,y position taken as centre of pheromone deposit area 298 | :param amount: the amount of pheromone to deposit 299 | :return: None (environment properties are changed as side effect) 300 | """ 301 | for p in self.lay_patches[pos]: 302 | self.patches[p]['chemical'] += amount 303 | 304 | def _diffuse(self): 305 | """ 306 | Diffuses pheromone from each patch to nearby patches controlled through self.diffuse_area patches in a way 307 | controlled through self.diffuse_mode: 308 | 'simple' = Python-dependant (dict keys "ordering") 309 | 'rng' = random visiting 310 | 'sorted' = diffuse first the patches with more pheromone 311 | 'filter' = 
do not re-diffuse patches receiving pheromone due to diffusion 312 | 313 | :return: None (environment properties are changed as side effect) 314 | """ 315 | n_size = len(self.diffuse_patches[list(self.patches.keys())[0]]) # same for every patch 316 | patch_keys = list(self.patches.keys()) 317 | if self.diffuse_mode == 'rng': 318 | random.shuffle(patch_keys) 319 | elif self.diffuse_mode == 'sorted': 320 | patch_list = list(self.patches.items()) 321 | patch_list = sorted(patch_list, key=lambda t: t[1]['chemical'], reverse=True) 322 | patch_keys = [t[0] for t in patch_list] 323 | elif self.diffuse_mode == 'filter': 324 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 325 | elif self.diffuse_mode == 'rng-filter': 326 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 327 | random.shuffle(patch_keys) 328 | for patch in patch_keys: 329 | p = self.patches[patch]['chemical'] 330 | ratio = p / n_size 331 | if p > 0: 332 | diffuse_keys = self.diffuse_patches[patch][:] 333 | for n in diffuse_keys: 334 | self.patches[n]['chemical'] += ratio 335 | self.patches[patch]['chemical'] = ratio 336 | 337 | def _evaporate(self): 338 | """ 339 | Evaporates pheromone from each patch according to param self.evaporation 340 | 341 | :return: None (environment properties are changed as side effect) 342 | """ 343 | for patch in self.patches.keys(): 344 | if self.patches[patch]['chemical'] > 0: 345 | self.patches[patch]['chemical'] *= self.evaporation 346 | 347 | def walk(self, turtle: dict[str: tuple[int, int]], _id: int): 348 | """ 349 | Action 0: move in random direction (8 sorrounding cells) 350 | 351 | :param _id: the id of the turtle to move 352 | :param turtle: the turtle to move (dict mapping 'pos' to position as x,y) 353 | :return: None (pos is updated after movement as side-effect) 354 | """ 355 | choice = [self.patch_size, -self.patch_size, 0] 356 | x, y = turtle['pos'] 357 | self.patches[turtle['pos']]['turtles'].remove(_id) 358 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 359 | x2, y2 = self._wrap(x2, y2) 360 | turtle['pos'] = (x2, y2) 361 | self.patches[turtle['pos']]['turtles'].append(_id) 362 | 363 | def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int): 364 | """ 365 | Action 2: move turtle towards greatest pheromone found 366 | 367 | :param _id: the id of the turtle to move 368 | :param ph_coords: the position where max pheromone has been sensed 369 | :param turtle: the turtle looking for pheromone 370 | :return: None (pos is updated after movement as side-effect) 371 | """ 372 | x, y = turtle['pos'] 373 | self.patches[turtle['pos']]['turtles'].remove(_id) 374 | if ph_coords[0] > x and ph_coords[1] > y: # top right 375 | x += self.patch_size 376 | y += self.patch_size 377 | elif ph_coords[0] < x and ph_coords[1] < y: # bottom left 378 | x -= self.patch_size 379 | y -= self.patch_size 380 | elif ph_coords[0] > x and ph_coords[1] < y: # bottom right 381 | x += self.patch_size 382 | y -= self.patch_size 383 | elif ph_coords[0] < x and ph_coords[1] > y: # top left 384 | x -= self.patch_size 385 | y += self.patch_size 386 | elif ph_coords[0] == x and ph_coords[1] < y: # below me 387 | y -= self.patch_size 388 | elif ph_coords[0] == x and ph_coords[1] > y: # above me 389 | y += self.patch_size 390 | elif ph_coords[0] > x and ph_coords[1] == y: # right 391 | x += self.patch_size 392 | elif ph_coords[0] < x and ph_coords[1] == y: # left 393 | x -= self.patch_size 394 | else: # my patch 
395 |             pass
396 |         x, y = self._wrap(x, y)
397 |         turtle['pos'] = (x, y)
398 |         self.patches[turtle['pos']]['turtles'].append(_id)
399 | 
400 |     def _find_max_pheromone(self, pos: tuple[int, int]):
401 |         """
402 |         Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
403 |         Following pheromone mode is controlled by param self.follow_mode:
404 |         'det' = follow greatest pheromone
405 |         'prob' = follow greatest pheromone probabilistically (pheromone strength as weight)
406 | 
407 |         :param pos: the x,y position of the turtle looking for pheromone
408 |         :return: the maximum pheromone level found and its x,y position
409 |         """
410 |         if self.follow_mode == "prob":
411 |             population = [k for k in self.smell_patches[pos]]
412 |             weights = [self.patches[k]['chemical'] for k in self.smell_patches[pos]]
413 |             if all([w == 0 for w in weights]):
414 |                 winner = population[np.random.choice(len(population))]
415 |             else:
416 |                 winner = random.choices(population, weights=weights, k=1)[0]
417 |             max_ph = self.patches[winner]['chemical']
418 |         else:
419 |             max_ph = -1
420 |             max_pos = [pos]
421 |             for p in self.smell_patches[pos]:
422 |                 chem = self.patches[p]['chemical']
423 |                 if chem > max_ph:
424 |                     max_ph = chem
425 |                     max_pos = [p]
426 |                 elif chem == max_ph:
427 |                     max_pos.append(p)
428 |             winner = max_pos[np.random.choice(len(max_pos))]
429 | 
430 |         return max_ph, winner
431 | 
432 |     def _compute_cluster(self):
433 |         """
434 |         Computes the size of the cluster the learner turtle is within, given 'cluster_radius'
435 | 
436 |         :return: the cluster size (an integer count of the turtles within 'cluster_radius' of the learner)
437 |         """
438 |         cluster = 1
439 |         for p in self.cluster_patches[self.learner['pos']]:
440 |             cluster += len(self.patches[p]['turtles'])
441 | 
442 |         return cluster
443 | 
444 |     def _check_chemical(self):
445 |         """
446 |         Checks whether there is pheromone on the patch where the learner turtle is
447 | 
448 |         :return: a boolean
449 |         """
450 |         return self.patches[self.learner['pos']][
451 |                    'chemical'] >= self.sniff_threshold
452 | 
453 |     def reward_cluster_punish_time(self):
454 |         """
455 |         Reward is (positive) proportional to cluster size (quadratic) and (negative) proportional to time spent outside
456 |         clusters
457 | 
458 |         :return: the reward
459 |         """
460 |         cluster = self._compute_cluster()
461 |         if cluster >= self.cluster_threshold:
462 |             self.cluster_ticks += 1
463 | 
464 |         cur_reward = ((cluster ** 2) / self.cluster_threshold) * self.reward + (
465 |                 ((self.episode_ticks - self.cluster_ticks) / self.episode_ticks) * self.penalty)
466 | 
467 |         self.rewards.append(cur_reward)
468 |         return cur_reward
469 | 
470 |     def reward_cluster_and_time_punish_time(self):
471 |         """
472 |         Reward grows with time spent within clusters and with cluster size, and shrinks with time spent outside clusters
473 |         :return: the reward
474 |         """
475 |         cluster = self._compute_cluster()
476 |         if cluster >= self.cluster_threshold:
477 |             self.cluster_ticks += 1
478 | 
479 |         cur_reward = (self.cluster_ticks / self.episode_ticks) * self.reward + \
480 |                      (cluster / self.cluster_threshold) * (self.reward ** 2) + \
481 |                      (((self.episode_ticks - self.cluster_ticks) / self.episode_ticks) * self.penalty)
482 | 
483 |         self.rewards.append(cur_reward)
484 |         return cur_reward
485 | 
486 |     def reset(self):
487 |         # super().reset()
488 |         # empty stuff
489 |         self.rewards = []
490 |         self.cluster_ticks = 0
491 | 
492 |         # re-position learner turtle
493 |         self.patches[self.learner['pos']]['turtles'].remove(-1)
494 |         self.learner['pos'] = self.coords[np.random.randint(len(self.coords))]
495 |         self.patches[self.learner['pos']]['turtles'].append(-1)  # DOC id of learner turtle
496 |         # re-position NON learner turtles
497 |         for t in self.turtles:
498 |             self.patches[self.turtles[t]['pos']]['turtles'].remove(t)
499 |             self.turtles[t]['pos'] = self.coords[np.random.randint(len(self.coords))]
500 |             self.patches[self.turtles[t]['pos']]['turtles'].append(t)
501 |         # patches-own [chemical] - amount of pheromone in the patch
502 |         for p in self.patches:
503 |             self.patches[p]['chemical'] = 0.0
504 | 
505 |         return self._get_obs()
506 | 
507 |     def render(self, mode="human", **kwargs):
508 |         for event in pygame.event.get():
509 |             if event.type == pygame.QUIT:  # window closed -> program quits
510 |                 pygame.quit()
511 | 
512 |         if self.first_gui:
513 |             self.first_gui = False
514 |             pygame.init()
515 |             pygame.display.set_caption("SLIME")
516 | 
517 |         self.screen.fill(BLACK)
518 |         # draw patches
519 |         for p in self.patches:
520 |             chem = round(self.patches[p]['chemical']) * self.shade_strength
521 |             pygame.draw.rect(self.screen, (0, chem if chem <= 255 else 255, 0),
522 |                              pygame.Rect(p[0] - self.offset, p[1] - self.offset, self.patch_size, self.patch_size))
523 |             if self.show_chem_text and (sys.gettrace() is not None or
524 |                                         self.patches[p]['chemical'] >= self.sniff_threshold):  # if debugging show text everywhere, even 0
525 |                 text = self.chemical_font.render(str(round(self.patches[p]['chemical'], 1)), True, GREEN)
526 |                 self.screen.blit(text, text.get_rect(center=p))
527 | 
528 |         # draw learner
529 |         pygame.draw.circle(self.screen, RED, (self.learner['pos'][0], self.learner['pos'][1]),
530 |                            self.turtle_size // 2)
531 |         # draw NON learners
532 |         for turtle in self.turtles.values():
533 |             pygame.draw.circle(self.screen, BLUE, (turtle['pos'][0], turtle['pos'][1]), self.turtle_size // 2)
534 | 
535 |         for p in self.patches:
536 |             if len(self.patches[p]['turtles']) > 1:
537 |                 text = self.cluster_font.render(str(len(self.patches[p]['turtles'])), True,
538 |                                                 RED if -1 in self.patches[p]['turtles'] else WHITE)
539 |                 self.screen.blit(text, text.get_rect(center=p))
540 | 
541 |         self.clock.tick(self.fps)
542 |         pygame.display.flip()
543 | 
544 |     def close(self):
545 |         if self.screen is not None:
546 |             pygame.display.quit()
547 |             pygame.quit()
548 | 
549 |     def _get_obs(self):
550 |         return np.array([self._compute_cluster() >= self.cluster_threshold, self._check_chemical()])
551 | 
552 | 
553 | if __name__ == "__main__":
554 |     PARAMS_FILE = "../agents/single-agent-params.json"
555 |     EPISODES = 5
556 |     LOG_EVERY = 1
557 | 
558 |     with open(PARAMS_FILE) as f:
559 |         params = json.load(f)
560 |     env = Slime(render_mode="human", **params)
561 | 
562 |     for ep in range(1, EPISODES + 1):
563 |         env.reset()
564 |         print(
565 |             f"-------------------------------------------\nEPISODE: {ep}\n-------------------------------------------")
566 |         for tick in range(params['episode_ticks']):
567 |             observation, reward, _, _ = env.step(env.action_space.sample())  # step() returns (obs, reward, done, info)
568 |             if tick % LOG_EVERY == 0:
569 |                 print(f"{tick}: {observation}, {reward}")
570 |             env.render()
571 |     env.close()
--------------------------------------------------------------------------------
/slime_environments/environments/SlimeEnvMultiAgent.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import sys
4 | from typing import Optional
5 | 
6 | import gym
7 | import numpy as np
8 | import pygame
9 | from gym import spaces
10 | from pettingzoo import AECEnv
11 | from pettingzoo.utils import agent_selector
12 | from pettingzoo.utils.env import ObsType
13 | 
14 | BLACK = (0, 0, 0)
15 | BLUE = (0, 0, 255)
16 | WHITE = (255, 255, 
255) 17 | RED = (190, 0, 0) 18 | GREEN = (0, 190, 0) 19 | 20 | 21 | class BooleanSpace(gym.Space): 22 | def __init__(self, size=None): 23 | """ 24 | A space of boolean values 25 | :param size: how many boolean values the space is made of 26 | """ 27 | assert isinstance(size, int) and size > 0 28 | self.size = size 29 | self.values = [False for _ in range(self.size)] 30 | gym.Space.__init__(self, (), bool) 31 | 32 | def contains(self, x): 33 | return x in self.values 34 | 35 | def sample(self): 36 | return [random.choice([True, False]) for _ in range(self.size)] 37 | # return self.values 38 | 39 | def observe(self): 40 | """ 41 | Get the current observation 42 | :return: the current observation 43 | """ 44 | return self.values 45 | 46 | def change(self, p, value): 47 | """ 48 | Set a specific boolean value for the current observation 49 | :param p: which boolean values to change (position index) 50 | :param value: the boolean value to set 51 | :return: None 52 | """ 53 | self.values[p] = value 54 | 55 | def change_all(self, values): 56 | """ 57 | Set all the boolean values for the current observation 58 | :param values: the boolean values to set 59 | :return: None 60 | """ 61 | self.values = values 62 | 63 | 64 | class Slime(AECEnv): 65 | def seed(self, seed: Optional[int] = None) -> None: 66 | pass 67 | 68 | def observe(self, agent: str) -> ObsType: 69 | pass 70 | 71 | def state(self) -> np.ndarray: 72 | pass 73 | 74 | metadata = {"render_modes": ["human", "server"]} 75 | 76 | def __init__(self, 77 | render_mode: Optional[str] = None, 78 | **kwargs): 79 | """ 80 | :param population: Controls the number of non-learning slimes (= green turtles) 81 | :param sniff_threshold: Controls how sensitive slimes are to pheromone (higher values make slimes less 82 | sensitive to pheromone)—unclear effect on learning, could be negligible 83 | :param diffuse_area Controls the diffusion radius 84 | :param diffuse_mode Controls in which order patches with pheromone to diffuse are visited: 85 | 'simple' = Python-dependant (dict keys "ordering") 86 | 'rng' = random visiting 87 | 'sorted' = diffuse first the patches with more pheromone 88 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 89 | 'cascade' = step-by-step diffusion within 'diffuse_area' 90 | :param follow_mode Controls how non-learning agents follow pheromone: 91 | 'det' = follow greatest pheromone 92 | 'prob' = follow greatest pheromone probabilistically (pheromone strength as weight) 93 | :param smell_area: Controls the radius of the square area sorrounding the turtle whithin which it smells pheromone 94 | :param lay_area: Controls the radius of the square area sorrounding the turtle where pheromone is laid 95 | :param lay_amount: Controls how much pheromone is laid 96 | :param evaporation: Controls how much pheromone evaporates at each step 97 | :param cluster_threshold: Controls the minimum number of slimes needed to consider an aggregate within 98 | cluster-radius a cluster (the higher the more difficult to consider an aggregate a 99 | cluster)—the higher the more difficult to obtain a positive reward for being within 100 | a cluster for learning slimes 101 | :param cluster_radius: Controls the range considered by slimes to count other slimes within a cluster (the 102 | higher the easier to form clusters, as turtles far apart are still counted together) 103 | —the higher the easier it is to obtain a positive reward for being within a cluster 104 | for learning slimes 105 | :param rew: Base reward for being in a 
cluster 106 | :param penalty: Base penalty for not being in a cluster 107 | :param episode_ticks: Number of ticks for episode termination 108 | :param W: Window width in # patches 109 | :param H: Window height in # patches 110 | :param PATCH_SIZE: Patch size in pixels 111 | :param TURTLE_SIZE: Turtle size in pixels 112 | :param FPS: Rendering FPS 113 | :param SHADE_STRENGTH: Strength of color shading for pheromone rendering (higher -> brighter color) 114 | :param SHOW_CHEM_TEXT: Whether to show pheromone amount on patches (when >= sniff-threshold) 115 | :param CLUSTER_FONT_SIZE: Font size of cluster number (for overlapping agents) 116 | :param CHEMICAL_FONT_SIZE: Font size of phermone amount (if SHOW_CHEM_TEXT is true) 117 | :param render_mode: 118 | """ 119 | assert render_mode is None or render_mode in self.metadata["render_modes"] 120 | 121 | self.population = kwargs['population'] 122 | self.learner_population = kwargs['learner_population'] 123 | self.sniff_threshold = kwargs['sniff_threshold'] 124 | self.diffuse_area = kwargs['diffuse_area'] 125 | self.smell_area = kwargs['smell_area'] 126 | self.lay_area = kwargs['lay_area'] 127 | self.lay_amount = kwargs['lay_amount'] 128 | self.evaporation = kwargs['evaporation'] 129 | self.diffuse_mode = kwargs['diffuse_mode'] 130 | self.follow_mode = kwargs['follow_mode'] 131 | self.cluster_threshold = kwargs['cluster_threshold'] 132 | self.cluster_radius = kwargs['cluster_radius'] 133 | self.reward = kwargs['rew'] 134 | self.penalty = kwargs['penalty'] 135 | self.episode_ticks = kwargs['episode_ticks'] 136 | 137 | self.W = kwargs['W'] 138 | self.H = kwargs['H'] 139 | self.patch_size = kwargs['PATCH_SIZE'] 140 | self.turtle_size = kwargs['TURTLE_SIZE'] 141 | self.fps = kwargs['FPS'] 142 | self.shade_strength = kwargs['SHADE_STRENGTH'] 143 | self.show_chem_text = kwargs['SHOW_CHEM_TEXT'] 144 | self.cluster_font_size = kwargs['CLUSTER_FONT_SIZE'] 145 | self.chemical_font_size = kwargs['CHEMICAL_FONT_SIZE'] 146 | self.gui = kwargs["gui"] 147 | 148 | self.coords = [] 149 | self.offset = self.patch_size // 2 150 | self.W_pixels = self.W * self.patch_size 151 | self.H_pixels = self.H * self.patch_size 152 | for x in range(self.offset, (self.W_pixels - self.offset) + 1, self.patch_size): 153 | for y in range(self.offset, (self.H_pixels - self.offset) + 1, self.patch_size): 154 | self.coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 155 | 156 | pop_tot = self.population + self.learner_population 157 | self.agents = [i for i in range(self.population, pop_tot)] # DOC learning agents IDs 158 | self._agent_selector = agent_selector(self.agents) 159 | self.agent = self._agent_selector.next() 160 | 161 | n_coords = len(self.coords) 162 | # create learners turtle 163 | self.learners = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population, pop_tot)} 164 | # create NON learner turtles 165 | self.turtles = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population)} 166 | 167 | # patches-own [chemical] - amount of pheromone in each patch 168 | self.patches = {self.coords[i]: {"id": i, 169 | 'chemical': 0.0, 170 | 'turtles': []} for i in range(n_coords)} 171 | for l in self.learners: 172 | self.patches[self.learners[l]['pos']]['turtles'].append(l) # DOC id of learner turtles 173 | for t in self.turtles: 174 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 175 | 176 | # pre-compute relevant structures to speed-up computation during rendering steps 177 | # DOC {(x,y): 
[(x,y), ..., (x,y)]} pre-computed smell area for each patch, including itself 178 | self.smell_patches = {} 179 | self._find_neighbours(self.smell_patches, self.smell_area) 180 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed lay area for each patch, including itself 181 | self.lay_patches = {} 182 | self._find_neighbours(self.lay_patches, self.lay_area) 183 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed diffusion area for each patch, including itself 184 | self.diffuse_patches = {} 185 | if self.diffuse_mode == 'cascade': 186 | self._find_neighbours_cascade(self.diffuse_patches, self.diffuse_area) 187 | else: 188 | self._find_neighbours(self.diffuse_patches, self.diffuse_area) 189 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed cluster-check for each patch, including itself 190 | self.cluster_patches = {} 191 | self._find_neighbours(self.cluster_patches, self.cluster_radius) 192 | 193 | self.action_spaces = {a: spaces.Discrete(3) for a in 194 | self.agents} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 195 | self.observation_space = BooleanSpace( 196 | size=2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch 197 | self.obs_dict = {a: BooleanSpace(size=2) for a in self.agents} 198 | 199 | if self.gui: 200 | self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels)) 201 | self.clock = pygame.time.Clock() 202 | pygame.font.init() 203 | self.cluster_font = pygame.font.SysFont("arial", self.cluster_font_size) 204 | self.chemical_font = pygame.font.SysFont("arial", self.chemical_font_size) 205 | self.first_gui = True 206 | 207 | self.rewards = {i: [] for i in range(self.population, pop_tot)} 208 | self.cluster_ticks = {i: 0 for i in range(self.population, pop_tot)} 209 | 210 | def _find_neighbours_cascade(self, neighbours: dict, area: int): 211 | """ 212 | For each patch, find neighbouring patches within square radius 'area', 1 step at a time 213 | (visiting first 1-hop patches, then 2-hops patches, and so on) 214 | 215 | :param neighbours: empty dictionary to fill 216 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 217 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 218 | :return: None (1st argument modified as side effect) 219 | """ 220 | for p in self.patches: 221 | neighbours[p] = [] 222 | for ring in range(area): 223 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, 224 | self.patch_size): 225 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, 226 | self.patch_size): 227 | if (x, y) not in neighbours[p]: 228 | neighbours[p].append((x, y)) 229 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, 230 | -self.patch_size): 231 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, 232 | -self.patch_size): 233 | if (x, y) not in neighbours[p]: 234 | neighbours[p].append((x, y)) 235 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, 236 | self.patch_size): 237 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, 238 | -self.patch_size): 239 | if (x, y) not in neighbours[p]: 240 | neighbours[p].append((x, y)) 241 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, 242 | -self.patch_size): 243 | for y in range(p[1] + (ring * 
self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, 244 | self.patch_size): 245 | if (x, y) not in neighbours[p]: 246 | neighbours[p].append((x, y)) 247 | neighbours[p] = [self._wrap(x, y) for (x, y) in neighbours[p]] 248 | # neighbours[p] = list(set(neighbours[p])) 249 | 250 | def _find_neighbours(self, neighbours: dict, area: int): 251 | """ 252 | For each patch, find neighbouring patches within square radius 'area' 253 | 254 | :param neighbours: empty dictionary to fill 255 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 256 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 257 | :return: None (1st argument modified as side effect) 258 | """ 259 | for p in self.patches: 260 | neighbours[p] = [] 261 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 262 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 263 | x, y = self._wrap(x, y) 264 | neighbours[p].append((x, y)) 265 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 266 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 267 | x, y = self._wrap(x, y) 268 | neighbours[p].append((x, y)) 269 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 270 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 271 | x, y = self._wrap(x, y) 272 | neighbours[p].append((x, y)) 273 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 274 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 275 | x, y = self._wrap(x, y) 276 | neighbours[p].append((x, y)) 277 | neighbours[p] = list(set(neighbours[p])) 278 | 279 | def _wrap(self, x: int, y: int): 280 | """ 281 | Wrap x,y coordinates around the torus 282 | 283 | :param x: the x coordinate to wrap 284 | :param y: the y coordinate to wrap 285 | :return: the wrapped x, y 286 | """ 287 | if x < 0: 288 | x = self.W_pixels + x 289 | elif x > self.W_pixels: 290 | x = x - self.W_pixels 291 | if y < 0: 292 | y = self.H_pixels + y 293 | elif y > self.H_pixels: 294 | y = y - self.H_pixels 295 | return x, y 296 | 297 | # learners act 298 | def step(self, action: int): 299 | agent_in_charge = self.agent_selection # ID of agent 300 | if action == 0: # DOC walk 301 | self.walk(self.learners[agent_in_charge], agent_in_charge) 302 | elif action == 1: # DOC lay_pheromone 303 | self.lay_pheromone(self.learners[agent_in_charge]['pos'], self.lay_amount) 304 | elif action == 2: # DOC follow_pheromone 305 | max_pheromone, max_coords = self._find_max_pheromone(self.learners[agent_in_charge]['pos']) 306 | if max_pheromone >= self.sniff_threshold: 307 | self.follow_pheromone(max_coords, self.learners[agent_in_charge], agent_in_charge) 308 | else: 309 | self.walk(self.learners[agent_in_charge], agent_in_charge) 310 | 311 | self.agent_selection = self._agent_selector.next() 312 | 313 | # non learners act 314 | def move(self): 315 | for turtle in self.turtles: 316 | pos = self.turtles[turtle]['pos'] 317 | t = self.turtles[turtle] 318 | max_pheromone, max_coords = self._find_max_pheromone(pos) 319 | 320 | if max_pheromone >= self.sniff_threshold: 321 | self.follow_pheromone(max_coords, t, turtle) 322 | else: 323 | self.walk(t, turtle) 324 | 325 | self.lay_pheromone(self.turtles[turtle]['pos'], self.lay_amount) 326 | 327 | # not using ".change_all" method form BooleanSpace 328 | def last(self, current_agent): 329 
| #self._evaporate() 330 | #self._diffuse() 331 | 332 | self.agent = current_agent 333 | self.obs_dict[self.agent].change(0, self._compute_cluster(self.agent) >= self.cluster_threshold) 334 | self.obs_dict[self.agent].change(1, self._check_chemical(self.agent)) 335 | cur_reward = self.reward_cluster_and_time_punish_time(self.agent) 336 | 337 | return self.obs_dict[self.agent], cur_reward, False, {} 338 | 339 | def lay_pheromone(self, pos: tuple[int, int], amount: int): 340 | """ 341 | Lay 'amount' pheromone in square 'area' centred in 'pos' 342 | :param pos: the x,y position taken as centre of pheromone deposit area 343 | :param amount: the amount of pheromone to deposit 344 | :return: None (environment properties are changed as side effect) 345 | """ 346 | for p in self.lay_patches[pos]: 347 | self.patches[p]['chemical'] += amount 348 | 349 | def _diffuse(self): 350 | """ 351 | Diffuses pheromone from each patch to nearby patches controlled through self.diffuse_area patches in a way 352 | controlled through self.diffuse_mode: 353 | 'simple' = Python-dependant (dict keys "ordering") 354 | 'rng' = random visiting 355 | 'sorted' = diffuse first the patches with more pheromone 356 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 357 | 358 | :return: None (environment properties are changed as side effect) 359 | """ 360 | n_size = len(self.diffuse_patches[list(self.patches.keys())[0]]) # same for every patch 361 | patch_keys = list(self.patches.keys()) 362 | if self.diffuse_mode == 'rng': 363 | random.shuffle(patch_keys) 364 | elif self.diffuse_mode == 'sorted': 365 | patch_list = list(self.patches.items()) 366 | patch_list = sorted(patch_list, key=lambda t: t[1]['chemical'], reverse=True) 367 | patch_keys = [t[0] for t in patch_list] 368 | elif self.diffuse_mode == 'filter': 369 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 370 | elif self.diffuse_mode == 'rng-filter': 371 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 372 | random.shuffle(patch_keys) 373 | for patch in patch_keys: 374 | p = self.patches[patch]['chemical'] 375 | ratio = p / n_size 376 | if p > 0: 377 | diffuse_keys = self.diffuse_patches[patch][:] 378 | for n in diffuse_keys: 379 | self.patches[n]['chemical'] += ratio 380 | self.patches[patch]['chemical'] = ratio 381 | 382 | def _evaporate(self): 383 | """ 384 | Evaporates pheromone from each patch according to param self.evaporation 385 | 386 | :return: None (environment properties are changed as side effect) 387 | """ 388 | for patch in self.patches.keys(): 389 | if self.patches[patch]['chemical'] > 0: 390 | self.patches[patch]['chemical'] *= self.evaporation 391 | 392 | def walk(self, turtle: dict[str: tuple[int, int]], _id: int): 393 | """ 394 | Action 0: move in random direction (8 sorrounding cells) 395 | 396 | :param _id: the id of the turtle to move 397 | :param turtle: the turtle to move (dict mapping 'pos' to position as x,y) 398 | :return: None (pos is updated after movement as side-effect) 399 | """ 400 | choice = [self.patch_size, -self.patch_size, 0] 401 | x, y = turtle['pos'] 402 | self.patches[turtle['pos']]['turtles'].remove(_id) 403 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 404 | x2, y2 = self._wrap(x2, y2) 405 | turtle['pos'] = (x2, y2) 406 | self.patches[turtle['pos']]['turtles'].append(_id) 407 | 408 | def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int): 409 | """ 410 | Action 2: move turtle 
towards greatest pheromone found
411 |         :param _id: the id of the turtle to move
412 |         :param ph_coords: the position where max pheromone has been sensed
413 |         :param turtle: the turtle looking for pheromone
414 |         :return: None (pos is updated after movement as side-effect)
415 |         """
416 |         x, y = turtle['pos']
417 |         self.patches[turtle['pos']]['turtles'].remove(_id)
418 |         if ph_coords[0] > x and ph_coords[1] > y:  # top right
419 |             x += self.patch_size
420 |             y += self.patch_size
421 |         elif ph_coords[0] < x and ph_coords[1] < y:  # bottom left
422 |             x -= self.patch_size
423 |             y -= self.patch_size
424 |         elif ph_coords[0] > x and ph_coords[1] < y:  # bottom right
425 |             x += self.patch_size
426 |             y -= self.patch_size
427 |         elif ph_coords[0] < x and ph_coords[1] > y:  # top left
428 |             x -= self.patch_size
429 |             y += self.patch_size
430 |         elif ph_coords[0] == x and ph_coords[1] < y:  # below me
431 |             y -= self.patch_size
432 |         elif ph_coords[0] == x and ph_coords[1] > y:  # above me
433 |             y += self.patch_size
434 |         elif ph_coords[0] > x and ph_coords[1] == y:  # right
435 |             x += self.patch_size
436 |         elif ph_coords[0] < x and ph_coords[1] == y:  # left
437 |             x -= self.patch_size
438 |         else:  # my patch
439 |             pass
440 |         x, y = self._wrap(x, y)
441 |         turtle['pos'] = (x, y)
442 |         self.patches[turtle['pos']]['turtles'].append(_id)
443 | 
444 |     def _find_max_pheromone(self, pos: tuple[int, int]):
445 |         """
446 |         Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
447 |         Following pheromone mode is controlled by param self.follow_mode:
448 |         'det' = follow greatest pheromone
449 |         'prob' = follow greatest pheromone probabilistically (pheromone strength as weight)
450 | 
451 |         :param pos: the x,y position of the turtle looking for pheromone
452 |         :return: the maximum pheromone level found and its x,y position
453 |         """
454 |         if self.follow_mode == "prob":
455 |             population = [k for k in self.smell_patches[pos]]
456 |             weights = [self.patches[k]['chemical'] for k in self.smell_patches[pos]]
457 |             if all([w == 0 for w in weights]):
458 |                 winner = population[np.random.choice(len(population))]
459 |             else:
460 |                 winner = random.choices(population, weights=weights, k=1)[0]
461 |             max_ph = self.patches[winner]['chemical']
462 |         else:
463 |             max_ph = -1
464 |             max_pos = [pos]
465 |             for p in self.smell_patches[pos]:
466 |                 chem = self.patches[p]['chemical']
467 |                 if chem > max_ph:
468 |                     max_ph = chem
469 |                     max_pos = [p]
470 |                 elif chem == max_ph:
471 |                     max_pos.append(p)
472 |             winner = max_pos[np.random.choice(len(max_pos))]
473 | 
474 |         return max_ph, winner
475 | 
476 |     def _compute_cluster(self, current_agent):
477 |         """
478 |         Computes the size of the cluster the learner turtle is within, given 'cluster_radius'
479 | 
480 |         :return: the cluster size (an integer count of nearby turtles)
481 |         """
482 |         self.agent = current_agent
483 |         cluster = 1
484 |         for p in self.cluster_patches[self.learners[self.agent]['pos']]:
485 |             cluster += len(self.patches[p]['turtles'])
486 | 
487 |         return cluster
488 | 
489 |     def avg_cluster(self):
490 |         """
491 |         Compute the average cluster size across learners
492 |         :return: avg cluster size
493 |         """
494 |         cluster_sizes = []  # records each cluster found (as a sorted list of turtle ids)
495 |         for l in self.learners:
496 |             cluster = []  # keeps track of which turtles are in this cluster
497 |             for p in self.cluster_patches[self.learners[l]['pos']]:
498 |                 for t in self.patches[p]['turtles']:
499 |                     cluster.append(t)
500 |             cluster.sort()
501 |             if cluster not in cluster_sizes:
502 |                 cluster_sizes.append(cluster)
503 | 
504 |         # cleaning process: compare clusters (within the same episode) and, if two share more than 90% of their turtles, remove one of them
505 |         for cluster in list(cluster_sizes):  # iterate over copies, since cluster_sizes is modified below
506 |             for cl in list(cluster_sizes):
507 |                 if cl != cluster:
508 |                     intersection = list(set(cluster) & set(cl))
509 |                     if len(intersection) > len(cluster) * 0.90:
510 |                         cluster_sizes.remove(cl)
511 | 
512 |         # compute avg_cluster_size
513 |         somma = 0
514 |         for cluster in cluster_sizes:
515 |             somma += len(cluster)
516 |         avg_cluster_size = somma / len(cluster_sizes)
517 | 
518 |         return avg_cluster_size
519 | 
520 |     def _check_chemical(self, current_agent):
521 |         """
522 |         Checks whether there is pheromone on the patch where the learner turtle is
523 | 
524 |         :return: a boolean
525 |         """
526 |         self.agent = current_agent
527 |         return self.patches[self.learners[self.agent]['pos']][
528 |                    'chemical'] > self.sniff_threshold
529 | 
530 |     # not a real reward function
531 |     def test_reward(self, current_agent):  # inverts the reward logic, GOAL: check for any strange behaviour
532 |         """
533 |         :return: the reward
534 |         """
535 |         self.agent = current_agent
536 |         chem = 0
537 |         for p in self.patches.values():
538 |             if self.agent in p['turtles']:
539 |                 chem = p['chemical']
540 |         if chem >= 5:
541 |             cur_reward = -1000
542 |         else:
543 |             cur_reward = 100
544 | 
545 |         self.rewards[self.agent].append(cur_reward)
546 |         return cur_reward
547 | 
548 |     def reward_cluster_punish_time(self, current_agent):  # DOC NetLogo rewardFunc7
549 |         """
550 |         Reward is (positive) proportional to cluster size (quadratic) and (negative) proportional to time spent outside
551 |         clusters
552 | 
553 |         :return: the reward
554 |         """
555 |         self.agent = current_agent
556 |         cluster = self._compute_cluster(self.agent)
557 |         if cluster >= self.cluster_threshold:
558 |             self.cluster_ticks[self.agent] += 1
559 | 
560 |         cur_reward = ((cluster ** 2) / self.cluster_threshold) * self.reward + (
561 |                 ((self.episode_ticks - self.cluster_ticks[self.agent]) / self.episode_ticks) * self.penalty)
562 | 
563 |         self.rewards[self.agent].append(cur_reward)
564 |         return cur_reward
565 | 
566 |     def reward_cluster_and_time_punish_time(self, current_agent):  # DOC NetLogo rewardFunc8
567 |         """
568 |         Reward grows with time spent within clusters and with cluster size, and shrinks with time spent outside clusters
569 |         :return: the reward
570 |         """
571 |         self.agent = current_agent
572 |         cluster = self._compute_cluster(self.agent)
573 |         if cluster >= self.cluster_threshold:
574 |             self.cluster_ticks[self.agent] += 1
575 | 
576 |         cur_reward = (self.cluster_ticks[self.agent] / self.episode_ticks) * self.reward + \
577 |                      (cluster / self.cluster_threshold) * (self.reward ** 2) + \
578 |                      (((self.episode_ticks - self.cluster_ticks[self.agent]) / self.episode_ticks) * self.penalty)
579 | 
580 |         self.rewards[self.agent].append(cur_reward)
581 |         return cur_reward
582 | 
583 |     def reset(self):
584 |         # empty stuff
585 |         pop_tot = self.population + self.learner_population
586 |         self.rewards = {i: [] for i in range(self.population, pop_tot)}
587 |         self.cluster_ticks = {i: 0 for i in range(self.population, pop_tot)}
588 |         self.obs_dict = {a: BooleanSpace(size=2) for a in self.agents}
589 |         # re-position learner turtles
590 |         for l in self.learners:
591 |             self.patches[self.learners[l]['pos']]['turtles'].remove(l)
592 | 
self.learners[l]['pos'] = self.coords[np.random.randint(len(self.coords))] 593 | self.patches[self.learners[l]['pos']]['turtles'].append(l) # DOC id of learner turtle 594 | # re-position NON learner turtles 595 | for t in self.turtles: 596 | self.patches[self.turtles[t]['pos']]['turtles'].remove(t) 597 | self.turtles[t]['pos'] = self.coords[np.random.randint(len(self.coords))] 598 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 599 | # patches-own [chemical] - amount of pheromone in the patch 600 | for p in self.patches: 601 | self.patches[p]['chemical'] = 0.0 602 | 603 | self._agent_selector.reinit(self.agents) 604 | self.agent_selection = self._agent_selector.next() 605 | 606 | # return self.obs_dict[self.agent], 0, False, {} 607 | 608 | def render(self, **kwargs): 609 | if self.gui: 610 | 611 | for event in pygame.event.get(): 612 | if event.type == pygame.QUIT: # window closed -> program quits 613 | pygame.quit() 614 | 615 | if self.first_gui: 616 | self.first_gui = False 617 | pygame.init() 618 | pygame.display.set_caption("SLIME") 619 | 620 | self.screen.fill(BLACK) 621 | # draw patches 622 | for p in self.patches: 623 | chem = round(self.patches[p]['chemical']) * self.shade_strength 624 | pygame.draw.rect(self.screen, (0, chem if chem <= 255 else 255, 0), 625 | pygame.Rect(p[0] - self.offset, p[1] - self.offset, self.patch_size, self.patch_size)) 626 | if self.show_chem_text and (not sys.gettrace() is None or 627 | self.patches[p][ 628 | 'chemical'] >= self.sniff_threshold): # if debugging show text everywhere, even 0 629 | text = self.chemical_font.render(str(round(self.patches[p]['chemical'], 1)), True, GREEN) 630 | self.screen.blit(text, text.get_rect(center=p)) 631 | 632 | # draw learners 633 | for learner in self.learners.values(): 634 | pygame.draw.circle(self.screen, RED, (learner['pos'][0], learner['pos'][1]), self.turtle_size // 2) 635 | # draw NON learners 636 | for turtle in self.turtles.values(): 637 | pygame.draw.circle(self.screen, BLUE, (turtle['pos'][0], turtle['pos'][1]), self.turtle_size // 2) 638 | 639 | for p in self.patches: 640 | if len(self.patches[p]['turtles']) > 1: 641 | text = self.cluster_font.render(str(len(self.patches[p]['turtles'])), True, 642 | RED if -1 in self.patches[p]['turtles'] else WHITE) 643 | self.screen.blit(text, text.get_rect(center=p)) 644 | 645 | self.clock.tick(self.fps) 646 | pygame.display.flip() 647 | return pygame.surfarray.array3d(self.screen) 648 | 649 | def close(self): 650 | if self.gui: 651 | if self.screen is not None: 652 | pygame.display.quit() 653 | pygame.quit() 654 | 655 | 656 | def get_neighborood_chemical(self, agent, as_vectors=False): 657 | agent_pos = self.learners[agent]["pos"] 658 | smell_patches = self.smell_patches[agent_pos] 659 | 660 | output_mask = [] 661 | for patch in smell_patches: 662 | output_mask.append(self.patches[patch]["chemical"] - self.patches[agent_pos]["chemical"]) if as_vectors else output_mask.append(self.patches[patch]["chemical"]) 663 | 664 | return np.array([output_mask], dtype=np.float32) 665 | 666 | 667 | if __name__ == "__main__": 668 | PARAMS_FILE = "../agents/multi-agent-params.json" 669 | EPISODES = 5 670 | LOG_EVERY = 1 671 | 672 | with open(PARAMS_FILE) as f: 673 | params = json.load(f) 674 | if params["gui"]: 675 | render = "human" 676 | else: 677 | render = "server" 678 | env = Slime(render_mode=render, **params) 679 | 680 | for ep in range(1, EPISODES + 1): 681 | env.reset() 682 | print( 683 | f"-------------------------------------------\nEPISODE: 
{ep}\n-------------------------------------------") 684 | for tick in range(params['episode_ticks']): 685 | for agent in env.agent_iter(max_iter=params["learner_population"]): 686 | observation, reward, done, info = env.last(agent) 687 | env.step(env.action_space(agent).sample()) 688 | # env.evaporate_chemical() 689 | env.move() 690 | env._evaporate() 691 | env._diffuse() 692 | env.render() 693 | env.close() --------------------------------------------------------------------------------
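
For reference, reward_cluster_and_time_punish_time (used as the step reward in both environments) sums three terms: time spent inside clusters, current cluster size, and a penalty for time spent outside clusters. The snippet below is a minimal standalone sketch of that computation, assuming the parameter values from slime_environments/agents/single-agent-params.json (rew=100, penalty=-1, cluster_threshold=30, episode_ticks=500); the function name and the example numbers are illustrative only and are not part of the codebase.

# Minimal sketch of the cluster-and-time reward (see reward_cluster_and_time_punish_time above).
def cluster_and_time_reward(cluster_size: int,
                            cluster_ticks: int,
                            episode_ticks: int = 500,
                            rew: float = 100,
                            penalty: float = -1,
                            cluster_threshold: int = 30) -> float:
    # fraction of the episode spent inside a cluster, scaled by the base reward
    time_in_cluster = (cluster_ticks / episode_ticks) * rew
    # current cluster size relative to the threshold, scaled by the squared base reward
    cluster_term = (cluster_size / cluster_threshold) * (rew ** 2)
    # fraction of the episode spent outside clusters, scaled by the (negative) penalty
    time_outside = ((episode_ticks - cluster_ticks) / episode_ticks) * penalty
    return time_in_cluster + cluster_term + time_outside

# e.g. a learner that has spent 50 of 500 ticks in a cluster of size 35:
print(cluster_and_time_reward(cluster_size=35, cluster_ticks=50))  # ~11675.77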
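Both environments build their patch grid from patch centres spaced PATCH_SIZE pixels apart (the centre coordinates double as patch IDs) and wrap positions around a torus via _wrap. The sketch below reconstructs that geometry on a toy 4x3 grid; W, H and PATCH_SIZE here are illustrative values, not the ones from the params files.

# Toy reconstruction of the patch-centre grid and torus wrapping used above
# (see the coords loop in __init__ and the _wrap method).
PATCH_SIZE = 20
W, H = 4, 3
W_pixels, H_pixels = W * PATCH_SIZE, H * PATCH_SIZE
offset = PATCH_SIZE // 2

# patch centres, also used as patch IDs: (10, 10), (10, 30), ..., (70, 50)
coords = [(x, y)
          for x in range(offset, W_pixels - offset + 1, PATCH_SIZE)
          for y in range(offset, H_pixels - offset + 1, PATCH_SIZE)]

def wrap(x: int, y: int) -> tuple[int, int]:
    # same wrapping rule as Slime._wrap: coordinates below 0 or above the window size re-enter from the opposite side
    if x < 0:
        x = W_pixels + x
    elif x > W_pixels:
        x = x - W_pixels
    if y < 0:
        y = H_pixels + y
    elif y > H_pixels:
        y = y - H_pixels
    return x, y

print(len(coords))    # 12 patches for a 4x3 grid
print(wrap(-10, 70))  # (70, 10): one patch left of the first column wraps to the last column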
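When follow_mode is 'prob', _find_max_pheromone picks a destination patch at random with probability proportional to its pheromone level, falling back to a uniform choice when every patch in the smell area is empty. Below is a minimal sketch of that selection rule on made-up data; the patch coordinates and chemical amounts are illustrative only.

import random

import numpy as np

# Illustrative pheromone levels for the patches inside a turtle's smell area
# (keys are patch centres, values are 'chemical' amounts).
smell_area = {(10, 10): 0.0, (30, 10): 2.0, (10, 30): 6.0, (30, 30): 0.0}

def pick_patch_prob(area: dict[tuple[int, int], float]) -> tuple[int, int]:
    population = list(area.keys())
    weights = list(area.values())
    if all(w == 0 for w in weights):
        # no pheromone anywhere: fall back to a uniform random patch
        return population[np.random.choice(len(population))]
    # pheromone-weighted choice, as in _find_max_pheromone with follow_mode='prob'
    return random.choices(population, weights=weights, k=1)[0]

# (10, 30) is returned roughly 75% of the time (6 / (6 + 2))
print(pick_patch_prob(smell_area))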