├── slime_environments
│   ├── agents
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── DQN.py
│   │   │   └── utils.py
│   │   ├── QLearning
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   ├── MA_QLearning.py
│   │   │   └── runs
│   │   │       └── multi_test_01_06_10_2023__21_12_57.csv
│   │   ├── SA_QLearning
│   │   │   ├── sa-learning-params.json
│   │   │   ├── single-agent-params.json
│   │   │   └── SA_QLearning.py
│   │   ├── Sarsa
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── MA_SARSA.py
│   │   ├── single-agent-params.json
│   │   ├── DQNet_Centralized
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── Centralized.py
│   │   ├── DQNet_Decentralized
│   │   │   ├── ma-learning-params.json
│   │   │   ├── multi-agent-params.json
│   │   │   └── Decentralized.py
│   │   ├── Baselines-A2C-MLP.py
│   │   └── SA_test_env.py
│   ├── environments
│   │   ├── __init__.py
│   │   ├── PatchTest.py
│   │   ├── SlimeEnvSingleAgent.py
│   │   └── SlimeEnvMultiAgent.py
│   └── __init__.py
├── .gitignore
├── setup.py
├── env-test-gym.py
├── requirements.txt
└── README.md
/slime_environments/agents/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.idea
2 | **/__pycache__
3 | **/runs/*
4 | **/models/*
--------------------------------------------------------------------------------
/slime_environments/environments/__init__.py:
--------------------------------------------------------------------------------
1 | from .SlimeEnvSingleAgent import Slime
2 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 |     name="slime_environments",
5 |     version="1.0.0",
6 |     packages=find_packages(),
7 |     install_requires=['gym', 'pygame']
8 | )
--------------------------------------------------------------------------------
/slime_environments/agents/QLearning/ma-learning-params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "alpha": 0.2,
3 |   "gamma": 0.8,
4 |   "epsilon": 0.9,
5 |   "decay": 0.9995,
6 |   "train_episodes": 100,
7 |   "TRAIN_LOG_EVERY": 10,
8 |   "test_episodes": 10,
9 |   "TEST_LOG_EVERY": 1,
10 |   "OUTPUT_FILE": "multi-test-01",
11 |   "actions": ["move-toward-chemical", "random-walk", "drop-chemical"]
12 | }
--------------------------------------------------------------------------------
/slime_environments/agents/SA_QLearning/sa-learning-params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "alpha": 0.2,
3 |   "gamma": 0.8,
4 |   "epsilon": 0.9,
5 |   "decay": 0.9998,
6 |   "train_episodes": 100,
7 |   "TRAIN_LOG_EVERY": 10,
8 |   "test_episodes": 10,
9 |   "TEST_LOG_EVERY": 1,
10 |   "OUTPUT_FILE": "single-test-01",
11 |   "actions": ["move-toward-chemical", "random-walk", "drop-chemical"]
12 | }
--------------------------------------------------------------------------------
/slime_environments/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 | 
3 | register(
4 |     id='Slime-v0',
5 |     entry_point='slime_environments.environments:Slime',
6 |     max_episode_steps=10000, # DOC The keyword argument max_episode_steps=300 will ensure that GridWorld environments that are instantiated via gym.make will be wrapped in a TimeLimit 
wrapper (https://www.gymlibrary.dev/content/environment_creation/#registering-envs) 7 | nondeterministic=True # DOC seeding not supported atm 8 | ) -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.2, 3 | "gamma": 0.8, 4 | "epsilon": 0.90, 5 | "epsilon_end": 0.00, 6 | "epsilon_test": 0.0, 7 | "decay": 20e-9, 8 | "train_episodes": 100, 9 | "TRAIN_LOG_EVERY": 1, 10 | "test_episodes": 10, 11 | "TEST_LOG_EVERY": 1, 12 | "OUTPUT_FILE": "multi-test-01", 13 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 14 | "fist_saveimages_episode": 1, 15 | "middle_saveimages_episode": 50, 16 | "last_saveimages_episode": 100 17 | } -------------------------------------------------------------------------------- /slime_environments/agents/single-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 50, 3 | "sniff_threshold": 0.9, 4 | "diffuse_area": 3, 5 | "diffuse_mode": "cascade", 6 | "follow_mode": "prob", 7 | "smell_area": 5, 8 | "lay_area": 1, 9 | "lay_amount": 3, 10 | "evaporation": 0.9, 11 | "cluster_threshold": 30, 12 | "cluster_radius": 10, 13 | "rew": 100, 14 | "penalty": -1, 15 | "episode_ticks": 500, 16 | "W": 66, 17 | "H": 38, 18 | "PATCH_SIZE": 20, 19 | "TURTLE_SIZE": 16, 20 | "FPS": 30, 21 | "SHADE_STRENGTH": 10, 22 | "SHOW_CHEM_TEXT": false, 23 | "CLUSTER_FONT_SIZE": 12, 24 | "CHEMICAL_FONT_SIZE": 8 25 | } -------------------------------------------------------------------------------- /slime_environments/agents/SA_QLearning/single-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 50, 3 | "sniff_threshold": 0.9, 4 | "diffuse_area": 3, 5 | "diffuse_mode": "cascade", 6 | "follow_mode": "prob", 7 | "smell_area": 5, 8 | "lay_area": 1, 9 | "lay_amount": 3, 10 | "evaporation": 0.9, 11 | "cluster_threshold": 30, 12 | "cluster_radius": 10, 13 | "rew": 100, 14 | "penalty": -1, 15 | "episode_ticks": 500, 16 | "W": 66, 17 | "H": 38, 18 | "PATCH_SIZE": 20, 19 | "TURTLE_SIZE": 16, 20 | "FPS": 30, 21 | "SHADE_STRENGTH": 10, 22 | "SHOW_CHEM_TEXT": false, 23 | "CLUSTER_FONT_SIZE": 12, 24 | "CHEMICAL_FONT_SIZE": 8 25 | } -------------------------------------------------------------------------------- /env-test-gym.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3.common.env_checker import check_env 2 | from stable_baselines3 import DQN 3 | import slime_environments 4 | import gym 5 | import json 6 | from gym.spaces import MultiBinary 7 | import numpy as np 8 | 9 | PARAMS_FILE = "slime_environments/agents/single-agent-params.json" 10 | with open(PARAMS_FILE) as f: 11 | params = json.load(f) 12 | 13 | # print(gym.__version__) 14 | env = gym.make("Slime-v0", **params) 15 | check_env(env) 16 | print("Environment compatible with Stable Baselines3") 17 | 18 | model = DQN("MlpPolicy", env, verbose=1) 19 | model.learn(total_timesteps=1000,log_interval=4) 20 | print("SB3 DQN sample training completed.") 21 | -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "step_lr": 0.9999995, 4 | "batch_size": 128, 5 | 
"memory_capacity": 51200, 6 | "update_net_every": 64, 7 | "alpha": 0.005, 8 | "gamma": 0.8, 9 | "epsilon": 0.90, 10 | "epsilon_end": 0.00, 11 | "epsilon_test": 0.0, 12 | "decay": 20e-9, 13 | "train_episodes": 100, 14 | "TRAIN_LOG_EVERY": 1, 15 | "test_episodes": 10, 16 | "TEST_LOG_EVERY": 1, 17 | "OUTPUT_FILE": "multi-test-01", 18 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 19 | "fist_saveimages_episode": 1, 20 | "middle_saveimages_episode": 50, 21 | "last_saveimages_episode": 100 22 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/ma-learning-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "step_lr": 0.99992, 4 | "batch_size": 128, 5 | "memory_capacity": 51200, 6 | "update_net_every": 4, 7 | "alpha": 0.005, 8 | "gamma": 0.8, 9 | "epsilon": 0.90, 10 | "epsilon_end": 0.00, 11 | "epsilon_test": 0.0, 12 | "decay": 2e-6, 13 | "train_episodes": 100, 14 | "TRAIN_LOG_EVERY": 1, 15 | "test_episodes": 10, 16 | "TEST_LOG_EVERY": 1, 17 | "OUTPUT_FILE": "multi-test-01", 18 | "actions": ["move-toward-chemical", "random-walk", "drop-chemical"], 19 | "fist_saveimages_episode": 1, 20 | "middle_saveimages_episode": 50, 21 | "last_saveimages_episode": 100 22 | } -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.9, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.9, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 500, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 
100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/multi-agent-params.json: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.8, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.8, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 512, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } -------------------------------------------------------------------------------- /slime_environments/agents/Baselines-A2C-MLP.py: -------------------------------------------------------------------------------- 1 | from stable_baselines3 import A2C 2 | import slime_environments 3 | import gym 4 | import json 5 | 6 | #model = A2C('MlpPolicy', 'Slime-v0').learn(100) # FIXME find way to pass arguments to env 7 | 8 | PARAMS_FILE = "single-agent-params.json" 9 | with open(PARAMS_FILE) as f: 10 | params = json.load(f) 11 | env = gym.make("Slime-v0", **params) 12 | 13 | model = A2C('MlpPolicy', env, verbose=2) # 2 = debug 14 | model.learn(total_timesteps=100*params['episode_ticks']) # total env steps 15 | 16 | obs, _ = env.reset() 17 | for i in range(100): 18 | action, _state = model.predict(obs, deterministic=True) # QUESTION "deterministic actions"? what does it mean? 
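    # DOC with deterministic=True, predict() returns the most probable action under the
    # DOC current policy (the argmax of the action distribution for a discrete action
    # DOC space) instead of sampling from it, so evaluation always replays the greedy
    # DOC action; use deterministic=False to sample stochastically instead, e.g.:
    # DOC   action, _state = model.predict(obs, deterministic=False)  # sampled action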
19 |     obs, reward, _, _, _ = env.step(action)
20 |     env.render()
21 | 
22 | env.close()
23 | 
--------------------------------------------------------------------------------
/slime_environments/agents/SA_test_env.py:
--------------------------------------------------------------------------------
1 | import json
2 | from itertools import permutations, combinations, product
3 | 
4 | import gym
5 | import slime_environments
6 | from gym.utils.env_checker import check_env #, check_reset_seed, check_reset_return_type
7 | 
8 | from slime_environments.environments.SlimeEnvSingleAgent import BooleanSpace
9 | 
10 | PARAMS_FILE = "single-agent-params.json"
11 | with open(PARAMS_FILE) as f:
12 |     params = json.load(f)
13 | #env = Slime(render_mode="human", **params)
14 | env = gym.make("Slime-v0", **params)
15 | 
16 | check_env(env.unwrapped, skip_render_check=False)
17 | #check_reset_seed(env)
18 | #check_reset_return_type(env)
19 | 
20 | # space = BooleanSpace(size=2)
21 | # print(f"size={space.size}, shape={space.shape}, values={space._values}, sample={space.sample()}")
22 | #
23 | # print(list(permutations([True, False])))
24 | # print(list(product([True, False], repeat=2)))
25 | # print(list(combinations([x for x in [True, False]], 2)))
26 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2022.9.24
2 | charset-normalizer==2.1.1
3 | cloudpickle==2.2.1
4 | cmake==3.25.0
5 | contourpy==1.1.0
6 | cycler==0.11.0
7 | Farama-Notifications==0.0.4
8 | filelock==3.9.0
9 | fonttools==4.40.0
10 | gym==0.26.2
11 | gym-notices==0.0.8
12 | gymnasium==0.28.1
13 | idna==3.4
14 | importlib-metadata==6.6.0
15 | importlib-resources==5.12.0
16 | jax-jumpy==1.0.0
17 | Jinja2==3.1.2
18 | kiwisolver==1.4.4
19 | lit==15.0.7
20 | MarkupSafe==2.1.2
21 | matplotlib==3.7.1
22 | mkl-fft==1.3.0
23 | mkl-random==1.0.2
24 | mkl-service==2.3.0
25 | mpmath==1.2.1
26 | networkx==3.0
27 | numpy==1.24.3
28 | opencv-contrib-python==4.7.0.72
29 | packaging==23.1
30 | pettingzoo==1.23.1
31 | Pillow==9.3.0
32 | pygame==2.4.0
33 | pyparsing==3.0.9
34 | python-dateutil==2.8.2
35 | requests==2.28.1
36 | sympy==1.11.1
37 | torch==2.0.0+cu117
38 | torchaudio==2.0.1+cu117
39 | torchvision==0.15.1+cu117
40 | tqdm==4.65.0
41 | triton==2.0.0
42 | typing_extensions==4.4.0
43 | urllib3==1.26.13
44 | zipp==3.15.0
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Slime environment for MARL
2 | 
3 | This project is a port of the [NetLogo "Slime" simulation model](http://www.netlogoweb.org/launch#http://ccl.northwestern.edu/netlogo/models/models/Sample%20Models/Biology/Slime.nlogo) to Python and to the Farama Foundation [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) API.
4 | The goal is to make the model available to third-party (MA)RL libraries such as [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) and Ray [RLlib](https://github.com/ray-project/ray).
5 | The motivation is to **experiment with (MA)RL applied to communication actions for achieving coordination** amongst agents.
6 | 
7 | # Project structure
8 | 
9 | The project is under development, hence **everything is provisional** and subject to change; nevertheless, any meaningful change will be reported here. 
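The environment is registered with Gym as `Slime-v0` by `slime_environments/__init__.py`, so — assuming the package has been installed, e.g. with `pip install -r requirements.txt` followed by `pip install -e .` from the repository root — a minimal smoke test along the lines of `env-test-gym.py` looks like this:

```python
import json

import gym
import slime_environments  # importing the package registers Slime-v0 with Gym
from stable_baselines3.common.env_checker import check_env

# environment parameters (population, patch/turtle sizes, rewards, ...) come from JSON
with open("slime_environments/agents/single-agent-params.json") as f:
    params = json.load(f)

env = gym.make("Slime-v0", **params)  # keyword arguments are forwarded to the Slime constructor
check_env(env)                        # sanity-check stable-baselines3 compatibility
```

The learning scripts under `slime_environments/agents/` follow the same pattern, reading the environment and learning hyper-parameters from the JSON files listed below.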
10 | The most advanced development branch is `sm-baselines-api`, where the single agent environment is compatible with Gym (still need to check Gymnasium) and on its way to be compatible with stable-baselines3. 11 | 12 | There, the project is structured as follows: 13 | 14 | ``` 15 | slime_environments 16 | |__environments 17 | |__SlimeEnvSingleAgent.py # single agent learning environment 18 | |__SlimeEnvMultiAgent.py # multi-agent learning environment 19 | |__agents 20 | |__MA_QLearning.py # independent Q-learning 21 | |__SA_QLearning.py # single agent Q-learning 22 | |__multi-agent-params.json # multi-agent environment params 23 | |__single-agent-params.json # single agent environment params 24 | |__ma-learning-params.json # multi-agent learning params 25 | |__sa-learning-params.json # single agent learning params 26 | ``` 27 | -------------------------------------------------------------------------------- /slime_environments/environments/PatchTest.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import numpy as np 3 | 4 | pygame.init() 5 | 6 | W = 50 # in number of patches 7 | H = 25 # in number of patches 8 | PATCH_SIZE = 20 # thus window width and height are W * PATCH_SIZE and H * PATCH_SIZE 9 | TURTLE_SIZE = PATCH_SIZE - 1 # turtles must be slightly smaller 10 | 11 | N_TURTLES = 10 12 | 13 | SHOW_TURTLES = True 14 | SHOW_PATCHES = True 15 | MOVEMENT = True 16 | 17 | BLACK = (0, 0, 0) 18 | BLUE = (0, 0, 255) 19 | WHITE = (255, 255, 255) 20 | 21 | coords = [] 22 | offset = PATCH_SIZE // 2 23 | W_pixels = W * PATCH_SIZE 24 | H_pixels = H * PATCH_SIZE 25 | for x in range(offset, (W_pixels - offset) + 1, PATCH_SIZE): 26 | for y in range(offset, (H_pixels - offset) + 1, PATCH_SIZE): 27 | coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 28 | 29 | # nel dizionario associato alle coordinate x,y della patch puoi mettere i dati che vuoi, come 30 | # - quantita di feromone, 31 | # - lista degli ID delle turtle che sono sulla patch 32 | # - ...(tutto quello che ti puo servire) 33 | patches = {coords[i]: {"id": i} for i in range(len(coords))} 34 | # stesso discorso per il dizionario associato all'ID della turtle 35 | turtles = {i: {"pos": coords[np.random.randint(len(coords))]} for i in range(N_TURTLES)} 36 | 37 | screen = pygame.display.set_mode((W_pixels, H_pixels)) 38 | pygame.display.set_caption("PATCH TEST") 39 | 40 | clock = pygame.time.Clock() 41 | 42 | playing = True 43 | while playing: 44 | for event in pygame.event.get(): 45 | if event.type == pygame.QUIT: # chiusura finestra -> termina il programma 46 | playing = False 47 | screen.fill(BLACK) 48 | if SHOW_TURTLES: 49 | # print("turtles:", end=" ") 50 | for t in turtles: #  una per patch 51 | # print(t, end=" ") 52 | pygame.draw.circle(screen, BLUE, turtles[t]["pos"], 53 | TURTLE_SIZE // 2) # ultimo parametro è il raggio del cerchio 54 | # print() 55 | if SHOW_PATCHES: 56 | # mostra le patch come quadrati 57 | # print("patches:", end=" ") 58 | for p in patches: 59 | # print(patches[p]["id"], end=" ") 60 | pygame.draw.rect(screen, WHITE, pygame.Rect(p[0] - offset, p[1] - offset, PATCH_SIZE - 1, PATCH_SIZE - 1), 61 | width=1) 62 | # print() 63 | # mostra la griglia che evidenzia le patch 64 | # for p in range(PATCH_SIZE, W_pixels, PATCH_SIZE): 65 | # pygame.draw.line(screen, WHITE, (p, 0), (p, H_pixels)) 66 | # for p in range(PATCH_SIZE, H_pixels, PATCH_SIZE): 67 | # pygame.draw.line(screen, WHITE, (0, p), (W_pixels, p)) 68 | if MOVEMENT: 69 | choice = 
[PATCH_SIZE, -PATCH_SIZE, 0] 70 | # choice = [PATCH_SIZE] 71 | for t in turtles: 72 | x, y = turtles[t]["pos"] 73 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 74 | if x2 < 0: 75 | x2 = W_pixels - offset 76 | if x2 > W_pixels: 77 | x2 = 0 + offset 78 | if y2 < 0: 79 | y2 = H_pixels - offset 80 | if y2 > H_pixels: 81 | y2 = 0 + offset 82 | turtles[t]["pos"] = (x2, y2) 83 | 84 | pygame.display.flip() 85 | # clock.tick(1) 86 | 87 | pygame.quit() 88 | -------------------------------------------------------------------------------- /slime_environments/agents/utils/DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from collections import deque 5 | 6 | import math 7 | import random 8 | 9 | class ReplayMemory(object): 10 | 11 | def __init__(self, Transition, capacity): 12 | self.memory = deque([], maxlen=capacity) 13 | self.Transition = Transition 14 | 15 | def push(self, *args): 16 | """Save a transition""" 17 | self.memory.append(self.Transition(*args)) 18 | 19 | def sample(self, batch_size): 20 | return random.sample(self.memory, batch_size) 21 | 22 | def __len__(self): 23 | return len(self.memory) 24 | 25 | class DQN(nn.Module): 26 | 27 | def __init__(self, n_observations, n_actions, epsilon): 28 | super(DQN, self).__init__() 29 | self.layer1 = nn.Linear(n_observations, 128) 30 | self.layer2 = nn.Linear(128, 128) 31 | 32 | self.layer3 = nn.Linear(128, 256) 33 | self.layer4 = nn.Linear(256, 256) 34 | 35 | self.layer5 = nn.Linear(256, 512) 36 | self.layer6 = nn.Linear(512, n_actions) 37 | 38 | self.dropout = nn.Dropout(p=0.3) 39 | self.epsilon = epsilon 40 | 41 | 42 | def forward(self, x): 43 | x = self.layer1(x) 44 | x = F.relu(self.dropout(x)) 45 | x = F.relu(self.layer2(x) + x) 46 | 47 | x = self.layer3(x) 48 | x = F.relu(self.dropout(x)) 49 | x = F.relu(self.layer4(x) + x) 50 | 51 | x = self.layer5(x) 52 | x = F.relu(self.dropout(x)) 53 | return self.layer6(x) 54 | 55 | 56 | 57 | def optimize_model(Transition, memory, policy_net, target_net, gamma, batch_size, device): 58 | if len(memory) < batch_size: 59 | return policy_net, target_net, None 60 | 61 | transitions = memory.sample(batch_size) 62 | # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for 63 | # detailed explanation). This converts batch-array of Transitions 64 | # to Transition of batch-arrays. 65 | batch = Transition(*zip(*transitions)) 66 | 67 | # Compute a mask of non-final states and concatenate the batch elements 68 | # (a final state would've been the one after which simulation ended) 69 | non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=device, dtype=torch.bool) 70 | non_final_next_states = torch.cat([s for s in batch.next_state if s is not None]) 71 | state_batch = torch.cat(batch.state) 72 | action_batch = torch.cat(batch.action) 73 | reward_batch = torch.cat(batch.reward) 74 | 75 | # Compute Q(s_t, a) - the model computes Q(s_t), then we select the 76 | # columns of actions taken. These are the actions which would've been taken 77 | # for each batch state according to policy_net 78 | state_action_values = policy_net(state_batch).gather(1, action_batch) 79 | 80 | # Compute V(s_{t+1}) for all next states. 81 | # Expected values of actions for non_final_next_states are computed based 82 | # on the "older" target_net; selecting their best reward with max(1)[0]. 
83 | # This is merged based on the mask, such that we'll have either the expected 84 | # state value or 0 in case the state was final. 85 | next_state_values = torch.zeros(batch_size, device=device) 86 | with torch.no_grad(): 87 | next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0] 88 | # Compute the expected Q values 89 | expected_state_action_values = (next_state_values * gamma) + reward_batch 90 | 91 | # Compute Huber loss 92 | criterion = nn.SmoothL1Loss() 93 | loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1)) 94 | 95 | return policy_net, target_net, loss 96 | 97 | 98 | def select_action(env, agent, state, steps_done, policy_net, device, epsilon_end, decay): 99 | sample = random.random() 100 | policy_net.epsilon = epsilon_end + (policy_net.epsilon - epsilon_end) * math.exp(-1. * steps_done * decay) 101 | 102 | if sample > policy_net.epsilon: 103 | with torch.no_grad(): 104 | # t.max(1) will return the largest column value of each row. 105 | # second column on max result is index of where max element was 106 | # found, so we pick action with the larger expected reward. 107 | return policy_net(state).max(1)[1].view(1, 1), policy_net 108 | else: 109 | return torch.tensor([[env.action_space(agent).sample()]], device=device, dtype=torch.long), policy_net -------------------------------------------------------------------------------- /slime_environments/agents/SA_QLearning/SA_QLearning.py: -------------------------------------------------------------------------------- 1 | #from environments.SlimeEnvSingleAgent import Slime 2 | 3 | import datetime 4 | import gym 5 | import json 6 | import numpy as np 7 | import random 8 | import slime_environments 9 | 10 | PARAMS_FILE = "single-agent-params.json" 11 | LEARNING_PARAMS_FILE = "sa-learning-params.json" 12 | with open(LEARNING_PARAMS_FILE) as f: 13 | l_params = json.load(f) 14 | OUTPUT_FILE = f"{l_params['OUTPUT_FILE']}-{datetime.datetime.now()}.csv" 15 | with open(PARAMS_FILE) as f: 16 | params = json.load(f) 17 | #env = Slime(render_mode="human", **params) 18 | env = gym.make("Slime-v0", **params) 19 | 20 | # Q-Learning 21 | alpha = l_params["alpha"] # DOC learning rate (0 learn nothing 1 learn suddenly) 22 | gamma = l_params["gamma"] # DOC discount factor (0 care only bout immediate rewards, 1 care only about future ones) 23 | epsilon = l_params["epsilon"] # DOC chance of random action 24 | decay = l_params["decay"] # DOC di quanto diminuisce epsilon ogni episode (e.g. 
1500 episodes => decay = 0.9995) 25 | TRAIN_EPISODES = l_params["train_episodes"] 26 | TEST_EPISODES = l_params["test_episodes"] 27 | TRAIN_LOG_EVERY = l_params["TRAIN_LOG_EVERY"] 28 | TEST_LOG_EVERY = l_params["TEST_LOG_EVERY"] 29 | 30 | with open(OUTPUT_FILE, 'w') as f: 31 | f.write(f"{json.dumps(params, indent=2)}\n") 32 | f.write("----------\n") 33 | f.write(f"TRAIN_EPISODES = {TRAIN_EPISODES}\n") 34 | f.write(f"TEST_EPISODES = {TEST_EPISODES}\n") 35 | f.write("----------\n") 36 | f.write(f"alpha = {alpha}\n") 37 | f.write(f"gamma = {gamma}\n") 38 | f.write(f"epsilon = {epsilon}\n") 39 | f.write(f"decay = {decay}\n") 40 | f.write("----------\n") 41 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, Avg reward X episode, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical 42 | f.write(f"Episode, Tick, Avg cluster size X tick, ") 43 | for a in l_params["actions"]: 44 | f.write(f"{a}, ") 45 | f.write("Avg reward X episode\n") 46 | 47 | q_table = np.zeros([4, env.action_space.n]) 48 | 49 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 50 | actions_dict = {str(ep): {str(ac): 0 for ac in range(3)} for ep in range(1, TRAIN_EPISODES+1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 51 | # DOC dict che tiene conto della reward per ogni episodio {episode: _} 52 | reward_dict = {str(ep): 0 for ep in range(1, TRAIN_EPISODES+1)} 53 | # DOC dict che tiene conto della dimensioni di ogni cluster per ogni episodio 54 | cluster_dict = {} 55 | 56 | 57 | def observation_to_int_map(obs): 58 | if sum(obs) == 0: # DOC [False, False] 59 | mapped = sum(obs) # 0 60 | elif sum(obs) == 2: # DOC [True, True] 61 | mapped = 3 62 | elif int(obs[0]) == 1 and int(obs[1]) == 0: # DOC [True, False] ==> si trova in un cluster ma non su una patch con feromone --> difficile succeda 63 | mapped = 1 64 | else: 65 | mapped = 2 # DOC [False, True] 66 | return mapped 67 | 68 | 69 | # TRAINING 70 | print("Start training...") 71 | for ep in range(1, TRAIN_EPISODES+1): 72 | observation = env.reset() 73 | obs = observation_to_int_map(observation) 74 | for tick in range(1, params['episode_ticks']+1): 75 | if random.uniform(0, 1) < epsilon: 76 | action = env.action_space.sample() # Explore action space 77 | else: 78 | action = np.argmax(q_table[obs]) # Exploit learned values 79 | 80 | next_observation, reward, _, _ = env.step(action) 81 | next_obs = observation_to_int_map(next_observation) 82 | 83 | old_value = q_table[obs][action] 84 | next_max = np.max(q_table[next_obs]) # QUESTION: was with [s] 85 | 86 | new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max) 87 | q_table[obs][action] = new_value 88 | 89 | obs = next_obs 90 | 91 | actions_dict[str(ep)][str(action)] += 1 92 | reward_dict[str(ep)] += round(reward, 2) 93 | 94 | env.render() 95 | epsilon *= decay 96 | cluster_dict[str(ep)] = -1 # round(env.avg_cluster(), 2) 97 | if ep % TRAIN_LOG_EVERY == 0: 98 | print(f"EPISODE: {ep}") 99 | print(f"\tepsilon: {epsilon}") 100 | print(f"\tq_table: {q_table}") 101 | with open(OUTPUT_FILE, 'a') as f: 102 | f.write( 103 | f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 104 | f.write(f"{reward_dict[str(ep)]}\n") 105 | 106 | #print(json.dumps(cluster_dict, indent=2)) 107 | print("Training finished!\n") 108 | 109 | # DOC Evaluate agent's performance after Q-learning 110 | 
cluster_dict = {} 111 | print("Start testing...") 112 | for ep in range(1, TEST_EPISODES+1): 113 | reward_episode = 0 114 | observation, _ = env.reset() 115 | obs = observation_to_int_map(observation) 116 | for tick in range(params['episode_ticks']): 117 | action = np.argmax(q_table[obs]) 118 | observation, reward, _, _, = env.step(action) 119 | obs = observation_to_int_map(observation) 120 | reward_episode += reward 121 | env.render() 122 | if ep % TEST_LOG_EVERY == 0: 123 | print(f"EPISODE: {ep}") 124 | print(f"\tepisode reward: {reward_episode}") 125 | #cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 126 | cluster_dict[str(ep)] = -1 127 | print(json.dumps(cluster_dict, indent=2)) 128 | print("Testing finished!\n") 129 | env.close() 130 | -------------------------------------------------------------------------------- /slime_environments/agents/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import math 4 | import datetime 5 | import matplotlib.pyplot as plt 6 | from typing import Optional 7 | from tqdm import tqdm 8 | import numpy as np 9 | import subprocess 10 | import cv2 11 | from typing import Optional 12 | 13 | def read_params(params_path:str, learning_params_path:str): 14 | params, l_params = dict(), dict() 15 | try: 16 | with open(learning_params_path) as f: 17 | l_params = json.load(f) 18 | except Exception as e: 19 | print(f"[ERROR] could not open learning params file: {e}") 20 | 21 | try: 22 | with open(params_path) as f: 23 | params = json.load(f) 24 | except Exception as e: 25 | print(f"[ERROR] could not open learning params file: {e}") 26 | 27 | return params, l_params 28 | 29 | 30 | def state_to_int_map(obs: list): 31 | if sum(obs) == 0: # DOC [False, False] 32 | mapped = sum(obs) # 0 33 | elif sum(obs) == 2: # DOC [True, True] 34 | mapped = 3 35 | elif int(obs[0]) == 1 and int(obs[1]) == 0: # DOC [True, False] ==> si trova in un cluster ma non su una patch con feromone --> difficile succeda 36 | mapped = 1 37 | else: 38 | mapped = 2 # DOC [False, True] 39 | return mapped 40 | 41 | 42 | def setup(is_train:bool, curdir:str, params:dict, l_params:dict): 43 | if not os.path.isdir(os.path.join(curdir, "runs")): 44 | os.makedirs(os.path.join(curdir, "runs")) 45 | 46 | filename = l_params['OUTPUT_FILE'].replace("-", "_") + "_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".csv" 47 | output_dir = os.path.join(curdir, "runs", "train" + "_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S")) 48 | if is_train: 49 | os.makedirs(output_dir) 50 | output_file = os.path.join(curdir, "runs", output_dir, filename) 51 | 52 | # Q-Learning 53 | alpha = l_params["alpha"] # DOC learning rate (0 learn nothing 1 learn suddenly) 54 | gamma = l_params["gamma"] # DOC discount factor (0 care only bout immediate rewards, 1 care only about future ones) 55 | epsilon = l_params["epsilon"] # DOC chance of random action 56 | decay = l_params["decay"] # DOC di quanto diminuisce epsilon ogni episode (e.g. 
1500 episodes => decay = 0.9995) 57 | train_episodes = l_params["train_episodes"] 58 | test_episodes = l_params["test_episodes"] 59 | train_log_every = l_params["TRAIN_LOG_EVERY"] 60 | test_log_every = l_params["TEST_LOG_EVERY"] 61 | 62 | if is_train: 63 | with open(output_file, 'w') as f: 64 | f.write(f"{json.dumps(params, indent=2)}\n") 65 | f.write("----------\n") 66 | f.write(f"TRAIN_EPISODES = {train_episodes}\n") 67 | f.write(f"TEST_EPISODES = {test_episodes}\n") 68 | f.write("----------\n") 69 | f.write(f"alpha = {alpha}\n") 70 | f.write(f"gamma = {gamma}\n") 71 | f.write(f"epsilon = {epsilon}\n") 72 | f.write(f"decay = {decay}\n") 73 | f.write("----------\n") 74 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, Avg reward X episode, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical 75 | f.write(f"Episode, Tick, Avg cluster size X tick, ") 76 | 77 | for a in l_params["actions"]: 78 | f.write(f"{a}, ") 79 | 80 | for l in range(params['population'], params['population'] + params['learner_population']): 81 | for a in l_params["actions"]: 82 | f.write(f"(learner {l})-{a}, ") 83 | f.write("Avg reward X episode, loss, learning rate\n") 84 | 85 | return output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every 86 | 87 | 88 | def calculate_epsilon(type:str, episodes:int, ticks:int, learners:int, epsilon: float, decay:float, epsilon_end:Optional[float]): 89 | indexes = [] 90 | values = [] 91 | 92 | pbar = tqdm(range(episodes*ticks)) 93 | for ep in range(1, episodes + 1): 94 | for tick in range(1, ticks + 1): 95 | for agent in range(learners): 96 | index = agent + tick * learners + ep * ticks * learners 97 | indexes.append(index) 98 | if ep == 1 and tick == 1: 99 | pass 100 | else: 101 | if type.lower() in "normal": 102 | epsilon *= decay 103 | elif type.lower() == "esponential": 104 | epsilon = epsilon_end + (epsilon - epsilon_end) * math.exp(-1. 
* ep * decay) 105 | 106 | values.append(epsilon) 107 | pbar.update(1) 108 | 109 | plt.plot(indexes, values, marker='o') 110 | plt.xlabel('Steps') 111 | plt.ylabel('epsilon value') 112 | plt.show() 113 | print(f"Final value: {epsilon}") 114 | 115 | 116 | def positional_encoding(sequence_length, d_model): 117 | positions = np.arange(sequence_length)[:, np.newaxis] 118 | angles = np.arange(d_model)[np.newaxis, :] / np.power(10000, 2 * (np.arange(d_model) // 2) / d_model) 119 | encoding = positions * angles 120 | 121 | encoding[:, 0::2] = np.sin(encoding[:, 0::2]) # Colonne pari: seno 122 | encoding[:, 1::2] = np.cos(encoding[:, 1::2]) # Colonne dispari: coseno 123 | 124 | return encoding 125 | 126 | 127 | def update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr): 128 | with open(output_file, 'a') as f: 129 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 130 | avg_rew = 0 131 | 132 | for l in range(params['population'], params['population'] + params['learner_population']): 133 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 134 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 135 | 136 | avg_rew /= params['learner_population'] 137 | f.write(f"{avg_rew}, {sum(losses)/len(losses)}, {cur_lr}\n") 138 | 139 | 140 | def calc_final_lr(base_lr, gamma, step_size, iterations, batch_size): 141 | print(base_lr * gamma ** ((iterations / batch_size) // step_size) ) 142 | 143 | 144 | def save_env_image(image, tick, output_dir, cur_ep_dir): 145 | assert image is not None, "Environment error: render image is None" 146 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 147 | image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) 148 | if not os.path.exists(os.path.join(output_dir, "images", cur_ep_dir)): 149 | os.makedirs(os.path.join(output_dir, "images", cur_ep_dir)) 150 | cv2.imwrite(os.path.join(output_dir, "images", cur_ep_dir, f"{tick}.jpg"), image) 151 | 152 | 153 | def video_from_images(output_dir, last_ep_dir): 154 | subprocess.run([ 155 | "ffmpeg", "-y", "-framerate", "30", "-i", os.path.join(output_dir, "images", last_ep_dir, "%d.jpg"), \ 156 | '-c:v', 'libx264', '-vf', 'fps=30', '-pix_fmt', 'yuv420p', os.path.join(output_dir, "images", last_ep_dir, "video.mp4") 157 | ], check=True) 158 | 159 | 160 | def calc_evaporation(learners, lay_amount, decay): 161 | x = 0 162 | for i in range(1000): 163 | x = x * decay + lay_amount * learners 164 | print(x) 165 | 166 | 167 | if __name__ == "__main__": 168 | # calc_final_lr(1e-3, .9945, 1, 51200, 128) 169 | # calculate_epsilon("esponential", 100, 512, 100, 0.9, 20e-9, 0.0) 170 | calc_evaporation(100, 1, 0.8) 171 | -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/MA_QLearning.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import read_params, state_to_int_map, setup 10 | 11 | import argparse 12 | 13 | import json 14 | import numpy as np 15 | import random 16 | 17 | 18 | def create_agent(params:dict, l_params:dict, train_episodes:int): 19 | 
n_actions = len(l_params["actions"]) 20 | population = params['population'] 21 | learner_population = params['learner_population'] 22 | 23 | # Q_table 24 | qtable = {i: np.zeros([4, n_actions]) for i in range(population, population + learner_population)} 25 | 26 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 27 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 28 | # DOC dict che tiene conto della frequenza di scelta delle action di ogni agent per ogni episodio {episode: {agent: {action: _, action: _, ...}}} 29 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 30 | # DOC dict che tiene conto della reward di ogni agente per ogni episodio {episode: {agent: _}} 31 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 32 | # DOC dict che tiene conto dela dimensioni di ogni cluster per ogni episodio 33 | cluster_dict = {} 34 | 35 | return qtable, actions_dict, action_dict, reward_dict, cluster_dict 36 | 37 | 38 | def train(env, 39 | params:dict, 40 | qtable, 41 | actions_dict:dict, 42 | action_dict:dict, 43 | reward_dict:dict, 44 | cluster_dict:dict, 45 | train_episodes:int, 46 | train_log_every, 47 | alpha:float, 48 | gamma:float, 49 | decay:float, 50 | epsilon:float, 51 | output_file): 52 | # TRAINING 53 | print("Start training...") 54 | 55 | old_s = {} # DOC old state for each agent {agent: old_state} 56 | for ep in range(1, train_episodes + 1): 57 | env.reset() 58 | 59 | for tick in range(1, params['episode_ticks'] + 1): 60 | for agent in env.agent_iter(max_iter=params['learner_population']): 61 | cur_state, reward, _, _ = env.last(agent) 62 | cur_s = state_to_int_map(cur_state.observe()) 63 | 64 | if ep == 1 and tick == 1: 65 | action = env.action_space(agent).sample() 66 | else: 67 | old_value = qtable[agent][old_s[agent]][action] 68 | next_max = np.max(qtable[agent][cur_s]) # QUESTION: was with [action] too 69 | new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max) 70 | qtable[agent][old_s[agent]][action] = new_value 71 | 72 | if random.uniform(0, 1) < epsilon: 73 | # action = np.random.randint(0, 2) 74 | action = env.action_space(agent).sample() 75 | else: 76 | action = np.argmax(qtable[agent][cur_s]) 77 | env.step(action) 78 | 79 | old_s[agent] = cur_s 80 | 81 | actions_dict[str(ep)][str(action)] += 1 82 | action_dict[str(ep)][str(agent)][str(action)] += 1 83 | reward_dict[str(ep)][str(agent)] += round(reward, 2) 84 | 85 | env.move() 86 | env._evaporate() 87 | env._diffuse() 88 | env.render() 89 | #print(json.dumps(action_dict, indent=2)) 90 | epsilon *= decay 91 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 92 | 93 | if ep % train_log_every == 0: 94 | print(f"EPISODE: {ep}") 95 | print(f"\tepsilon: {epsilon}") 96 | #print(f"\tepisode reward: {reward_episode}") 97 | # From NetlogoDataAnalysis: Episode, Tick, Avg cluster size X tick, move-toward-chemical (2), random-walk (0), drop-chemical (1), (learner 0)-move-toward-chemical, ..., Avg reward X episode 98 | 99 | with open(output_file, 'a') as f: 100 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, 
") 101 | avg_rew = 0 102 | 103 | for l in range(params['population'], params['population'] + params['learner_population']): 104 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 105 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 106 | 107 | avg_rew /= params['learner_population'] 108 | f.write(f"{avg_rew}\n") 109 | 110 | #print(json.dumps(cluster_dict, indent=2)) 111 | print("Training finished!\n") 112 | 113 | return env, qtable 114 | 115 | 116 | def eval(env, 117 | params:dict, 118 | test_episodes:int, 119 | qtable, 120 | test_log_every:int, 121 | epsilon:float,): 122 | # DOC Evaluate agent's performance after Q-learning 123 | cluster_dict = {} 124 | print("Start testing...") 125 | 126 | for ep in range(1, test_episodes + 1): 127 | env.reset() 128 | for tick in range(1, params['episode_ticks']+1): 129 | for agent in env.agent_iter(max_iter=params['learner_population']): 130 | state, _, _, _ = env.last(agent) 131 | s = state_to_int_map(state.observe()) 132 | 133 | if random.uniform(0, 1) < epsilon: 134 | # action = np.random.randint(0, 2) 135 | action = env.action_space(agent).sample() 136 | else: 137 | action = np.argmax(qtable[agent][s]) 138 | 139 | env.step(action) 140 | env.move() 141 | env._evaporate() 142 | env._diffuse() 143 | env.render() 144 | 145 | if ep % test_log_every == 0: 146 | print(f"EPISODE: {ep}") 147 | print(f"\tepsilon: {epsilon}") 148 | # print(f"\tepisode reward: {reward_episode}") 149 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 150 | 151 | print(json.dumps(cluster_dict, indent=2)) 152 | print("Testing finished!\n") 153 | env.close() 154 | 155 | 156 | def main(args): 157 | random.seed(args.random_seed) 158 | np.random.seed(args.random_seed) 159 | curdir = os.path.dirname(os.path.abspath(__file__)) 160 | 161 | params, l_params = read_params(args.params_path, args.learning_params_path) 162 | 163 | env = Slime(render_mode="human", **params) 164 | 165 | output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(curdir, params, l_params) 166 | 167 | qtable, actions_dict, action_dict, reward_dict, cluster_dict = create_agent(params, l_params,train_episodes) 168 | 169 | env, qtable = train(env, params, qtable, actions_dict, action_dict, reward_dict, cluster_dict, train_episodes, train_log_every, alpha, gamma, decay, epsilon, output_file) 170 | 171 | eval(env, params, test_episodes, qtable, test_log_every, epsilon) 172 | 173 | 174 | if __name__ == "__main__": 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument("params_path", type=str) 177 | parser.add_argument("learning_params_path", type=str) 178 | 179 | args = parser.parse_args() 180 | 181 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 182 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 183 | 184 | main(args) 185 | -------------------------------------------------------------------------------- /slime_environments/agents/Sarsa/MA_SARSA.py: -------------------------------------------------------------------------------- 1 | import math 2 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 3 | 4 | import sys 5 | import os 6 | 7 | 
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.append(parent_dir) 9 | 10 | from utils.utils import read_params, save_env_image, state_to_int_map, setup, video_from_images 11 | 12 | import argparse 13 | 14 | import os 15 | import json 16 | import numpy as np 17 | import random 18 | from tqdm import tqdm 19 | 20 | def create_agent(params:dict, l_params:dict, train_episodes:int): 21 | n_actions = len(l_params["actions"]) 22 | population = params['population'] 23 | learner_population = params['learner_population'] 24 | 25 | # Q_table 26 | qtable = {i: np.zeros([4, n_actions]) for i in range(population, population + learner_population)} 27 | 28 | # DOC dict che tiene conto della frequenza di scelta delle action per ogni episodio {episode: {action: _, action: _, ...}} 29 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 30 | # DOC dict che tiene conto della frequenza di scelta delle action di ogni agent per ogni episodio {episode: {agent: {action: _, action: _, ...}}} 31 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 32 | # DOC dict che tiene conto della reward di ogni agente per ogni episodio {episode: {agent: _}} 33 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 34 | # DOC dict che tiene conto dela dimensioni di ogni cluster per ogni episodio 35 | cluster_dict = {} 36 | 37 | return qtable, actions_dict, action_dict, reward_dict, cluster_dict 38 | 39 | 40 | def train(env, 41 | params:dict, 42 | l_params:dict, 43 | qtable:dict, 44 | actions_dict:dict, 45 | action_dict:dict, 46 | reward_dict:dict, 47 | cluster_dict:dict, 48 | train_episodes:int, 49 | train_log_every:int, 50 | alpha:float, 51 | gamma:float, 52 | decay:float, 53 | epsilon:float, 54 | output_file:str, 55 | output_dir:str): 56 | # TRAINING 57 | print("Start training...") 58 | 59 | old_s = {} # DOC old state for each agent {agent: old_state} 60 | for ep in range(1, train_episodes + 1): 61 | env.reset() 62 | 63 | for tick in tqdm(range(1, params['episode_ticks'] + 1)): 64 | for agent in env.agent_iter(max_iter=params['learner_population']): 65 | cur_state, reward, _, _ = env.last(agent) 66 | cur_s = state_to_int_map(cur_state.observe()) 67 | 68 | if ep == 1 and tick == 1: 69 | action = env.action_space(agent).sample() 70 | else: 71 | old_value = qtable[agent][old_s[agent]][action] 72 | next_action = None 73 | 74 | if random.uniform(0, 1) < epsilon: 75 | # action = np.random.randint(0, 2) 76 | next_action = env.action_space(agent).sample() 77 | else: 78 | next_action = np.argmax(qtable[agent][cur_s]) 79 | 80 | next_value = qtable[agent][cur_s][next_action] 81 | new_value = old_value + alpha * (reward + gamma * next_value - old_value) 82 | qtable[agent][old_s[agent]][action] = new_value 83 | 84 | action = next_action 85 | 86 | env.step(action) 87 | epsilon = epsilon_end + (epsilon - epsilon_end) * math.exp(-1. 
* ep * decay) 88 | old_s[agent] = cur_s 89 | 90 | actions_dict[str(ep)][str(action)] += 1 91 | action_dict[str(ep)][str(agent)][str(action)] += 1 92 | reward_dict[str(ep)][str(agent)] += round(reward, 2) 93 | 94 | env.move() 95 | env._evaporate() 96 | env._diffuse() 97 | image = env.render() 98 | #print(json.dumps(action_dict, indent=2)) 99 | 100 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 101 | if not os.path.exists(os.path.join(output_dir, "images")): 102 | os.makedirs(os.path.join(output_dir, "images")) 103 | 104 | if ep == int(l_params["fist_saveimages_episode"]): 105 | save_env_image(image, tick, output_dir, "first_episode") 106 | elif ep == int(l_params["middle_saveimages_episode"]): 107 | save_env_image(image, tick, output_dir, "middle_episode") 108 | elif ep == int(l_params["last_saveimages_episode"]): 109 | save_env_image(image, tick, output_dir, "last_episode") 110 | 111 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 112 | video_from_images(output_dir, "first_episode") 113 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 114 | video_from_images(output_dir, "middle_episode") 115 | 116 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 117 | 118 | if ep % train_log_every == 0: 119 | print("EPISODE: {}\tepsilon: {:.5f}".format(ep, epsilon)) 120 | 121 | with open(output_file, 'a') as f: 122 | f.write(f"{ep}, {params['episode_ticks'] * ep}, {cluster_dict[str(ep)]}, {actions_dict[str(ep)]['2']}, {actions_dict[str(ep)]['0']}, {actions_dict[str(ep)]['1']}, ") 123 | avg_rew = 0 124 | 125 | for l in range(params['population'], params['population'] + params['learner_population']): 126 | avg_rew += (reward_dict[str(ep)][str(l)] / params['episode_ticks']) 127 | f.write(f"{action_dict[str(ep)][str(l)]['2']}, {action_dict[str(ep)][str(l)]['0']}, {action_dict[str(ep)][str(l)]['1']}, ") 128 | 129 | avg_rew /= params['learner_population'] 130 | f.write(f"{avg_rew}\n") 131 | 132 | print(json.dumps(cluster_dict, indent=2)) 133 | print("Training finished!\n") 134 | 135 | return env, qtable, epsilon 136 | 137 | 138 | def eval(env, 139 | params:dict, 140 | test_episodes:int, 141 | qtable, 142 | test_log_every:int, 143 | epsilon:float,): 144 | # DOC Evaluate agent's performance after SARSA 145 | cluster_dict = {} 146 | print("Start testing...") 147 | 148 | for ep in range(1, test_episodes + 1): 149 | env.reset() 150 | for tick in range(1, params['episode_ticks']+1): 151 | for agent in env.agent_iter(max_iter=params['learner_population']): 152 | state, _, _, _ = env.last(agent) 153 | s = state_to_int_map(state.observe()) 154 | 155 | if random.uniform(0, 1) < epsilon: 156 | # action = np.random.randint(0, 2) 157 | action = env.action_space(agent).sample() 158 | else: 159 | action = np.argmax(qtable[agent][s]) 160 | 161 | env.step(action) 162 | env.move() 163 | env._evaporate() 164 | env._diffuse() 165 | env.render() 166 | 167 | if ep % test_log_every == 0: 168 | print(f"EPISODE: {ep}") 169 | print(f"\tepsilon: {epsilon}") 170 | # print(f"\tepisode reward: {reward_episode}") 171 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 172 | 173 | print(json.dumps(cluster_dict, indent=2)) 174 | print("Testing finished!\n") 175 | env.close() 176 | 177 | 178 | def main(args): 179 | random.seed(args.random_seed) 180 | np.random.seed(args.random_seed) 181 | curdir = os.path.dirname(os.path.abspath(__file__)) 182 | 183 | params, l_params = read_params(args.params_path, 
args.learning_params_path) 184 | epsilon_end = l_params["epsilon_end"] 185 | 186 | env = Slime(render_mode="human", **params) 187 | 188 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(True, curdir, params, l_params) 189 | 190 | qtable, actions_dict, action_dict, reward_dict, cluster_dict = create_agent(params, l_params,train_episodes) 191 | 192 | env, qtable, epsilon = train(env, params, l_params, qtable, actions_dict, action_dict, reward_dict, \ 193 | cluster_dict, train_episodes, train_log_every, alpha, gamma, decay, epsilon, epsilon_end, output_file, output_dir) 194 | 195 | eval(env, params, test_episodes, qtable, test_log_every, epsilon) 196 | 197 | if __name__ == "__main__": 198 | parser = argparse.ArgumentParser() 199 | parser.add_argument("params_path", type=str) 200 | parser.add_argument("learning_params_path", type=str) 201 | parser.add_argument("--random-seed", type=int, default=0) 202 | 203 | args = parser.parse_args() 204 | 205 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 206 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 207 | 208 | main(args) 209 | 210 | -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Centralized/Centralized.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import positional_encoding, read_params, save_env_image, setup, update_summary, video_from_images 10 | from utils.DQN import DQN, ReplayMemory, optimize_model, select_action 11 | 12 | import argparse 13 | 14 | import os 15 | import math 16 | import json 17 | import random 18 | import datetime 19 | from collections import namedtuple 20 | from tqdm import tqdm 21 | 22 | import torch 23 | import torch.optim as optim 24 | from torch.optim.lr_scheduler import StepLR 25 | 26 | 27 | def train(env, 28 | params, 29 | l_params, 30 | device, 31 | policy_net, 32 | target_net, 33 | train_episodes, 34 | train_log_every, 35 | output_file, 36 | output_dir, 37 | normalize, 38 | positional_encoding): 39 | Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward')) 40 | 41 | batch_size = l_params["batch_size"] 42 | learning_rate = l_params["lr"] 43 | epsilon_end = l_params["epsilon_end"] 44 | alpha = l_params["alpha"] 45 | gamma = l_params["gamma"] 46 | decay = l_params["decay"] 47 | n_actions = len(l_params["actions"]) 48 | population = params['population'] 49 | learner_population = params['learner_population'] 50 | learner_population = params['learner_population'] 51 | update_net_every = l_params['update_net_every'] 52 | memory_capacity = l_params["memory_capacity"] 53 | 54 | optimizer = optim.AdamW(policy_net.parameters(), lr=learning_rate, amsgrad=True) 55 | scheduler = StepLR(optimizer, step_size=1, gamma=l_params["step_lr"]) 56 | memory = ReplayMemory(Transition, memory_capacity) 57 | 58 | old_s = {} 59 | old_a = {} 60 | cluster_dict = {} 61 | 62 | actions_dict = {str(ep): {str(ac): 
0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 63 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 64 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 65 | 66 | if not os.path.exists(output_dir): 67 | os.makedirs(output_dir) 68 | 69 | max_possible_reward = (((params['episode_ticks'] - 150)/params['episode_ticks']) * params['rew']) + \ 70 | ((params['learner_population'] / params["cluster_threshold"]) * (params['rew'] ** 2)) 71 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 72 | 73 | for ep in range(1, train_episodes + 1): 74 | env.reset() 75 | losses = [] 76 | 77 | # Initialize the environment and get it's state 78 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 79 | for agent in env.agent_iter(max_iter=params['learner_population']): 80 | next_state, reward, _, _ = env.last(agent) 81 | next_state = torch.tensor(next_state.observe(), dtype=torch.float32, device=device) 82 | 83 | if positional_encoding: 84 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 85 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 86 | new_pherormone = pos_encoding + new_pherormone 87 | else: 88 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 89 | 90 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 91 | if normalize: 92 | new_pherormone /= max_possible_pherormone 93 | 94 | next_state = torch.cat((torch.flatten(new_pherormone), next_state)).unsqueeze(0) 95 | 96 | if ep == 1 and tick == 1: 97 | next_action = env.action_space(agent).sample() 98 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 99 | else: 100 | state = old_s[agent] 101 | action = old_a[agent] 102 | next_action, policy_net = select_action(env, agent, next_state, ep, policy_net, device, epsilon_end, decay) 103 | #normalization is done considering the max reward a single agent can receive 104 | reward = torch.tensor([reward], device=device) if not normalize \ 105 | else torch.tensor([reward / max_possible_reward], device=device) 106 | 107 | 108 | # Store the transition in memory 109 | memory.push(state, action, next_state, reward) 110 | 111 | # Perform one step of the optimization (on the policy network) 112 | policy_net, target_net, loss_single = optimize_model(Transition, memory, policy_net, target_net, gamma, batch_size, device) 113 | if loss_single is not None: 114 | # Optimize the model 115 | optimizer.zero_grad() 116 | loss_single.backward() 117 | losses.append(torch.Tensor.clone(loss_single.detach())) 118 | 119 | # In-place gradient clipping 120 | torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100) 121 | optimizer.step() 122 | scheduler.step() 123 | 124 | # Soft update of the target network's weights 125 | # θ′ ← τ θ + (1 −τ )θ′ 126 | if (agent + tick * learner_population + ep * params['episode_ticks'] * learner_population) % update_net_every == 0: 127 | target_net_state_dict = target_net.state_dict() 128 | policy_net_state_dict = 
policy_net.state_dict() 129 | for key in policy_net_state_dict: 130 | target_net_state_dict[key] = policy_net_state_dict[key] * alpha + target_net_state_dict[key] * (1 - alpha) 131 | target_net.load_state_dict(target_net_state_dict) 132 | 133 | env.step(next_action.item()) 134 | old_s[agent] = next_state 135 | old_a[agent] = next_action 136 | 137 | policy_net.epsilon = epsilon_end + (policy_net.epsilon - epsilon_end) * math.exp(-1. * ep * decay) 138 | 139 | actions_dict[str(ep)][str(next_action.item())] += 1 140 | action_dict[str(ep)][str(agent)][str(next_action.item())] += 1 141 | reward_dict[str(ep)][str(agent)] += round(reward.item(), 2) if isinstance(reward, torch.Tensor) else round(reward, 2) 142 | 143 | env.move() 144 | env._evaporate() 145 | env._diffuse() 146 | image = env.render() 147 | 148 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 149 | if not os.path.exists(os.path.join(output_dir, "images")): 150 | os.makedirs(os.path.join(output_dir, "images")) 151 | 152 | if ep == int(l_params["fist_saveimages_episode"]): 153 | save_env_image(image, tick, output_dir, "first_episode") 154 | elif ep == int(l_params["middle_saveimages_episode"]): 155 | save_env_image(image, tick, output_dir, "middle_episode") 156 | elif ep == int(l_params["last_saveimages_episode"]): 157 | save_env_image(image, tick, output_dir, "last_episode") 158 | 159 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 160 | video_from_images(output_dir, "first_episode") 161 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 162 | video_from_images(output_dir, "middle_episode") 163 | 164 | 165 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 166 | if ep % train_log_every == 0: 167 | cur_lr = optimizer.param_groups[0]['lr'] 168 | print("EPISODE: {}\tepsilon: {:.5f}\tavg loss: {:.8f}\tlearning rate {:.10f}".format(ep, policy_net.epsilon, sum(losses)/len(losses), cur_lr)) 169 | update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr) 170 | 171 | #print(json.dumps(cluster_dict, indent=2)) 172 | print("Training finished!\n") 173 | video_from_images(output_dir, "last_episode") 174 | 175 | policy_model_name = "policy_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".pth" 176 | target_model_name = "target_" + datetime.datetime.now().strftime("%m_%d_%Y__%H_%M_%S") + ".pth" 177 | torch.save(policy_net.state_dict(), os.path.join(output_dir, "models", policy_model_name)) 178 | torch.save(target_net.state_dict(), os.path.join(output_dir, "models", target_model_name)) 179 | 180 | return policy_net, env 181 | 182 | 183 | def test(env, params, l_params, policy_net, test_episodes, test_log_every, device, normalize, pos_enc): 184 | cluster_dict = {} 185 | print("[INFO] Start testing...") 186 | 187 | epsilon_end = l_params["epsilon_end"] 188 | policy_net.epsilon = epsilon_test = l_params["epsilon_test"] 189 | decay = l_params["decay"] 190 | 191 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 192 | for ep in range(1, test_episodes + 1): 193 | env.reset() 194 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 195 | for agent in env.agent_iter(max_iter=params['learner_population']): 196 | if ep == 1 and tick == 1: 197 | next_action = env.action_space(agent).sample() 198 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 199 | else: 200 | 
state, reward, _, _ = env.last(agent) 201 | state = torch.tensor(state.observe(), dtype=torch.float32, device=device) 202 | 203 | if pos_enc: 204 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 205 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 206 | new_pherormone = pos_encoding + new_pherormone 207 | else: 208 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 209 | 210 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 211 | if normalize: 212 | new_pherormone /= max_possible_pherormone 213 | 214 | state = torch.cat((torch.flatten(new_pherormone), state)).unsqueeze(0) 215 | action, policy_net = select_action(env, agent, state, ep, policy_net, device, epsilon_end, decay) 216 | env.step(action) 217 | 218 | env.move() 219 | env._evaporate() 220 | env._diffuse() 221 | env.render() 222 | 223 | if ep % test_log_every == 0: 224 | print(f"EPISODE: {ep}") 225 | print(f"\tepsilon: {policy_net.epsilon}") 226 | # print(f"\tepisode reward: {reward_episode}") 227 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 228 | 229 | print(json.dumps(cluster_dict, indent=2)) 230 | print("Testing finished!\n") 231 | 232 | 233 | def main(args): 234 | random.seed(args.random_seed) 235 | torch.manual_seed(args.random_seed) 236 | 237 | params, l_params = read_params(args.params_path, args.learning_params_path) 238 | curdir = os.path.dirname(os.path.abspath(__file__)) 239 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(args.train, curdir, params, l_params) 240 | env = Slime(render_mode="human", **params) 241 | 242 | if not os.path.isdir(os.path.join(output_dir, "models")) and args.train: 243 | os.makedirs(os.path.join(output_dir, "models")) 244 | 245 | n_actions = len(l_params["actions"]) 246 | if args.positional_encoding: 247 | n_observations = 100 248 | else: 249 | n_observations = 51 250 | 251 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 252 | print(f"[INFO] Device selected: {device}") 253 | 254 | policy_net = DQN(n_observations, n_actions, epsilon).to(device) 255 | target_net = DQN(n_observations, n_actions, epsilon).to(device) 256 | 257 | if args.models_path == "" or args.train: 258 | args.model_path = output_dir 259 | 260 | if args.resume or args.test: 261 | if os.path.isfile(os.path.join(args.model_path, "models", args.policy_model_name)) and \ 262 | os.path.isfile(os.path.join(args.model_path, "models", args.target_model_name)): 263 | policy_model_path = os.path.join(args.model_path, "models", args.policy_model_name) 264 | target_model_path = os.path.join(args.model_path, "models", args.target_model_name) 265 | policy_net.load_state_dict(torch.load(policy_model_path), strict=False) 266 | target_net.load_state_dict(torch.load(target_model_path), strict=False) 267 | else: 268 | target_net.load_state_dict(policy_net.state_dict()) 269 | 270 | if args.train: 271 | policy_net, env = train(env, params, l_params, device, policy_net, target_net, train_episodes, train_log_every, output_file, output_dir, args.normalize_input, args.positional_encoding) 272 | 273 | if args.test: 274 | test(env, params, l_params, policy_net, test_episodes, test_log_every, device, args.normalize_input, args.positional_encoding) 275 | 276 | 
env.close() 277 | 278 | 279 | if __name__ == "__main__": 280 | parser = argparse.ArgumentParser() 281 | parser.add_argument("params_path", type=str) 282 | parser.add_argument("learning_params_path", type=str) 283 | parser.add_argument("--policy-model-name", type=str, default="") 284 | parser.add_argument("--target-model-name", type=str, default="") 285 | parser.add_argument("--models-path", type=str, default="") 286 | parser.add_argument("--normalize-input", action="store_true") 287 | parser.add_argument("--positional-encoding", action="store_true") 288 | parser.add_argument("--train", action="store_true") 289 | parser.add_argument("--test", action="store_true") 290 | parser.add_argument("--resume", action="store_true") 291 | parser.add_argument("--random-seed", type=int, default=0) 292 | 293 | args = parser.parse_args() 294 | 295 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 296 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 297 | 298 | main(args) -------------------------------------------------------------------------------- /slime_environments/agents/DQNet_Decentralized/Decentralized.py: -------------------------------------------------------------------------------- 1 | from slime_environments.environments.SlimeEnvMultiAgent import Slime 2 | 3 | import sys 4 | import os 5 | 6 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(parent_dir) 8 | 9 | from utils.utils import read_params, save_env_image, setup, positional_encoding, update_summary, video_from_images 10 | from utils.DQN import DQN, ReplayMemory, optimize_model, select_action 11 | 12 | import argparse 13 | 14 | import os 15 | import json 16 | import random 17 | import datetime 18 | from collections import namedtuple 19 | from tqdm import tqdm 20 | 21 | import torch 22 | import torch.optim as optim 23 | from torch.optim.lr_scheduler import StepLR 24 | 25 | 26 | def train(env, 27 | params, 28 | l_params, 29 | device, 30 | policy_nets, 31 | target_nets, 32 | train_episodes, 33 | train_log_every, 34 | output_file, 35 | output_dir, 36 | normalize, 37 | pos_enc): 38 | Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward')) 39 | 40 | batch_size = l_params["batch_size"] 41 | learning_rate = l_params["lr"] 42 | epsilon_end = l_params["epsilon_end"] 43 | alpha = l_params["alpha"] 44 | gamma = l_params["gamma"] 45 | decay = l_params["decay"] 46 | n_actions = len(l_params["actions"]) 47 | population = params['population'] 48 | learner_population = params['learner_population'] 49 | update_net_every = l_params['update_net_every'] 50 | memory_capacity = l_params["memory_capacity"] 51 | 52 | optimizers = {i: optim.AdamW(policy_nets[i].parameters(), lr=learning_rate, amsgrad=True) for i in range(params['learner_population'])} 53 | schedulers = {i: StepLR(optimizers[i], step_size=1, gamma=l_params["step_lr"]) for i in range(params['learner_population'])} 54 | memory = {i: ReplayMemory(Transition, memory_capacity) for i in range(params['learner_population'])} 55 | 56 | old_s = {} 57 | old_a = {} 58 | cluster_dict = {} 59 | 60 | actions_dict = {str(ep): {str(ac): 0 for ac in range(n_actions)} for ep in range(1, train_episodes + 1)} # DOC 0 = walk, 1 = lay_pheromone, 2 = 
follow_pheromone 61 | action_dict = {str(ep): {str(ag): {str(ac): 0 for ac in range(n_actions)} for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 62 | reward_dict = {str(ep): {str(ag): 0 for ag in range(population, population + learner_population)} for ep in range(1, train_episodes + 1)} 63 | epsilon = 0 64 | cur_lr = 0 65 | 66 | max_possible_reward = (((params['episode_ticks'] - 150)/params['episode_ticks']) * params['rew']) + \ 67 | ((params['learner_population'] / params["cluster_threshold"]) * (params['rew'] ** 2)) 68 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 69 | 70 | for ep in range(1, train_episodes + 1): 71 | env.reset() 72 | losses = [] 73 | 74 | # Initialize the environment and get it's state 75 | for tick in tqdm(range(1, params['episode_ticks'] + 1)): 76 | for agent in env.agent_iter(max_iter=params['learner_population']): 77 | next_state, reward, _, _ = env.last(agent) 78 | next_state = torch.tensor(next_state.observe(), dtype=torch.float32, device=device) 79 | 80 | if pos_enc: 81 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 82 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 83 | new_pherormone = pos_encoding + new_pherormone 84 | else: 85 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 86 | 87 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 88 | if normalize: 89 | new_pherormone /= max_possible_pherormone 90 | 91 | next_state = torch.cat((torch.flatten(new_pherormone), next_state)).unsqueeze(0) 92 | 93 | if ep == 1 and tick == 1: 94 | next_action = env.action_space(agent).sample() 95 | next_action = torch.tensor([next_action], dtype=torch.long, device=device).unsqueeze(0) 96 | else: 97 | state = old_s[agent] 98 | action = old_a[agent] 99 | next_action, policy_nets[agent] = select_action(env, agent, next_state, ep, policy_nets[agent], device, epsilon_end, decay) 100 | 101 | #normalization is done considering the max reward a single agent can receive 102 | reward = torch.tensor([reward], device=device) if not normalize \ 103 | else torch.tensor([reward / max_possible_reward], device=device) 104 | 105 | # Store the transition in memory 106 | memory[agent].push(state, action, next_state, reward) 107 | 108 | # Perform one step of the optimization (on the policy network) 109 | policy_nets[agent], target_nets[agent], loss_single = optimize_model(Transition, memory[agent], policy_nets[agent], target_nets[agent], gamma, batch_size, device) 110 | if loss_single is not None: 111 | # Optimize the model 112 | optimizers[agent].zero_grad() 113 | loss_single.backward() 114 | losses.append(torch.Tensor.clone(loss_single.detach())) 115 | 116 | # In-place gradient clipping 117 | torch.nn.utils.clip_grad_value_(policy_nets[agent].parameters(), 100) 118 | optimizers[agent].step() 119 | schedulers[agent].step() 120 | 121 | # Soft update of the target network's weights 122 | # θ′ ← τ θ + (1 −τ )θ′ 123 | if (agent + tick * learner_population + ep * params['episode_ticks'] * learner_population) % update_net_every == 0: 124 | target_net_state_dict = target_nets[agent].state_dict() 125 | policy_net_state_dict = policy_nets[agent].state_dict() 126 | for key in policy_net_state_dict: 127 | target_net_state_dict[key] = 
policy_net_state_dict[key] * alpha + target_net_state_dict[key] * (1 - alpha) 128 | target_nets[agent].load_state_dict(target_net_state_dict) 129 | 130 | epsilon = policy_nets[agent].epsilon 131 | cur_lr = optimizers[agent].param_groups[0]['lr'] 132 | 133 | env.step(next_action.item()) 134 | old_s[agent] = next_state 135 | old_a[agent] = next_action 136 | 137 | actions_dict[str(ep)][str(next_action.item())] += 1 138 | action_dict[str(ep)][str(agent)][str(next_action.item())] += 1 139 | reward_dict[str(ep)][str(agent)] += round(reward.item(), 2) if isinstance(reward, torch.Tensor) else round(reward, 2) 140 | 141 | env.move() 142 | env._evaporate() 143 | env._diffuse() 144 | image = env.render() 145 | 146 | if ep in [l_params["fist_saveimages_episode"], l_params["middle_saveimages_episode"], l_params["last_saveimages_episode"]]: 147 | if not os.path.exists(os.path.join(output_dir, "images")): 148 | os.makedirs(os.path.join(output_dir, "images")) 149 | 150 | if ep == int(l_params["fist_saveimages_episode"]): 151 | save_env_image(image, tick, output_dir, "first_episode") 152 | elif ep == int(l_params["middle_saveimages_episode"]): 153 | save_env_image(image, tick, output_dir, "middle_episode") 154 | elif ep == int(l_params["last_saveimages_episode"]): 155 | save_env_image(image, tick, output_dir, "last_episode") 156 | 157 | elif ep == int(l_params["fist_saveimages_episode"]) + 1 and tick == 1: 158 | video_from_images(output_dir, "first_episode") 159 | elif ep == int(l_params["middle_saveimages_episode"]) + 1 and tick == 1: 160 | video_from_images(output_dir, "middle_episode") 161 | 162 | 163 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 164 | if ep % train_log_every == 0: 165 | print("EPISODE: {}\tepsilon: {:.5f}\tavg loss: {:.8f}\tlearning rate {:.10f}".format(ep, epsilon, sum(losses)/len(losses), cur_lr)) 166 | update_summary(output_file, ep, params, cluster_dict, actions_dict, action_dict, reward_dict, losses, cur_lr) 167 | 168 | 169 | #print(json.dumps(cluster_dict, indent=2)) 170 | print("Training finished!\n") 171 | video_from_images(output_dir, "last_episode") 172 | 173 | env.reset() 174 | now = datetime.datetime.now() 175 | for agent in range(params['learner_population']): 176 | policy_model_name = os.path.join(f"policy_{agent}_" + now.strftime("%m_%d_%Y__%H_%M_%S") + ".pth") 177 | target_model_name = os.path.join(f"target_{agent}_" + now.strftime("%m_%d_%Y__%H_%M_%S") + ".pth") 178 | torch.save(policy_nets[agent].state_dict(), os.path.join(output_dir, "models", "policies", policy_model_name)) 179 | torch.save(target_nets[agent].state_dict(), os.path.join(output_dir, "models", "targets", target_model_name)) 180 | 181 | return policy_nets, env 182 | 183 | 184 | def test(env, params, l_params, policy_nets, test_episodes, test_log_every, device, normalize, pos_enc): 185 | cluster_dict = {} 186 | print("[INFO] Start testing...") 187 | 188 | epsilon_end = l_params["epsilon_end"] 189 | epsilon_test = l_params["epsilon_test"] 190 | decay = l_params["decay"] 191 | 192 | max_possible_pherormone = env.lay_amount * params['learner_population'] * 5 193 | for ep in range(1, test_episodes + 1): 194 | env.reset() 195 | for tick in tqdm(range(1, params['episode_ticks'] + 1), desc=f"epsilon: {policy_net.epsilon}"): 196 | for agent in env.agent_iter(max_iter=params['learner_population']): 197 | if ep == 1 and tick == 1: 198 | policy_nets[agent].epsilon = epsilon_test 199 | state, reward, _, _ = env.last(agent) 200 | state = torch.tensor(state.observe(), dtype=torch.float32, device=device) 201 | 
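The `positional_encoding` helper imported from `utils.utils` is not included in this listing; it is only known here through the call `positional_encoding(new_pherormone.numel(), 2)`, i.e. a (length, depth) pair, and the observation sizes chosen in `main()` (51 without the encoding, 100 with it) appear consistent with a 49-patch pheromone neighbourhood whose two encoding channels get flattened into the state. As a rough sketch only, assuming a standard sinusoidal form that the repository does not confirm, it could look like:

    import numpy as np

    def positional_encoding(length: int, depth: int) -> np.ndarray:
        """Sinusoidal positional encoding of shape (length, depth) -- illustrative sketch only."""
        positions = np.arange(length)[:, np.newaxis]                  # (length, 1)
        dims = np.arange(depth)[np.newaxis, :]                        # (1, depth)
        angle_rates = 1.0 / np.power(10000.0, (2 * (dims // 2)) / depth)
        angles = positions * angle_rates                              # (length, depth)
        encoding = np.zeros((length, depth), dtype=np.float32)
        encoding[:, 0::2] = np.sin(angles[:, 0::2])                   # even channels: sine
        encoding[:, 1::2] = np.cos(angles[:, 1::2])                   # odd channels: cosine
        return encoding

    pe = positional_encoding(49, 2)  # same (numel, 2) call shape as used in the train/test loops

Under this reading, adding the (1, 49, 2) encoding to the (1, 49, 1) pheromone tensor broadcasts to (1, 49, 2), which flattens to 98 values and, with the 2-bit base observation, matches n_observations = 100.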
202 | if pos_enc: 203 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 204 | pos_encoding = torch.tensor(positional_encoding(new_pherormone.numel(), 2), dtype=torch.float32).to(device).unsqueeze(0) 205 | new_pherormone = pos_encoding + new_pherormone 206 | else: 207 | new_pherormone = torch.tensor(env.get_neighborood_chemical(agent, True).reshape(-1,1), dtype=torch.float32).to(device).unsqueeze(0) 208 | 209 | #normalization is done considering all the agents in the same patch dropping at the same time pherormone 210 | if normalize: 211 | new_pherormone /= max_possible_pherormone 212 | 213 | state = torch.cat((torch.flatten(new_pherormone), state)).unsqueeze(0) 214 | 215 | action, policy_net = select_action(env, agent, state, ep, policy_nets[agent], device, epsilon_end, decay) 216 | env.step(action) 217 | 218 | env.move() 219 | env._evaporate() 220 | env._diffuse() 221 | env.render() 222 | 223 | if ep % test_log_every == 0: 224 | print(f"EPISODE: {ep}") 225 | print(f"\tepsilon: {policy_net.epsilon}") 226 | # print(f"\tepisode reward: {reward_episode}") 227 | cluster_dict[str(ep)] = round(env.avg_cluster(), 2) 228 | 229 | print(json.dumps(cluster_dict, indent=2)) 230 | print("Testing finished!\n") 231 | 232 | 233 | def main(args): 234 | random.seed(args.random_seed) 235 | torch.manual_seed(args.random_seed) 236 | 237 | params, l_params = read_params(args.params_path, args.learning_params_path) 238 | curdir = os.path.dirname(os.path.abspath(__file__)) 239 | output_dir, output_file, alpha, gamma, epsilon, decay, train_episodes, train_log_every, test_episodes, test_log_every = setup(args.train, curdir, params, l_params) 240 | env = Slime(render_mode="human", **params) 241 | 242 | if not os.path.isdir(os.path.join(output_dir, "models")) and args.train: 243 | os.makedirs(os.path.join(output_dir, "models")) 244 | 245 | if not os.path.isdir(os.path.join(output_dir, "models", "policies")) and args.train: 246 | os.makedirs(os.path.join(output_dir, "models", "policies")) 247 | 248 | if not os.path.isdir(os.path.join(output_dir, "models", "targets")) and args.train: 249 | os.makedirs(os.path.join(output_dir, "models", "targets")) 250 | 251 | n_actions = len(l_params["actions"]) 252 | if args.positional_encoding: 253 | n_observations = 100 254 | else: 255 | n_observations = 51 256 | 257 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 258 | print(f"[INFO] Device selected: {device}") 259 | 260 | population = params['population'] 261 | learner_population = params['learner_population'] 262 | policy_nets = {ag: DQN(n_observations, n_actions, epsilon).to(device) for ag in range(population, population + learner_population)} 263 | target_nets = {ag: DQN(n_observations, n_actions, epsilon).to(device) for ag in range(population, population + learner_population)} 264 | 265 | if args.models_path == "" or args.train: 266 | args.model_path = output_dir 267 | 268 | policies_path = os.path.join(args.models_path, "models", "policies") 269 | targets_path = os.path.join(args.models_path, "models", "targets") 270 | if args.resume or args.test: 271 | if os.path.exists(policies_path) and os.path.exists(targets_path): 272 | policies = [os.path.join(root, file) for root, dirs, files in os.walk(policies_path) for file in files if os.path.isfile(os.path.join(root, file))] 273 | targets = [os.path.join(root, file) for root, dirs, files in os.walk(targets_path) for file in files if os.path.isfile(os.path.join(root, file))] 274 | 
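Nothing in `os.walk` guarantees the order in which checkpoint files are returned, and the `enumerate`-based loading just below indexes `policy_nets`/`target_nets` from 0 even though those dictionaries are keyed from `population` to `population + learner_population - 1` (the two only coincide when `population` is 0). A sketch of one way to make the checkpoint-to-agent pairing explicit, assuming the `policy_{agent}_<timestamp>.pth` / `target_{agent}_<timestamp>.pth` naming produced by `train()` above (the helper name here is illustrative, not part of the repository):

    import re  # os, torch, policy_nets, target_nets and population are already in scope above

    def agent_index_from_checkpoint(path: str) -> int:
        # train() saves files as f"policy_{agent}_<timestamp>.pth"; recover the agent index.
        match = re.search(r"_(\d+)_", os.path.basename(path))
        assert match is not None, f"unexpected checkpoint name: {path}"
        return int(match.group(1))

    # Map each file to the network keyed for the agent that produced it,
    # independent of the order os.walk happened to return the files in.
    for path in policies:
        policy_nets[population + agent_index_from_checkpoint(path)].load_state_dict(
            torch.load(path), strict=False)
    for path in targets:
        target_nets[population + agent_index_from_checkpoint(path)].load_state_dict(
            torch.load(path), strict=False)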
275 | assert len(policies) == params['learner_population'], f"policies weights {len(policies)} and learner population {params['learner_population']} are different!" 276 | assert len(targets) == params['learner_population'], f"targets weights {len(targets)} and learner population {params['learner_population']} are different!" 277 | 278 | for i, file in enumerate(policies): 279 | policy_nets[i].load_state_dict(torch.load(file), strict=False) 280 | 281 | for i, file in enumerate(targets): 282 | target_nets[i].load_state_dict(torch.load(file), strict=False) 283 | else: 284 | for ag in range(population, population + learner_population): 285 | target_nets[ag].load_state_dict(policy_nets[ag].state_dict()) 286 | 287 | if args.train: 288 | policy_nets, env = train(env, params, l_params, device, policy_nets, target_nets, train_episodes, train_log_every, output_file, output_dir, args.normalize_input, args.positional_encoding) 289 | 290 | if args.test: 291 | test(env, params, l_params, policy_nets, test_episodes, test_log_every, device, args.normalize_input, args.positional_encoding) 292 | 293 | env.close() 294 | 295 | 296 | if __name__ == "__main__": 297 | parser = argparse.ArgumentParser() 298 | parser.add_argument("params_path", type=str) 299 | parser.add_argument("learning_params_path", type=str) 300 | parser.add_argument("--policy-model-name", type=str, default="") 301 | parser.add_argument("--target-model-name", type=str, default="") 302 | parser.add_argument("--models-path", type=str, default="") 303 | parser.add_argument("--normalize-input", action="store_true") 304 | parser.add_argument("--positional-encoding", action="store_true") 305 | parser.add_argument("--train", action="store_true") 306 | parser.add_argument("--test", action="store_true") 307 | parser.add_argument("--resume", action="store_true") 308 | parser.add_argument("--random-seed", type=int, default=0) 309 | 310 | args = parser.parse_args() 311 | 312 | assert args.params_path != "" and os.path.isfile(args.params_path) and args.params_path.endswith(".json"), "[ERROR] params path is empty or is not a file or is not a json file" 313 | assert args.learning_params_path != "" and os.path.isfile(args.learning_params_path) and args.learning_params_path.endswith(".json"), "[ERROR] learning params path is empty or is not a file or is not a json file" 314 | 315 | main(args) -------------------------------------------------------------------------------- /slime_environments/agents/QLearning/runs/multi_test_01_06_10_2023__21_12_57.csv: -------------------------------------------------------------------------------- 1 | { 2 | "population": 0, 3 | "learner_population": 100, 4 | "sniff_threshold": 0.9, 5 | "diffuse_area": 2, 6 | "diffuse_mode": "cascade", 7 | "follow_mode": "prob", 8 | "smell_area": 3, 9 | "lay_area": 1, 10 | "lay_amount": 3, 11 | "evaporation": 0.9, 12 | "cluster_threshold": 30, 13 | "cluster_radius": 5, 14 | "rew": 100, 15 | "penalty": -1, 16 | "episode_ticks": 500, 17 | "W": 66, 18 | "H": 38, 19 | "PATCH_SIZE": 20, 20 | "TURTLE_SIZE": 16, 21 | "FPS": 30, 22 | "SHADE_STRENGTH": 10, 23 | "SHOW_CHEM_TEXT": false, 24 | "CLUSTER_FONT_SIZE": 12, 25 | "CHEMICAL_FONT_SIZE": 8, 26 | "gui": true 27 | } 28 | ---------- 29 | TRAIN_EPISODES = 100 30 | TEST_EPISODES = 10 31 | ---------- 32 | alpha = 0.2 33 | gamma = 0.8 34 | epsilon = 0.9 35 | decay = 0.9995 36 | ---------- 37 | Episode, Tick, Avg cluster size X tick, move-toward-chemical, random-walk, drop-chemical, (learner 0)-move-toward-chemical, (learner 0)-random-walk, (learner 
0)-drop-chemical, (learner 1)-move-toward-chemical, (learner 1)-random-walk, (learner 1)-drop-chemical, (learner 2)-move-toward-chemical, (learner 2)-random-walk, (learner 2)-drop-chemical, (learner 3)-move-toward-chemical, (learner 3)-random-walk, (learner 3)-drop-chemical, (learner 4)-move-toward-chemical, (learner 4)-random-walk, (learner 4)-drop-chemical, (learner 5)-move-toward-chemical, (learner 5)-random-walk, (learner 5)-drop-chemical, (learner 6)-move-toward-chemical, (learner 6)-random-walk, (learner 6)-drop-chemical, (learner 7)-move-toward-chemical, (learner 7)-random-walk, (learner 7)-drop-chemical, (learner 8)-move-toward-chemical, (learner 8)-random-walk, (learner 8)-drop-chemical, (learner 9)-move-toward-chemical, (learner 9)-random-walk, (learner 9)-drop-chemical, (learner 10)-move-toward-chemical, (learner 10)-random-walk, (learner 10)-drop-chemical, (learner 11)-move-toward-chemical, (learner 11)-random-walk, (learner 11)-drop-chemical, (learner 12)-move-toward-chemical, (learner 12)-random-walk, (learner 12)-drop-chemical, (learner 13)-move-toward-chemical, (learner 13)-random-walk, (learner 13)-drop-chemical, (learner 14)-move-toward-chemical, (learner 14)-random-walk, (learner 14)-drop-chemical, (learner 15)-move-toward-chemical, (learner 15)-random-walk, (learner 15)-drop-chemical, (learner 16)-move-toward-chemical, (learner 16)-random-walk, (learner 16)-drop-chemical, (learner 17)-move-toward-chemical, (learner 17)-random-walk, (learner 17)-drop-chemical, (learner 18)-move-toward-chemical, (learner 18)-random-walk, (learner 18)-drop-chemical, (learner 19)-move-toward-chemical, (learner 19)-random-walk, (learner 19)-drop-chemical, (learner 20)-move-toward-chemical, (learner 20)-random-walk, (learner 20)-drop-chemical, (learner 21)-move-toward-chemical, (learner 21)-random-walk, (learner 21)-drop-chemical, (learner 22)-move-toward-chemical, (learner 22)-random-walk, (learner 22)-drop-chemical, (learner 23)-move-toward-chemical, (learner 23)-random-walk, (learner 23)-drop-chemical, (learner 24)-move-toward-chemical, (learner 24)-random-walk, (learner 24)-drop-chemical, (learner 25)-move-toward-chemical, (learner 25)-random-walk, (learner 25)-drop-chemical, (learner 26)-move-toward-chemical, (learner 26)-random-walk, (learner 26)-drop-chemical, (learner 27)-move-toward-chemical, (learner 27)-random-walk, (learner 27)-drop-chemical, (learner 28)-move-toward-chemical, (learner 28)-random-walk, (learner 28)-drop-chemical, (learner 29)-move-toward-chemical, (learner 29)-random-walk, (learner 29)-drop-chemical, (learner 30)-move-toward-chemical, (learner 30)-random-walk, (learner 30)-drop-chemical, (learner 31)-move-toward-chemical, (learner 31)-random-walk, (learner 31)-drop-chemical, (learner 32)-move-toward-chemical, (learner 32)-random-walk, (learner 32)-drop-chemical, (learner 33)-move-toward-chemical, (learner 33)-random-walk, (learner 33)-drop-chemical, (learner 34)-move-toward-chemical, (learner 34)-random-walk, (learner 34)-drop-chemical, (learner 35)-move-toward-chemical, (learner 35)-random-walk, (learner 35)-drop-chemical, (learner 36)-move-toward-chemical, (learner 36)-random-walk, (learner 36)-drop-chemical, (learner 37)-move-toward-chemical, (learner 37)-random-walk, (learner 37)-drop-chemical, (learner 38)-move-toward-chemical, (learner 38)-random-walk, (learner 38)-drop-chemical, (learner 39)-move-toward-chemical, (learner 39)-random-walk, (learner 39)-drop-chemical, (learner 40)-move-toward-chemical, (learner 40)-random-walk, (learner 40)-drop-chemical, 
(learner 41)-move-toward-chemical, (learner 41)-random-walk, (learner 41)-drop-chemical, (learner 42)-move-toward-chemical, (learner 42)-random-walk, (learner 42)-drop-chemical, (learner 43)-move-toward-chemical, (learner 43)-random-walk, (learner 43)-drop-chemical, (learner 44)-move-toward-chemical, (learner 44)-random-walk, (learner 44)-drop-chemical, (learner 45)-move-toward-chemical, (learner 45)-random-walk, (learner 45)-drop-chemical, (learner 46)-move-toward-chemical, (learner 46)-random-walk, (learner 46)-drop-chemical, (learner 47)-move-toward-chemical, (learner 47)-random-walk, (learner 47)-drop-chemical, (learner 48)-move-toward-chemical, (learner 48)-random-walk, (learner 48)-drop-chemical, (learner 49)-move-toward-chemical, (learner 49)-random-walk, (learner 49)-drop-chemical, (learner 50)-move-toward-chemical, (learner 50)-random-walk, (learner 50)-drop-chemical, (learner 51)-move-toward-chemical, (learner 51)-random-walk, (learner 51)-drop-chemical, (learner 52)-move-toward-chemical, (learner 52)-random-walk, (learner 52)-drop-chemical, (learner 53)-move-toward-chemical, (learner 53)-random-walk, (learner 53)-drop-chemical, (learner 54)-move-toward-chemical, (learner 54)-random-walk, (learner 54)-drop-chemical, (learner 55)-move-toward-chemical, (learner 55)-random-walk, (learner 55)-drop-chemical, (learner 56)-move-toward-chemical, (learner 56)-random-walk, (learner 56)-drop-chemical, (learner 57)-move-toward-chemical, (learner 57)-random-walk, (learner 57)-drop-chemical, (learner 58)-move-toward-chemical, (learner 58)-random-walk, (learner 58)-drop-chemical, (learner 59)-move-toward-chemical, (learner 59)-random-walk, (learner 59)-drop-chemical, (learner 60)-move-toward-chemical, (learner 60)-random-walk, (learner 60)-drop-chemical, (learner 61)-move-toward-chemical, (learner 61)-random-walk, (learner 61)-drop-chemical, (learner 62)-move-toward-chemical, (learner 62)-random-walk, (learner 62)-drop-chemical, (learner 63)-move-toward-chemical, (learner 63)-random-walk, (learner 63)-drop-chemical, (learner 64)-move-toward-chemical, (learner 64)-random-walk, (learner 64)-drop-chemical, (learner 65)-move-toward-chemical, (learner 65)-random-walk, (learner 65)-drop-chemical, (learner 66)-move-toward-chemical, (learner 66)-random-walk, (learner 66)-drop-chemical, (learner 67)-move-toward-chemical, (learner 67)-random-walk, (learner 67)-drop-chemical, (learner 68)-move-toward-chemical, (learner 68)-random-walk, (learner 68)-drop-chemical, (learner 69)-move-toward-chemical, (learner 69)-random-walk, (learner 69)-drop-chemical, (learner 70)-move-toward-chemical, (learner 70)-random-walk, (learner 70)-drop-chemical, (learner 71)-move-toward-chemical, (learner 71)-random-walk, (learner 71)-drop-chemical, (learner 72)-move-toward-chemical, (learner 72)-random-walk, (learner 72)-drop-chemical, (learner 73)-move-toward-chemical, (learner 73)-random-walk, (learner 73)-drop-chemical, (learner 74)-move-toward-chemical, (learner 74)-random-walk, (learner 74)-drop-chemical, (learner 75)-move-toward-chemical, (learner 75)-random-walk, (learner 75)-drop-chemical, (learner 76)-move-toward-chemical, (learner 76)-random-walk, (learner 76)-drop-chemical, (learner 77)-move-toward-chemical, (learner 77)-random-walk, (learner 77)-drop-chemical, (learner 78)-move-toward-chemical, (learner 78)-random-walk, (learner 78)-drop-chemical, (learner 79)-move-toward-chemical, (learner 79)-random-walk, (learner 79)-drop-chemical, (learner 80)-move-toward-chemical, (learner 80)-random-walk, (learner 
80)-drop-chemical, (learner 81)-move-toward-chemical, (learner 81)-random-walk, (learner 81)-drop-chemical, (learner 82)-move-toward-chemical, (learner 82)-random-walk, (learner 82)-drop-chemical, (learner 83)-move-toward-chemical, (learner 83)-random-walk, (learner 83)-drop-chemical, (learner 84)-move-toward-chemical, (learner 84)-random-walk, (learner 84)-drop-chemical, (learner 85)-move-toward-chemical, (learner 85)-random-walk, (learner 85)-drop-chemical, (learner 86)-move-toward-chemical, (learner 86)-random-walk, (learner 86)-drop-chemical, (learner 87)-move-toward-chemical, (learner 87)-random-walk, (learner 87)-drop-chemical, (learner 88)-move-toward-chemical, (learner 88)-random-walk, (learner 88)-drop-chemical, (learner 89)-move-toward-chemical, (learner 89)-random-walk, (learner 89)-drop-chemical, (learner 90)-move-toward-chemical, (learner 90)-random-walk, (learner 90)-drop-chemical, (learner 91)-move-toward-chemical, (learner 91)-random-walk, (learner 91)-drop-chemical, (learner 92)-move-toward-chemical, (learner 92)-random-walk, (learner 92)-drop-chemical, (learner 93)-move-toward-chemical, (learner 93)-random-walk, (learner 93)-drop-chemical, (learner 94)-move-toward-chemical, (learner 94)-random-walk, (learner 94)-drop-chemical, (learner 95)-move-toward-chemical, (learner 95)-random-walk, (learner 95)-drop-chemical, (learner 96)-move-toward-chemical, (learner 96)-random-walk, (learner 96)-drop-chemical, (learner 97)-move-toward-chemical, (learner 97)-random-walk, (learner 97)-drop-chemical, (learner 98)-move-toward-chemical, (learner 98)-random-walk, (learner 98)-drop-chemical, (learner 99)-move-toward-chemical, (learner 99)-random-walk, (learner 99)-drop-chemical, Avg reward X episode 38 | 10, 5000, 9.71, 16763, 16714, 16523, 184, 158, 158, 172, 169, 159, 163, 163, 174, 155, 178, 167, 160, 174, 166, 151, 185, 164, 179, 173, 148, 159, 167, 174, 147, 157, 196, 181, 153, 166, 175, 164, 161, 152, 164, 184, 172, 185, 143, 179, 163, 158, 172, 169, 159, 155, 170, 175, 152, 177, 171, 164, 193, 143, 182, 163, 155, 171, 166, 163, 173, 148, 179, 156, 174, 170, 165, 163, 172, 174, 156, 170, 164, 164, 172, 163, 163, 174, 189, 158, 153, 179, 159, 162, 157, 172, 171, 152, 168, 180, 180, 172, 148, 158, 170, 172, 179, 167, 154, 176, 163, 161, 160, 168, 172, 152, 186, 162, 153, 184, 163, 178, 159, 163, 188, 159, 153, 182, 174, 144, 178, 153, 169, 154, 183, 163, 155, 166, 179, 153, 166, 181, 177, 150, 173, 171, 161, 168, 169, 178, 153, 163, 175, 162, 167, 180, 153, 179, 158, 163, 169, 172, 159, 173, 162, 165, 174, 165, 161, 160, 173, 167, 164, 182, 154, 172, 158, 170, 158, 163, 179, 178, 151, 171, 159, 180, 161, 154, 185, 161, 163, 175, 162, 190, 156, 154, 184, 158, 158, 172, 167, 161, 164, 166, 170, 163, 159, 178, 181, 191, 128, 169, 168, 163, 157, 149, 194, 178, 167, 155, 148, 169, 183, 171, 175, 154, 175, 148, 177, 166, 183, 151, 177, 158, 165, 167, 157, 176, 175, 172, 153, 173, 178, 149, 150, 181, 169, 157, 169, 174, 157, 181, 162, 180, 162, 158, 169, 159, 172, 169, 151, 180, 172, 172, 156, 171, 164, 165, 178, 150, 172, 153, 184, 163, 156, 170, 174, 170, 172, 158, 145, 178, 177, 172, 152, 176, 177, 159, 164, 178, 151, 171, 173, 180, 147, 159, 157, 184, 180, 147, 173, 153, 173, 174, 164, 168, 168, 177, 159, 164, 3342.5985687999996 39 | 20, 10000, 7.02, 16622, 16673, 16705, 158, 163, 179, 157, 171, 172, 180, 171, 149, 167, 167, 166, 152, 156, 192, 173, 161, 166, 166, 170, 164, 160, 159, 181, 178, 164, 158, 176, 170, 154, 173, 154, 173, 168, 161, 171, 161, 166, 173, 157, 176, 167, 149, 177, 
174, 174, 158, 168, 166, 173, 161, 182, 151, 167, 169, 172, 159, 165, 182, 153, 166, 157, 177, 195, 157, 148, 165, 166, 169, 181, 159, 160, 188, 147, 165, 172, 162, 166, 168, 158, 174, 158, 178, 164, 181, 152, 167, 185, 156, 159, 166, 163, 171, 148, 179, 173, 182, 147, 171, 171, 163, 166, 170, 171, 159, 147, 168, 185, 177, 149, 174, 156, 170, 174, 169, 196, 135, 135, 186, 179, 174, 165, 161, 153, 169, 178, 190, 152, 158, 152, 176, 172, 169, 169, 162, 179, 160, 161, 157, 162, 181, 163, 191, 146, 156, 137, 207, 173, 171, 156, 164, 153, 183, 147, 182, 171, 166, 176, 158, 164, 163, 173, 156, 184, 160, 162, 166, 172, 183, 150, 167, 164, 178, 158, 172, 153, 175, 163, 145, 192, 152, 160, 188, 169, 161, 170, 155, 162, 183, 145, 169, 186, 154, 176, 170, 159, 140, 201, 138, 182, 180, 188, 155, 157, 145, 182, 173, 165, 165, 170, 163, 182, 155, 181, 164, 155, 169, 155, 176, 170, 174, 156, 166, 172, 162, 163, 158, 179, 174, 165, 161, 165, 178, 157, 176, 161, 163, 150, 184, 166, 163, 186, 151, 176, 168, 156, 164, 163, 173, 169, 167, 164, 159, 159, 182, 151, 190, 159, 167, 158, 175, 167, 154, 179, 161, 190, 149, 172, 182, 146, 153, 165, 182, 171, 167, 162, 197, 145, 158, 173, 176, 151, 167, 184, 149, 169, 182, 149, 158, 168, 174, 172, 171, 157, 175, 174, 151, 173, 171, 156, 3057.8800402000006 40 | 30, 15000, 9.1, 16844, 16722, 16434, 150, 174, 176, 168, 187, 145, 154, 180, 166, 177, 173, 150, 174, 171, 155, 185, 154, 161, 160, 181, 159, 170, 170, 160, 142, 177, 181, 166, 163, 171, 174, 164, 162, 157, 176, 167, 159, 174, 167, 153, 180, 167, 142, 173, 185, 160, 175, 165, 160, 156, 184, 154, 169, 177, 160, 144, 196, 164, 177, 159, 175, 163, 162, 166, 181, 153, 174, 156, 170, 163, 158, 179, 185, 158, 157, 179, 154, 167, 151, 167, 182, 169, 172, 159, 186, 175, 139, 194, 156, 150, 170, 169, 161, 151, 183, 166, 179, 168, 153, 183, 164, 153, 177, 170, 153, 161, 175, 164, 169, 171, 160, 176, 167, 157, 154, 175, 171, 175, 163, 162, 168, 167, 165, 173, 169, 158, 174, 170, 156, 176, 152, 172, 166, 187, 147, 151, 160, 189, 174, 145, 181, 160, 180, 160, 171, 158, 171, 150, 180, 170, 187, 148, 165, 161, 183, 156, 175, 168, 157, 170, 175, 155, 185, 138, 177, 170, 166, 164, 178, 156, 166, 176, 148, 176, 153, 173, 174, 165, 161, 174, 166, 158, 176, 167, 148, 185, 162, 188, 150, 169, 164, 167, 154, 190, 156, 177, 168, 155, 158, 187, 155, 168, 155, 177, 178, 175, 147, 162, 184, 154, 184, 149, 167, 150, 187, 163, 165, 167, 168, 188, 153, 159, 177, 171, 152, 172, 154, 174, 144, 169, 187, 185, 165, 150, 177, 164, 159, 173, 161, 166, 175, 156, 169, 148, 179, 173, 166, 177, 157, 165, 167, 168, 163, 179, 158, 172, 162, 166, 174, 178, 148, 178, 174, 148, 163, 161, 176, 171, 160, 169, 182, 155, 163, 179, 171, 150, 187, 155, 158, 155, 171, 174, 178, 166, 156, 172, 171, 157, 179, 154, 167, 157, 161, 182, 175, 157, 168, 180, 164, 156, 2968.114642999999 41 | 40, 20000, 7.43, 16595, 16744, 16661, 163, 162, 175, 175, 159, 166, 170, 169, 161, 154, 161, 185, 163, 189, 148, 147, 195, 158, 179, 156, 165, 141, 182, 177, 177, 159, 164, 177, 180, 143, 157, 189, 154, 155, 168, 177, 178, 159, 163, 176, 157, 167, 148, 174, 178, 157, 157, 186, 170, 166, 164, 190, 149, 161, 159, 163, 178, 170, 185, 145, 164, 175, 161, 182, 161, 157, 183, 151, 166, 141, 180, 179, 175, 152, 173, 165, 171, 164, 177, 160, 163, 147, 170, 183, 166, 163, 171, 182, 153, 165, 179, 153, 168, 172, 165, 163, 147, 184, 169, 169, 149, 182, 172, 151, 177, 169, 173, 158, 167, 178, 155, 158, 158, 184, 155, 177, 168, 175, 170, 155, 176, 165, 159, 170, 162, 168, 157, 181, 162, 160, 
171, 169, 161, 177, 162, 164, 177, 159, 160, 159, 181, 153, 165, 182, 156, 171, 173, 163, 180, 157, 156, 163, 181, 190, 150, 160, 152, 174, 174, 165, 167, 168, 157, 179, 164, 171, 170, 159, 168, 165, 167, 158, 187, 155, 141, 188, 171, 181, 152, 167, 160, 154, 186, 157, 150, 193, 180, 155, 165, 191, 167, 142, 154, 168, 178, 170, 166, 164, 176, 171, 153, 168, 161, 171, 169, 164, 167, 168, 165, 167, 182, 155, 163, 144, 178, 178, 157, 178, 165, 174, 146, 180, 158, 176, 166, 161, 168, 171, 165, 196, 139, 163, 179, 158, 167, 170, 163, 155, 176, 169, 171, 170, 159, 200, 147, 153, 172, 171, 157, 159, 168, 173, 186, 154, 160, 180, 163, 157, 173, 170, 157, 157, 181, 162, 142, 168, 190, 164, 176, 160, 161, 161, 178, 168, 152, 180, 171, 146, 183, 153, 166, 181, 208, 156, 136, 149, 186, 165, 167, 173, 160, 157, 174, 169, 164, 166, 170, 164, 177, 159, 3241.0066775999985 42 | 50, 25000, 10.23, 16754, 16729, 16517, 161, 178, 161, 163, 167, 170, 152, 162, 186, 161, 159, 180, 160, 164, 176, 160, 165, 175, 187, 125, 188, 175, 166, 159, 161, 159, 180, 170, 158, 172, 159, 169, 172, 181, 152, 167, 158, 188, 154, 169, 164, 167, 152, 177, 171, 176, 158, 166, 182, 151, 167, 169, 162, 169, 179, 159, 162, 166, 180, 154, 158, 179, 163, 177, 175, 148, 129, 172, 199, 156, 181, 163, 170, 157, 173, 183, 132, 185, 154, 191, 155, 166, 189, 145, 169, 177, 154, 194, 165, 141, 172, 159, 169, 183, 154, 163, 179, 161, 160, 149, 181, 170, 174, 175, 151, 170, 168, 162, 170, 161, 169, 150, 198, 152, 165, 168, 167, 177, 158, 165, 155, 169, 176, 178, 142, 180, 165, 163, 172, 179, 154, 167, 176, 159, 165, 182, 173, 145, 163, 173, 164, 157, 176, 167, 171, 176, 153, 148, 188, 164, 159, 171, 170, 163, 158, 179, 180, 154, 166, 192, 140, 168, 170, 165, 165, 172, 167, 161, 183, 170, 147, 171, 157, 172, 169, 188, 143, 165, 165, 170, 155, 168, 177, 161, 169, 170, 165, 157, 178, 157, 174, 169, 164, 172, 164, 165, 167, 168, 164, 181, 155, 199, 148, 153, 181, 165, 154, 168, 178, 154, 152, 179, 169, 149, 157, 194, 147, 168, 185, 175, 168, 157, 160, 177, 163, 161, 176, 163, 157, 157, 186, 178, 162, 160, 158, 180, 162, 153, 172, 175, 182, 156, 162, 163, 157, 180, 178, 177, 145, 164, 178, 158, 158, 179, 163, 156, 178, 166, 176, 162, 162, 156, 191, 153, 175, 162, 163, 177, 162, 161, 203, 152, 145, 177, 145, 178, 178, 163, 159, 162, 191, 147, 173, 180, 147, 165, 171, 164, 160, 155, 185, 159, 179, 162, 174, 172, 154, 165, 172, 163, 3185.480032000001 43 | 60, 30000, 7.98, 16676, 16745, 16579, 147, 164, 189, 174, 148, 178, 171, 174, 155, 194, 153, 153, 166, 177, 157, 198, 160, 142, 154, 183, 163, 158, 183, 159, 159, 168, 173, 151, 193, 156, 203, 150, 147, 156, 174, 170, 177, 176, 147, 159, 177, 164, 153, 163, 184, 166, 157, 177, 154, 166, 180, 192, 147, 161, 181, 166, 153, 173, 164, 163, 177, 166, 157, 140, 168, 192, 178, 156, 166, 173, 170, 157, 160, 142, 198, 172, 184, 144, 189, 168, 143, 175, 171, 154, 172, 163, 165, 169, 159, 172, 184, 157, 159, 178, 173, 149, 172, 164, 164, 145, 191, 164, 156, 170, 174, 170, 156, 174, 159, 191, 150, 184, 155, 161, 161, 171, 168, 178, 149, 173, 151, 188, 161, 160, 182, 158, 169, 164, 167, 172, 172, 156, 156, 162, 182, 151, 184, 165, 161, 182, 157, 174, 157, 169, 156, 181, 163, 151, 176, 173, 156, 174, 170, 172, 152, 176, 180, 136, 184, 173, 173, 154, 179, 157, 164, 177, 165, 158, 154, 182, 164, 155, 176, 169, 163, 175, 162, 161, 162, 177, 176, 167, 157, 179, 165, 156, 143, 176, 181, 168, 181, 151, 164, 171, 165, 163, 157, 180, 171, 174, 155, 172, 158, 170, 154, 163, 183, 175, 179, 146, 182, 165, 153, 164, 169, 167, 
182, 145, 173, 168, 172, 160, 159, 179, 162, 161, 161, 178, 160, 155, 185, 179, 151, 170, 145, 173, 182, 172, 165, 163, 198, 153, 149, 155, 159, 186, 142, 186, 172, 175, 178, 147, 167, 167, 166, 150, 161, 189, 162, 183, 155, 159, 169, 172, 152, 169, 179, 131, 185, 184, 162, 176, 162, 191, 144, 165, 170, 163, 167, 161, 157, 182, 152, 167, 181, 163, 169, 168, 181, 147, 172, 156, 179, 165, 180, 167, 153, 183, 173, 144, 3104.9011563999998 44 | 70, 35000, 6.53, 16537, 16650, 16813, 161, 161, 178, 185, 172, 143, 153, 160, 187, 167, 193, 140, 154, 190, 156, 171, 167, 162, 159, 181, 160, 168, 165, 167, 161, 162, 177, 178, 178, 144, 166, 149, 185, 164, 171, 165, 161, 155, 184, 191, 169, 140, 173, 147, 180, 158, 171, 171, 157, 168, 175, 151, 171, 178, 163, 180, 157, 157, 160, 183, 153, 173, 174, 182, 158, 160, 183, 156, 161, 169, 149, 182, 166, 173, 161, 170, 161, 169, 166, 167, 167, 165, 171, 164, 181, 155, 164, 161, 161, 178, 172, 167, 161, 170, 155, 175, 156, 181, 163, 160, 160, 180, 167, 167, 166, 145, 188, 167, 169, 166, 165, 186, 152, 162, 162, 199, 139, 168, 165, 167, 166, 187, 147, 141, 174, 185, 153, 164, 183, 159, 164, 177, 170, 155, 175, 139, 174, 187, 158, 161, 181, 164, 165, 171, 168, 174, 158, 160, 157, 183, 165, 180, 155, 182, 165, 153, 156, 174, 170, 167, 167, 166, 144, 158, 198, 164, 162, 174, 173, 175, 152, 168, 161, 171, 169, 168, 163, 162, 187, 151, 169, 160, 171, 157, 147, 196, 160, 172, 168, 152, 161, 187, 163, 151, 186, 178, 162, 160, 152, 197, 151, 176, 163, 161, 175, 149, 176, 181, 166, 153, 151, 175, 174, 188, 161, 151, 136, 171, 193, 154, 174, 172, 165, 180, 155, 169, 158, 173, 187, 155, 158, 185, 133, 182, 149, 199, 152, 187, 145, 168, 169, 195, 136, 180, 153, 167, 175, 156, 169, 147, 171, 182, 155, 149, 196, 187, 143, 170, 182, 158, 160, 146, 162, 192, 167, 169, 164, 155, 158, 187, 186, 146, 168, 143, 197, 160, 157, 180, 163, 162, 163, 175, 169, 178, 153, 185, 145, 170, 159, 188, 153, 174, 183, 143, 156, 154, 190, 172, 157, 171, 2945.1333222000003 45 | 80, 40000, 10.3, 16641, 16639, 16720, 171, 175, 154, 161, 169, 170, 164, 165, 171, 167, 154, 179, 169, 179, 152, 181, 157, 162, 162, 163, 175, 150, 178, 172, 151, 172, 177, 164, 160, 176, 168, 157, 175, 162, 166, 172, 189, 152, 159, 172, 158, 170, 177, 176, 147, 174, 158, 168, 199, 150, 151, 162, 158, 180, 172, 173, 155, 155, 178, 167, 147, 192, 161, 166, 175, 159, 171, 181, 148, 167, 171, 162, 178, 171, 151, 184, 154, 162, 163, 165, 172, 172, 149, 179, 161, 171, 168, 162, 175, 163, 175, 161, 164, 156, 182, 162, 162, 168, 170, 176, 155, 169, 162, 172, 166, 148, 157, 195, 149, 182, 169, 177, 180, 143, 176, 169, 155, 169, 179, 152, 180, 165, 155, 154, 188, 158, 165, 179, 156, 159, 162, 179, 165, 165, 170, 159, 176, 165, 137, 172, 191, 161, 151, 188, 161, 167, 172, 173, 158, 169, 171, 154, 175, 161, 167, 172, 157, 168, 175, 163, 152, 185, 169, 166, 165, 187, 152, 161, 188, 131, 181, 173, 167, 160, 185, 160, 155, 155, 195, 150, 171, 178, 151, 184, 167, 149, 149, 177, 174, 166, 176, 158, 171, 156, 173, 141, 163, 196, 150, 159, 191, 171, 176, 153, 168, 173, 159, 163, 169, 168, 177, 162, 161, 150, 182, 168, 188, 163, 149, 186, 146, 168, 185, 162, 153, 147, 168, 185, 156, 165, 179, 166, 169, 165, 164, 178, 158, 162, 144, 194, 159, 175, 166, 173, 153, 174, 161, 184, 155, 152, 168, 180, 176, 144, 180, 158, 172, 170, 160, 157, 183, 178, 147, 175, 169, 167, 164, 163, 173, 164, 174, 160, 166, 167, 159, 174, 150, 184, 166, 158, 173, 169, 169, 158, 173, 171, 158, 171, 172, 169, 159, 162, 197, 141, 170, 161, 169, 170, 145, 185, 
3405.7440263999997 46 | 90, 45000, 8.04, 16675, 16806, 16519, 169, 163, 168, 167, 165, 168, 168, 156, 176, 176, 169, 155, 168, 168, 164, 159, 155, 186, 176, 152, 172, 142, 174, 184, 172, 172, 156, 182, 164, 154, 162, 167, 171, 158, 185, 157, 144, 173, 183, 173, 171, 156, 184, 182, 134, 157, 160, 183, 159, 186, 155, 163, 173, 164, 158, 186, 156, 167, 159, 174, 174, 170, 156, 180, 156, 164, 191, 147, 162, 190, 163, 147, 162, 153, 185, 166, 168, 166, 159, 166, 175, 153, 158, 189, 164, 173, 163, 161, 187, 152, 190, 152, 158, 172, 177, 151, 177, 172, 151, 199, 156, 145, 160, 200, 140, 163, 176, 161, 161, 174, 165, 176, 169, 155, 172, 158, 170, 175, 174, 151, 174, 167, 159, 166, 166, 168, 190, 153, 157, 161, 172, 167, 166, 181, 153, 160, 170, 170, 164, 183, 153, 165, 160, 175, 170, 167, 163, 164, 157, 179, 166, 155, 179, 161, 172, 167, 164, 174, 162, 158, 179, 163, 176, 147, 177, 146, 177, 177, 170, 150, 180, 153, 175, 172, 167, 169, 164, 167, 161, 172, 147, 171, 182, 169, 179, 152, 149, 179, 172, 167, 163, 170, 170, 172, 158, 164, 165, 171, 178, 166, 156, 167, 168, 165, 164, 153, 183, 175, 155, 170, 159, 171, 170, 157, 163, 180, 163, 161, 176, 164, 184, 152, 170, 191, 139, 163, 158, 179, 164, 176, 160, 172, 168, 160, 155, 162, 183, 175, 177, 148, 170, 181, 149, 179, 180, 141, 156, 168, 176, 162, 166, 172, 145, 161, 194, 188, 166, 146, 164, 157, 179, 143, 181, 176, 152, 183, 165, 164, 170, 166, 168, 153, 179, 161, 172, 167, 171, 156, 173, 181, 158, 161, 168, 161, 171, 179, 168, 153, 167, 186, 147, 167, 174, 159, 175, 160, 165, 166, 159, 175, 3175.9600239999986 47 | 100, 50000, 7.56, 16486, 16666, 16848, 144, 165, 191, 145, 180, 175, 196, 129, 175, 151, 186, 163, 170, 166, 164, 172, 172, 156, 189, 142, 169, 171, 171, 158, 148, 189, 163, 159, 172, 169, 130, 188, 182, 168, 157, 175, 166, 162, 172, 165, 176, 159, 147, 156, 197, 171, 158, 171, 160, 170, 170, 167, 175, 158, 158, 168, 174, 183, 143, 174, 167, 156, 177, 166, 157, 177, 189, 159, 152, 147, 172, 181, 185, 160, 155, 162, 135, 203, 157, 175, 168, 171, 153, 176, 153, 175, 172, 167, 167, 166, 163, 154, 183, 155, 176, 169, 173, 154, 173, 164, 170, 166, 172, 157, 171, 149, 162, 189, 171, 174, 155, 163, 162, 175, 153, 179, 168, 145, 178, 177, 171, 170, 159, 140, 183, 177, 189, 143, 168, 144, 190, 166, 156, 174, 170, 191, 171, 138, 158, 138, 204, 168, 167, 165, 150, 177, 173, 174, 145, 181, 157, 172, 171, 173, 158, 169, 159, 189, 152, 181, 154, 165, 171, 151, 178, 151, 162, 187, 173, 154, 173, 193, 169, 138, 177, 153, 170, 181, 167, 152, 180, 187, 133, 178, 171, 151, 158, 157, 185, 156, 186, 158, 138, 180, 182, 179, 161, 160, 155, 168, 177, 169, 176, 155, 173, 150, 177, 178, 162, 160, 161, 178, 161, 158, 175, 167, 153, 161, 186, 135, 179, 186, 162, 167, 171, 148, 171, 181, 194, 166, 140, 173, 162, 165, 174, 150, 176, 146, 188, 166, 176, 172, 152, 162, 163, 175, 191, 168, 141, 144, 184, 172, 163, 184, 153, 167, 172, 161, 143, 178, 179, 180, 152, 168, 150, 179, 171, 178, 153, 169, 160, 170, 170, 169, 178, 153, 172, 148, 180, 179, 156, 165, 153, 174, 173, 185, 167, 148, 158, 167, 175, 175, 171, 154, 158, 167, 175, 166, 181, 153, 3228.1533041999983 48 | 49 | test 50 | "1": 7.83, 51 | "2": 8.71, 52 | "3": 7.21, 53 | "4": 9.76, 54 | "5": 10.4, 55 | "6": 9.55, 56 | "7": 10.2, 57 | "8": 9.03, 58 | "9": 9.02, 59 | "10": 8.0 -------------------------------------------------------------------------------- /slime_environments/environments/SlimeEnvSingleAgent.py: -------------------------------------------------------------------------------- 1 | import json 2 
| import random 3 | import sys 4 | from typing import Optional 5 | from itertools import product 6 | 7 | import gym 8 | import numpy as np 9 | import pygame 10 | from gym import spaces 11 | from gym.spaces import MultiBinary 12 | 13 | BLACK = (0, 0, 0) 14 | BLUE = (0, 0, 255) 15 | WHITE = (255, 255, 255) 16 | RED = (190, 0, 0) 17 | GREEN = (0, 190, 0) 18 | 19 | 20 | class BooleanSpace(gym.Space): 21 | @property 22 | def is_np_flattenable(self): 23 | return True 24 | 25 | def __init__(self, size=None): 26 | """ 27 | A space of boolean values 28 | :param size: how many boolean values the space is made of 29 | """ 30 | assert isinstance(size, int) and size > 0 31 | self.size = size 32 | self._values = list(product([True, False], repeat=self.size)) 33 | gym.Space.__init__(self, (2,), bool) 34 | 35 | def contains(self, x): 36 | return x in self._values 37 | 38 | def sample(self): 39 | return random.choice(self._values) 40 | # return self.values 41 | 42 | 43 | class Slime(gym.Env): 44 | metadata = {"render_modes": ["human"], "render_fps": 30} 45 | 46 | def __init__(self, 47 | render_mode: Optional[str] = None, 48 | **kwargs): 49 | """ 50 | :param population: Controls the number of non-learning slimes (= green turtles) 51 | :param sniff_threshold: Controls how sensitive slimes are to pheromone (higher values make slimes less 52 | sensitive to pheromone)—unclear effect on learning, could be negligible 53 | :param diffuse_area Controls the diffusion radius 54 | :param diffuse_mode Controls in which order patches with pheromone to diffuse are visited: 55 | 'simple' = Python-dependant (dict keys "ordering") 56 | 'rng' = random visiting 57 | 'sorted' = diffuse first the patches with more pheromone 58 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 59 | 'cascade' = step-by-step, incremental (recursive) diffusion within 'diffuse_area' 60 | :param follow_mode Controls how non-learning agents follow pheromone: 61 | 'det' = follow greatest pheromone 62 | 'prob' = follow greatest pheromone probabilistically (pheromone strength as weight) 63 | :param smell_area: Controls the radius of the square area sorrounding the turtle whithin which it smells pheromone 64 | :param lay_area: Controls the radius of the square area sorrounding the turtle where pheromone is laid 65 | :param lay_amount: Controls how much pheromone is laid 66 | :param evaporation: Controls how much pheromone evaporates at each step 67 | :param cluster_threshold: Controls the minimum number of slimes needed to consider an aggregate within 68 | cluster-radius a cluster (the higher the more difficult to consider an aggregate a 69 | cluster)—the higher the more difficult to obtain a positive reward for being within 70 | a cluster for learning slimes 71 | :param cluster_radius: Controls the range considered by slimes to count other slimes within a cluster (the 72 | higher the easier to form clusters, as turtles far apart are still counted together) 73 | —the higher the easier it is to obtain a positive reward for being within a cluster 74 | for learning slimes 75 | :param rew: Base reward for being in a cluster 76 | :param penalty: Base penalty for not being in a cluster 77 | :param episode_ticks: Number of ticks for episode termination 78 | :param W: Window width in # patches 79 | :param H: Window height in # patches 80 | :param PATCH_SIZE: Patch size in pixels 81 | :param TURTLE_SIZE: Turtle size in pixels 82 | :param FPS: Rendering FPS 83 | :param SHADE_STRENGTH: Strength of color shading for pheromone rendering 
(higher -> brighter color) 84 | :param SHOW_CHEM_TEXT: Whether to show pheromone amount on patches (when >= sniff-threshold) 85 | :param CLUSTER_FONT_SIZE: Font size of cluster number (for overlapping agents) 86 | :param CHEMICAL_FONT_SIZE: Font size of phermone amount (if SHOW_CHEM_TEXT is true) 87 | :param render_mode: 88 | """ 89 | assert render_mode is None or render_mode in self.metadata["render_modes"] 90 | 91 | self.population = kwargs['population'] 92 | self.sniff_threshold = kwargs['sniff_threshold'] 93 | self.diffuse_area = kwargs['diffuse_area'] 94 | self.smell_area = kwargs['smell_area'] 95 | self.lay_area = kwargs['lay_area'] 96 | self.lay_amount = kwargs['lay_amount'] 97 | self.evaporation = kwargs['evaporation'] 98 | self.diffuse_mode = kwargs['diffuse_mode'] 99 | self.follow_mode = kwargs['follow_mode'] 100 | self.cluster_threshold = kwargs['cluster_threshold'] 101 | self.cluster_radius = kwargs['cluster_radius'] 102 | self.reward = kwargs['rew'] 103 | self.penalty = kwargs['penalty'] 104 | self.episode_ticks = kwargs['episode_ticks'] 105 | 106 | self.W = kwargs['W'] 107 | self.H = kwargs['H'] 108 | self.patch_size = kwargs['PATCH_SIZE'] 109 | self.turtle_size = kwargs['TURTLE_SIZE'] 110 | self.fps = kwargs['FPS'] 111 | self.shade_strength = kwargs['SHADE_STRENGTH'] 112 | self.show_chem_text = kwargs['SHOW_CHEM_TEXT'] 113 | self.cluster_font_size = kwargs['CLUSTER_FONT_SIZE'] 114 | self.chemical_font_size = kwargs['CHEMICAL_FONT_SIZE'] 115 | 116 | self.coords = [] 117 | self.offset = self.patch_size // 2 118 | self.W_pixels = self.W * self.patch_size 119 | self.H_pixels = self.H * self.patch_size 120 | for x in range(self.offset, (self.W_pixels - self.offset) + 1, self.patch_size): 121 | for y in range(self.offset, (self.H_pixels - self.offset) + 1, self.patch_size): 122 | self.coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 123 | 124 | n_coords = len(self.coords) 125 | # create learner turtle 126 | self.learner = {"pos": self.coords[np.random.randint(n_coords)]} 127 | # create NON learner turtles 128 | self.turtles = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population)} 129 | 130 | # patches-own [chemical] - amount of pheromone in each patch 131 | self.patches = {self.coords[i]: {"id": i, 132 | 'chemical': 0.0, 133 | 'turtles': []} for i in range(n_coords)} 134 | self.patches[self.learner['pos']]['turtles'].append(-1) # DOC id of learner turtle 135 | for t in self.turtles: 136 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 137 | 138 | # pre-compute relevant structures to speed-up computation during rendering steps 139 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed smell area for each patch, including itself 140 | self.smell_patches = {} 141 | self._find_neighbours(self.smell_patches, self.smell_area) 142 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed lay area for each patch, including itself 143 | self.lay_patches = {} 144 | self._find_neighbours(self.lay_patches, self.lay_area) 145 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed diffusion area for each patch, including itself 146 | self.diffuse_patches = {} 147 | if self.diffuse_mode == 'cascade': 148 | self._find_neighbours_cascade(self.diffuse_patches, self.diffuse_area) 149 | else: 150 | self._find_neighbours(self.diffuse_patches, self.diffuse_area) 151 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed cluster-check for each patch, including itself 152 | self.cluster_patches = {} 153 | self._find_neighbours(self.cluster_patches, 
self.cluster_radius) 154 | 155 | self.action_space = spaces.Discrete(3) # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone TODO as dict 156 | self.observation_space = MultiBinary(2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch 157 | self._action_to_name = {0: "random-walk", 1: "drop-chemical", 2: "move-toward-chemical"} 158 | 159 | self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels)) 160 | self.clock = pygame.time.Clock() 161 | pygame.font.init() 162 | self.cluster_font = pygame.font.SysFont("arial", self.cluster_font_size) 163 | self.chemical_font = pygame.font.SysFont("arial", self.chemical_font_size) 164 | 165 | self.rewards = [] 166 | self.cluster_ticks = 0 167 | 168 | self.first_gui = True 169 | 170 | def _find_neighbours_cascade(self, neighbours: dict, area: int): 171 | """ 172 | For each patch, find neighbouring patches within square radius 'area', 1 step at a time 173 | (visiting first 1-hop patches, then 2-hops patches, and so on) 174 | 175 | :param neighbours: empty dictionary to fill 176 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 177 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 178 | :return: None (1st argument modified as side effect) 179 | """ 180 | for p in self.patches: 181 | neighbours[p] = [] 182 | for ring in range(area): 183 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 184 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 185 | #x, y = self._wrap(x, y) 186 | if (x, y) not in neighbours[p]: 187 | neighbours[p].append((x, y)) 188 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 189 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 190 | #x, y = self._wrap(x, y) 191 | if (x, y) not in neighbours[p]: 192 | neighbours[p].append((x, y)) 193 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 194 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 195 | #x, y = self._wrap(x, y) 196 | if (x, y) not in neighbours[p]: 197 | neighbours[p].append((x, y)) 198 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, -self.patch_size): 199 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, self.patch_size): 200 | #x, y = self._wrap(x, y) 201 | if (x, y) not in neighbours[p]: 202 | neighbours[p].append((x, y)) 203 | neighbours[p] = [self._wrap(x, y) for (x, y) in neighbours[p]] 204 | #neighbours[p] = list(set(neighbours[p])) 205 | 206 | def _find_neighbours(self, neighbours: dict, area: int): 207 | """ 208 | For each patch, find neighbouring patches within square radius 'area' 209 | 210 | :param neighbours: empty dictionary to fill 211 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 212 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 213 | :return: None (1st argument modified as side effect) 214 | """ 215 | for p in self.patches: 216 | neighbours[p] = [] 217 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 218 | for y in 
range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 219 | x, y = self._wrap(x, y) 220 | neighbours[p].append((x, y)) 221 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 222 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 223 | x, y = self._wrap(x, y) 224 | neighbours[p].append((x, y)) 225 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 226 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 227 | x, y = self._wrap(x, y) 228 | neighbours[p].append((x, y)) 229 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 230 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 231 | x, y = self._wrap(x, y) 232 | neighbours[p].append((x, y)) 233 | neighbours[p] = list(set(neighbours[p])) 234 | 235 | def _wrap(self, x: int, y: int): 236 | """ 237 | Wrap x,y coordinates around the torus 238 | 239 | :param x: the x coordinate to wrap 240 | :param y: the y coordinate to wrap 241 | :return: the wrapped x, y 242 | """ 243 | if x < 0: 244 | x = self.W_pixels + x 245 | elif x > self.W_pixels: 246 | x = x - self.W_pixels 247 | if y < 0: 248 | y = self.H_pixels + y 249 | elif y > self.H_pixels: 250 | y = y - self.H_pixels 251 | return x, y 252 | 253 | def step(self, action: int): 254 | """ 255 | OpenAI Gym env step function. Actions are: 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 256 | 257 | :param action: 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 258 | :return: current observation, current reward, episode done, info 259 | """ 260 | 261 | # non learners act 262 | for turtle in self.turtles: 263 | pos = self.turtles[turtle]['pos'] 264 | t = self.turtles[turtle] 265 | max_pheromone, max_coords = self._find_max_pheromone(pos) 266 | 267 | if max_pheromone >= self.sniff_threshold: 268 | self.follow_pheromone(max_coords, t, turtle) 269 | else: 270 | self.walk(t, turtle) 271 | 272 | self.lay_pheromone(self.turtles[turtle]['pos'], self.lay_amount) 273 | 274 | # learner acts 275 | if action == 0: # DOC walk 276 | self.walk(self.learner, -1) 277 | elif action == 1: # DOC lay_pheromone 278 | self.lay_pheromone(self.learner['pos'], self.lay_amount) 279 | elif action == 2: # DOC follow_pheromone 280 | max_pheromone, max_coords = self._find_max_pheromone(self.learner['pos']) 281 | if max_pheromone >= self.sniff_threshold: 282 | self.follow_pheromone(max_coords, self.learner, -1) 283 | else: 284 | self.walk(self.learner, -1) 285 | 286 | self._diffuse() 287 | self._evaporate() 288 | 289 | cur_reward = self.reward_cluster_and_time_punish_time() 290 | 291 | return self._get_obs(), cur_reward, False, {} # DOC Gym v26 has additional 'truncated' boolean 292 | 293 | def lay_pheromone(self, pos, amount: int): 294 | """ 295 | Lay 'amount' pheromone in square 'area' centred in 'pos' 296 | 297 | :param pos: the x,y position taken as centre of pheromone deposit area 298 | :param amount: the amount of pheromone to deposit 299 | :return: None (environment properties are changed as side effect) 300 | """ 301 | for p in self.lay_patches[pos]: 302 | self.patches[p]['chemical'] += amount 303 | 304 | def _diffuse(self): 305 | """ 306 | Diffuses pheromone from each patch to nearby patches controlled through self.diffuse_area patches in a way 307 | controlled through self.diffuse_mode: 308 | 'simple' = Python-dependant (dict keys "ordering") 309 | 'rng' = random visiting 310 | 'sorted' = diffuse first the patches with more pheromone 311 | 'filter' = 
do not re-diffuse patches receiving pheromone due to diffusion 312 | 313 | :return: None (environment properties are changed as side effect) 314 | """ 315 | n_size = len(self.diffuse_patches[list(self.patches.keys())[0]]) # same for every patch 316 | patch_keys = list(self.patches.keys()) 317 | if self.diffuse_mode == 'rng': 318 | random.shuffle(patch_keys) 319 | elif self.diffuse_mode == 'sorted': 320 | patch_list = list(self.patches.items()) 321 | patch_list = sorted(patch_list, key=lambda t: t[1]['chemical'], reverse=True) 322 | patch_keys = [t[0] for t in patch_list] 323 | elif self.diffuse_mode == 'filter': 324 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 325 | elif self.diffuse_mode == 'rng-filter': 326 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 327 | random.shuffle(patch_keys) 328 | for patch in patch_keys: 329 | p = self.patches[patch]['chemical'] 330 | ratio = p / n_size 331 | if p > 0: 332 | diffuse_keys = self.diffuse_patches[patch][:] 333 | for n in diffuse_keys: 334 | self.patches[n]['chemical'] += ratio 335 | self.patches[patch]['chemical'] = ratio 336 | 337 | def _evaporate(self): 338 | """ 339 | Evaporates pheromone from each patch according to param self.evaporation 340 | 341 | :return: None (environment properties are changed as side effect) 342 | """ 343 | for patch in self.patches.keys(): 344 | if self.patches[patch]['chemical'] > 0: 345 | self.patches[patch]['chemical'] *= self.evaporation 346 | 347 | def walk(self, turtle: dict[str: tuple[int, int]], _id: int): 348 | """ 349 | Action 0: move in random direction (8 sorrounding cells) 350 | 351 | :param _id: the id of the turtle to move 352 | :param turtle: the turtle to move (dict mapping 'pos' to position as x,y) 353 | :return: None (pos is updated after movement as side-effect) 354 | """ 355 | choice = [self.patch_size, -self.patch_size, 0] 356 | x, y = turtle['pos'] 357 | self.patches[turtle['pos']]['turtles'].remove(_id) 358 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 359 | x2, y2 = self._wrap(x2, y2) 360 | turtle['pos'] = (x2, y2) 361 | self.patches[turtle['pos']]['turtles'].append(_id) 362 | 363 | def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int): 364 | """ 365 | Action 2: move turtle towards greatest pheromone found 366 | 367 | :param _id: the id of the turtle to move 368 | :param ph_coords: the position where max pheromone has been sensed 369 | :param turtle: the turtle looking for pheromone 370 | :return: None (pos is updated after movement as side-effect) 371 | """ 372 | x, y = turtle['pos'] 373 | self.patches[turtle['pos']]['turtles'].remove(_id) 374 | if ph_coords[0] > x and ph_coords[1] > y: # top right 375 | x += self.patch_size 376 | y += self.patch_size 377 | elif ph_coords[0] < x and ph_coords[1] < y: # bottom left 378 | x -= self.patch_size 379 | y -= self.patch_size 380 | elif ph_coords[0] > x and ph_coords[1] < y: # bottom right 381 | x += self.patch_size 382 | y -= self.patch_size 383 | elif ph_coords[0] < x and ph_coords[1] > y: # top left 384 | x -= self.patch_size 385 | y += self.patch_size 386 | elif ph_coords[0] == x and ph_coords[1] < y: # below me 387 | y -= self.patch_size 388 | elif ph_coords[0] == x and ph_coords[1] > y: # above me 389 | y += self.patch_size 390 | elif ph_coords[0] > x and ph_coords[1] == y: # right 391 | x += self.patch_size 392 | elif ph_coords[0] < x and ph_coords[1] == y: # left 393 | x -= self.patch_size 394 | else: # my patch 
395 |             pass
396 |         x, y = self._wrap(x, y)
397 |         turtle['pos'] = (x, y)
398 |         self.patches[turtle['pos']]['turtles'].append(_id)
399 | 
400 |     def _find_max_pheromone(self, pos: tuple[int, int]):
401 |         """
402 |         Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
403 |         Following pheromone mode is controlled by param self.follow_mode:
404 |         'det' = follow greatest pheromone
405 |         'prob' = follow greatest pheromone probabilistically (pheromone strength as weight)
406 | 
407 |         :param pos: the x,y position of the turtle looking for pheromone
408 |         :return: the maximum pheromone level found and its x,y position
409 |         """
410 |         if self.follow_mode == "prob":
411 |             population = [k for k in self.smell_patches[pos]]
412 |             weights = [self.patches[k]['chemical'] for k in self.smell_patches[pos]]
413 |             if all([w == 0 for w in weights]):
414 |                 winner = population[np.random.choice(len(population))]
415 |             else:
416 |                 winner = random.choices(population, weights=weights, k=1)[0]
417 |             max_ph = self.patches[winner]['chemical']
418 |         else:
419 |             max_ph = -1
420 |             max_pos = [pos]
421 |             for p in self.smell_patches[pos]:
422 |                 chem = self.patches[p]['chemical']
423 |                 if chem > max_ph:
424 |                     max_ph = chem
425 |                     max_pos = [p]
426 |                 elif chem == max_ph:
427 |                     max_pos.append(p)
428 |             winner = max_pos[np.random.choice(len(max_pos))]
429 | 
430 |         return max_ph, winner
431 | 
432 |     def _compute_cluster(self):
433 |         """
434 |         Computes the size of the cluster the learner turtle is within, given 'cluster_radius'
435 | 
436 |         :return: the cluster size (an integer count of the turtles within 'cluster_radius' of the learner)
437 |         """
438 |         cluster = 1
439 |         for p in self.cluster_patches[self.learner['pos']]:
440 |             cluster += len(self.patches[p]['turtles'])
441 | 
442 |         return cluster
443 | 
444 |     def _check_chemical(self):
445 |         """
446 |         Checks whether there is pheromone on the patch where the learner turtle is
447 | 
448 |         :return: a boolean
449 |         """
450 |         return self.patches[self.learner['pos']][
451 |                    'chemical'] >= self.sniff_threshold
452 | 
453 |     def reward_cluster_punish_time(self):
454 |         """
455 |         Reward is (positive) proportional to cluster size (quadratic) and (negative) proportional to time spent outside
456 |         clusters
457 | 
458 |         :return: the reward
459 |         """
460 |         cluster = self._compute_cluster()
461 |         if cluster >= self.cluster_threshold:
462 |             self.cluster_ticks += 1
463 | 
464 |         cur_reward = ((cluster ** 2) / self.cluster_threshold) * self.reward + (
465 |                 ((self.episode_ticks - self.cluster_ticks) / self.episode_ticks) * self.penalty)
466 | 
467 |         self.rewards.append(cur_reward)
468 |         return cur_reward
469 | 
470 |     def reward_cluster_and_time_punish_time(self):
471 |         """
472 |         Reward grows with time spent within clusters and with cluster size, and shrinks with time spent outside clusters
473 |         :return: the reward
474 |         """
475 |         cluster = self._compute_cluster()
476 |         if cluster >= self.cluster_threshold:
477 |             self.cluster_ticks += 1
478 | 
479 |         cur_reward = (self.cluster_ticks / self.episode_ticks) * self.reward + \
480 |                      (cluster / self.cluster_threshold) * (self.reward ** 2) + \
481 |                      (((self.episode_ticks - self.cluster_ticks) / self.episode_ticks) * self.penalty)
482 | 
483 |         self.rewards.append(cur_reward)
484 |         return cur_reward
485 | 
486 |     def reset(self):
487 |         # super().reset()
488 |         # empty stuff
489 |         self.rewards = []
490 |         self.cluster_ticks = 0
491 | 
492 |         # re-position learner turtle
493 |         self.patches[self.learner['pos']]['turtles'].remove(-1)
494 |         self.learner['pos'] = self.coords[np.random.randint(len(self.coords))]
495 |         self.patches[self.learner['pos']]['turtles'].append(-1)  # DOC id of learner turtle
496 |         # re-position NON learner turtles
497 |         for t in self.turtles:
498 |             self.patches[self.turtles[t]['pos']]['turtles'].remove(t)
499 |             self.turtles[t]['pos'] = self.coords[np.random.randint(len(self.coords))]
500 |             self.patches[self.turtles[t]['pos']]['turtles'].append(t)
501 |         # patches-own [chemical] - amount of pheromone in the patch
502 |         for p in self.patches:
503 |             self.patches[p]['chemical'] = 0.0
504 | 
505 |         return self._get_obs()
506 | 
507 |     def render(self, mode="human", **kwargs):
508 |         for event in pygame.event.get():
509 |             if event.type == pygame.QUIT:  # window closed -> program quits
510 |                 pygame.quit()
511 | 
512 |         if self.first_gui:
513 |             self.first_gui = False
514 |             pygame.init()
515 |             pygame.display.set_caption("SLIME")
516 | 
517 |         self.screen.fill(BLACK)
518 |         # draw patches
519 |         for p in self.patches:
520 |             chem = round(self.patches[p]['chemical']) * self.shade_strength
521 |             pygame.draw.rect(self.screen, (0, chem if chem <= 255 else 255, 0),
522 |                              pygame.Rect(p[0] - self.offset, p[1] - self.offset, self.patch_size, self.patch_size))
523 |             if self.show_chem_text and (sys.gettrace() is not None or
524 |                                         self.patches[p]['chemical'] >= self.sniff_threshold):  # if debugging show text everywhere, even 0
525 |                 text = self.chemical_font.render(str(round(self.patches[p]['chemical'], 1)), True, GREEN)
526 |                 self.screen.blit(text, text.get_rect(center=p))
527 | 
528 |         # draw learner
529 |         pygame.draw.circle(self.screen, RED, (self.learner['pos'][0], self.learner['pos'][1]),
530 |                            self.turtle_size // 2)
531 |         # draw NON learners
532 |         for turtle in self.turtles.values():
533 |             pygame.draw.circle(self.screen, BLUE, (turtle['pos'][0], turtle['pos'][1]), self.turtle_size // 2)
534 | 
535 |         for p in self.patches:
536 |             if len(self.patches[p]['turtles']) > 1:
537 |                 text = self.cluster_font.render(str(len(self.patches[p]['turtles'])), True,
538 |                                                 RED if -1 in self.patches[p]['turtles'] else WHITE)
539 |                 self.screen.blit(text, text.get_rect(center=p))
540 | 
541 |         self.clock.tick(self.fps)
542 |         pygame.display.flip()
543 | 
544 |     def close(self):
545 |         if self.screen is not None:
546 |             pygame.display.quit()
547 |             pygame.quit()
548 | 
549 |     def _get_obs(self):
550 |         return np.array([self._compute_cluster() >= self.cluster_threshold, self._check_chemical()])
551 | 
552 | 
553 | if __name__ == "__main__":
554 |     PARAMS_FILE = "../agents/single-agent-params.json"
555 |     EPISODES = 5
556 |     LOG_EVERY = 1
557 | 
558 |     with open(PARAMS_FILE) as f:
559 |         params = json.load(f)
560 |     env = Slime(render_mode="human", **params)
561 | 
562 |     for ep in range(1, EPISODES + 1):
563 |         env.reset()
564 |         print(
565 |             f"-------------------------------------------\nEPISODE: {ep}\n-------------------------------------------")
566 |         for tick in range(params['episode_ticks']):
567 |             observation, reward, _, _ = env.step(env.action_space.sample())  # step() returns (obs, reward, done, info)
568 |             if tick % LOG_EVERY == 0:
569 |                 print(f"{tick}: {observation}, {reward}")
570 |             env.render()
571 |     env.close()
--------------------------------------------------------------------------------
/slime_environments/environments/SlimeEnvMultiAgent.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import sys
4 | from typing import Optional
5 | 
6 | import gym
7 | import numpy as np
8 | import pygame
9 | from gym import spaces
10 | from pettingzoo import AECEnv
11 | from pettingzoo.utils import agent_selector
12 | from pettingzoo.utils.env import ObsType
13 | 
14 | BLACK = (0, 0, 0)
15 | BLUE = (0, 0, 255)
16 | WHITE = (255, 255, 
255) 17 | RED = (190, 0, 0) 18 | GREEN = (0, 190, 0) 19 | 20 | 21 | class BooleanSpace(gym.Space): 22 | def __init__(self, size=None): 23 | """ 24 | A space of boolean values 25 | :param size: how many boolean values the space is made of 26 | """ 27 | assert isinstance(size, int) and size > 0 28 | self.size = size 29 | self.values = [False for _ in range(self.size)] 30 | gym.Space.__init__(self, (), bool) 31 | 32 | def contains(self, x): 33 | return x in self.values 34 | 35 | def sample(self): 36 | return [random.choice([True, False]) for _ in range(self.size)] 37 | # return self.values 38 | 39 | def observe(self): 40 | """ 41 | Get the current observation 42 | :return: the current observation 43 | """ 44 | return self.values 45 | 46 | def change(self, p, value): 47 | """ 48 | Set a specific boolean value for the current observation 49 | :param p: which boolean values to change (position index) 50 | :param value: the boolean value to set 51 | :return: None 52 | """ 53 | self.values[p] = value 54 | 55 | def change_all(self, values): 56 | """ 57 | Set all the boolean values for the current observation 58 | :param values: the boolean values to set 59 | :return: None 60 | """ 61 | self.values = values 62 | 63 | 64 | class Slime(AECEnv): 65 | def seed(self, seed: Optional[int] = None) -> None: 66 | pass 67 | 68 | def observe(self, agent: str) -> ObsType: 69 | pass 70 | 71 | def state(self) -> np.ndarray: 72 | pass 73 | 74 | metadata = {"render_modes": ["human", "server"]} 75 | 76 | def __init__(self, 77 | render_mode: Optional[str] = None, 78 | **kwargs): 79 | """ 80 | :param population: Controls the number of non-learning slimes (= green turtles) 81 | :param sniff_threshold: Controls how sensitive slimes are to pheromone (higher values make slimes less 82 | sensitive to pheromone)—unclear effect on learning, could be negligible 83 | :param diffuse_area Controls the diffusion radius 84 | :param diffuse_mode Controls in which order patches with pheromone to diffuse are visited: 85 | 'simple' = Python-dependant (dict keys "ordering") 86 | 'rng' = random visiting 87 | 'sorted' = diffuse first the patches with more pheromone 88 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 89 | 'cascade' = step-by-step diffusion within 'diffuse_area' 90 | :param follow_mode Controls how non-learning agents follow pheromone: 91 | 'det' = follow greatest pheromone 92 | 'prob' = follow greatest pheromone probabilistically (pheromone strength as weight) 93 | :param smell_area: Controls the radius of the square area sorrounding the turtle whithin which it smells pheromone 94 | :param lay_area: Controls the radius of the square area sorrounding the turtle where pheromone is laid 95 | :param lay_amount: Controls how much pheromone is laid 96 | :param evaporation: Controls how much pheromone evaporates at each step 97 | :param cluster_threshold: Controls the minimum number of slimes needed to consider an aggregate within 98 | cluster-radius a cluster (the higher the more difficult to consider an aggregate a 99 | cluster)—the higher the more difficult to obtain a positive reward for being within 100 | a cluster for learning slimes 101 | :param cluster_radius: Controls the range considered by slimes to count other slimes within a cluster (the 102 | higher the easier to form clusters, as turtles far apart are still counted together) 103 | —the higher the easier it is to obtain a positive reward for being within a cluster 104 | for learning slimes 105 | :param rew: Base reward for being in a 
cluster 106 | :param penalty: Base penalty for not being in a cluster 107 | :param episode_ticks: Number of ticks for episode termination 108 | :param W: Window width in # patches 109 | :param H: Window height in # patches 110 | :param PATCH_SIZE: Patch size in pixels 111 | :param TURTLE_SIZE: Turtle size in pixels 112 | :param FPS: Rendering FPS 113 | :param SHADE_STRENGTH: Strength of color shading for pheromone rendering (higher -> brighter color) 114 | :param SHOW_CHEM_TEXT: Whether to show pheromone amount on patches (when >= sniff-threshold) 115 | :param CLUSTER_FONT_SIZE: Font size of cluster number (for overlapping agents) 116 | :param CHEMICAL_FONT_SIZE: Font size of phermone amount (if SHOW_CHEM_TEXT is true) 117 | :param render_mode: 118 | """ 119 | assert render_mode is None or render_mode in self.metadata["render_modes"] 120 | 121 | self.population = kwargs['population'] 122 | self.learner_population = kwargs['learner_population'] 123 | self.sniff_threshold = kwargs['sniff_threshold'] 124 | self.diffuse_area = kwargs['diffuse_area'] 125 | self.smell_area = kwargs['smell_area'] 126 | self.lay_area = kwargs['lay_area'] 127 | self.lay_amount = kwargs['lay_amount'] 128 | self.evaporation = kwargs['evaporation'] 129 | self.diffuse_mode = kwargs['diffuse_mode'] 130 | self.follow_mode = kwargs['follow_mode'] 131 | self.cluster_threshold = kwargs['cluster_threshold'] 132 | self.cluster_radius = kwargs['cluster_radius'] 133 | self.reward = kwargs['rew'] 134 | self.penalty = kwargs['penalty'] 135 | self.episode_ticks = kwargs['episode_ticks'] 136 | 137 | self.W = kwargs['W'] 138 | self.H = kwargs['H'] 139 | self.patch_size = kwargs['PATCH_SIZE'] 140 | self.turtle_size = kwargs['TURTLE_SIZE'] 141 | self.fps = kwargs['FPS'] 142 | self.shade_strength = kwargs['SHADE_STRENGTH'] 143 | self.show_chem_text = kwargs['SHOW_CHEM_TEXT'] 144 | self.cluster_font_size = kwargs['CLUSTER_FONT_SIZE'] 145 | self.chemical_font_size = kwargs['CHEMICAL_FONT_SIZE'] 146 | self.gui = kwargs["gui"] 147 | 148 | self.coords = [] 149 | self.offset = self.patch_size // 2 150 | self.W_pixels = self.W * self.patch_size 151 | self.H_pixels = self.H * self.patch_size 152 | for x in range(self.offset, (self.W_pixels - self.offset) + 1, self.patch_size): 153 | for y in range(self.offset, (self.H_pixels - self.offset) + 1, self.patch_size): 154 | self.coords.append((x, y)) # "centre" of the patch or turtle (also ID of the patch) 155 | 156 | pop_tot = self.population + self.learner_population 157 | self.agents = [i for i in range(self.population, pop_tot)] # DOC learning agents IDs 158 | self._agent_selector = agent_selector(self.agents) 159 | self.agent = self._agent_selector.next() 160 | 161 | n_coords = len(self.coords) 162 | # create learners turtle 163 | self.learners = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population, pop_tot)} 164 | # create NON learner turtles 165 | self.turtles = {i: {"pos": self.coords[np.random.randint(n_coords)]} for i in range(self.population)} 166 | 167 | # patches-own [chemical] - amount of pheromone in each patch 168 | self.patches = {self.coords[i]: {"id": i, 169 | 'chemical': 0.0, 170 | 'turtles': []} for i in range(n_coords)} 171 | for l in self.learners: 172 | self.patches[self.learners[l]['pos']]['turtles'].append(l) # DOC id of learner turtles 173 | for t in self.turtles: 174 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 175 | 176 | # pre-compute relevant structures to speed-up computation during rendering steps 177 | # DOC {(x,y): 
[(x,y), ..., (x,y)]} pre-computed smell area for each patch, including itself 178 | self.smell_patches = {} 179 | self._find_neighbours(self.smell_patches, self.smell_area) 180 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed lay area for each patch, including itself 181 | self.lay_patches = {} 182 | self._find_neighbours(self.lay_patches, self.lay_area) 183 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed diffusion area for each patch, including itself 184 | self.diffuse_patches = {} 185 | if self.diffuse_mode == 'cascade': 186 | self._find_neighbours_cascade(self.diffuse_patches, self.diffuse_area) 187 | else: 188 | self._find_neighbours(self.diffuse_patches, self.diffuse_area) 189 | # DOC {(x,y): [(x,y), ..., (x,y)]} pre-computed cluster-check for each patch, including itself 190 | self.cluster_patches = {} 191 | self._find_neighbours(self.cluster_patches, self.cluster_radius) 192 | 193 | self.action_spaces = {a: spaces.Discrete(3) for a in 194 | self.agents} # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone 195 | self.observation_space = BooleanSpace( 196 | size=2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch 197 | self.obs_dict = {a: BooleanSpace(size=2) for a in self.agents} 198 | 199 | if self.gui: 200 | self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels)) 201 | self.clock = pygame.time.Clock() 202 | pygame.font.init() 203 | self.cluster_font = pygame.font.SysFont("arial", self.cluster_font_size) 204 | self.chemical_font = pygame.font.SysFont("arial", self.chemical_font_size) 205 | self.first_gui = True 206 | 207 | self.rewards = {i: [] for i in range(self.population, pop_tot)} 208 | self.cluster_ticks = {i: 0 for i in range(self.population, pop_tot)} 209 | 210 | def _find_neighbours_cascade(self, neighbours: dict, area: int): 211 | """ 212 | For each patch, find neighbouring patches within square radius 'area', 1 step at a time 213 | (visiting first 1-hop patches, then 2-hops patches, and so on) 214 | 215 | :param neighbours: empty dictionary to fill 216 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 217 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 218 | :return: None (1st argument modified as side effect) 219 | """ 220 | for p in self.patches: 221 | neighbours[p] = [] 222 | for ring in range(area): 223 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, 224 | self.patch_size): 225 | for y in range(p[1] + (ring * self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, 226 | self.patch_size): 227 | if (x, y) not in neighbours[p]: 228 | neighbours[p].append((x, y)) 229 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, 230 | -self.patch_size): 231 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, 232 | -self.patch_size): 233 | if (x, y) not in neighbours[p]: 234 | neighbours[p].append((x, y)) 235 | for x in range(p[0] + (ring * self.patch_size), p[0] + ((ring + 1) * self.patch_size) + 1, 236 | self.patch_size): 237 | for y in range(p[1] + (ring * self.patch_size), p[1] - ((ring + 1) * self.patch_size) - 1, 238 | -self.patch_size): 239 | if (x, y) not in neighbours[p]: 240 | neighbours[p].append((x, y)) 241 | for x in range(p[0] + (ring * self.patch_size), p[0] - ((ring + 1) * self.patch_size) - 1, 242 | -self.patch_size): 243 | for y in range(p[1] + (ring * 
self.patch_size), p[1] + ((ring + 1) * self.patch_size) + 1, 244 | self.patch_size): 245 | if (x, y) not in neighbours[p]: 246 | neighbours[p].append((x, y)) 247 | neighbours[p] = [self._wrap(x, y) for (x, y) in neighbours[p]] 248 | # neighbours[p] = list(set(neighbours[p])) 249 | 250 | def _find_neighbours(self, neighbours: dict, area: int): 251 | """ 252 | For each patch, find neighbouring patches within square radius 'area' 253 | 254 | :param neighbours: empty dictionary to fill 255 | (will be dict mapping each patch to list of neighouring patches {(x, y): [(nx, ny), ...], ...}) 256 | :param area: integer representing the number of patches to consider in the 8 directions around each patch 257 | :return: None (1st argument modified as side effect) 258 | """ 259 | for p in self.patches: 260 | neighbours[p] = [] 261 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 262 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 263 | x, y = self._wrap(x, y) 264 | neighbours[p].append((x, y)) 265 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 266 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 267 | x, y = self._wrap(x, y) 268 | neighbours[p].append((x, y)) 269 | for x in range(p[0], p[0] + (area * self.patch_size) + 1, self.patch_size): 270 | for y in range(p[1], p[1] - (area * self.patch_size) - 1, -self.patch_size): 271 | x, y = self._wrap(x, y) 272 | neighbours[p].append((x, y)) 273 | for x in range(p[0], p[0] - (area * self.patch_size) - 1, -self.patch_size): 274 | for y in range(p[1], p[1] + (area * self.patch_size) + 1, self.patch_size): 275 | x, y = self._wrap(x, y) 276 | neighbours[p].append((x, y)) 277 | neighbours[p] = list(set(neighbours[p])) 278 | 279 | def _wrap(self, x: int, y: int): 280 | """ 281 | Wrap x,y coordinates around the torus 282 | 283 | :param x: the x coordinate to wrap 284 | :param y: the y coordinate to wrap 285 | :return: the wrapped x, y 286 | """ 287 | if x < 0: 288 | x = self.W_pixels + x 289 | elif x > self.W_pixels: 290 | x = x - self.W_pixels 291 | if y < 0: 292 | y = self.H_pixels + y 293 | elif y > self.H_pixels: 294 | y = y - self.H_pixels 295 | return x, y 296 | 297 | # learners act 298 | def step(self, action: int): 299 | agent_in_charge = self.agent_selection # ID of agent 300 | if action == 0: # DOC walk 301 | self.walk(self.learners[agent_in_charge], agent_in_charge) 302 | elif action == 1: # DOC lay_pheromone 303 | self.lay_pheromone(self.learners[agent_in_charge]['pos'], self.lay_amount) 304 | elif action == 2: # DOC follow_pheromone 305 | max_pheromone, max_coords = self._find_max_pheromone(self.learners[agent_in_charge]['pos']) 306 | if max_pheromone >= self.sniff_threshold: 307 | self.follow_pheromone(max_coords, self.learners[agent_in_charge], agent_in_charge) 308 | else: 309 | self.walk(self.learners[agent_in_charge], agent_in_charge) 310 | 311 | self.agent_selection = self._agent_selector.next() 312 | 313 | # non learners act 314 | def move(self): 315 | for turtle in self.turtles: 316 | pos = self.turtles[turtle]['pos'] 317 | t = self.turtles[turtle] 318 | max_pheromone, max_coords = self._find_max_pheromone(pos) 319 | 320 | if max_pheromone >= self.sniff_threshold: 321 | self.follow_pheromone(max_coords, t, turtle) 322 | else: 323 | self.walk(t, turtle) 324 | 325 | self.lay_pheromone(self.turtles[turtle]['pos'], self.lay_amount) 326 | 327 | # not using ".change_all" method form BooleanSpace 328 | def last(self, current_agent): 329 
| #self._evaporate() 330 | #self._diffuse() 331 | 332 | self.agent = current_agent 333 | self.obs_dict[self.agent].change(0, self._compute_cluster(self.agent) >= self.cluster_threshold) 334 | self.obs_dict[self.agent].change(1, self._check_chemical(self.agent)) 335 | cur_reward = self.reward_cluster_and_time_punish_time(self.agent) 336 | 337 | return self.obs_dict[self.agent], cur_reward, False, {} 338 | 339 | def lay_pheromone(self, pos: tuple[int, int], amount: int): 340 | """ 341 | Lay 'amount' pheromone in square 'area' centred in 'pos' 342 | :param pos: the x,y position taken as centre of pheromone deposit area 343 | :param amount: the amount of pheromone to deposit 344 | :return: None (environment properties are changed as side effect) 345 | """ 346 | for p in self.lay_patches[pos]: 347 | self.patches[p]['chemical'] += amount 348 | 349 | def _diffuse(self): 350 | """ 351 | Diffuses pheromone from each patch to nearby patches controlled through self.diffuse_area patches in a way 352 | controlled through self.diffuse_mode: 353 | 'simple' = Python-dependant (dict keys "ordering") 354 | 'rng' = random visiting 355 | 'sorted' = diffuse first the patches with more pheromone 356 | 'filter' = do not re-diffuse patches receiving pheromone due to diffusion 357 | 358 | :return: None (environment properties are changed as side effect) 359 | """ 360 | n_size = len(self.diffuse_patches[list(self.patches.keys())[0]]) # same for every patch 361 | patch_keys = list(self.patches.keys()) 362 | if self.diffuse_mode == 'rng': 363 | random.shuffle(patch_keys) 364 | elif self.diffuse_mode == 'sorted': 365 | patch_list = list(self.patches.items()) 366 | patch_list = sorted(patch_list, key=lambda t: t[1]['chemical'], reverse=True) 367 | patch_keys = [t[0] for t in patch_list] 368 | elif self.diffuse_mode == 'filter': 369 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 370 | elif self.diffuse_mode == 'rng-filter': 371 | patch_keys = [k for k in self.patches if self.patches[k]['chemical'] > 0] 372 | random.shuffle(patch_keys) 373 | for patch in patch_keys: 374 | p = self.patches[patch]['chemical'] 375 | ratio = p / n_size 376 | if p > 0: 377 | diffuse_keys = self.diffuse_patches[patch][:] 378 | for n in diffuse_keys: 379 | self.patches[n]['chemical'] += ratio 380 | self.patches[patch]['chemical'] = ratio 381 | 382 | def _evaporate(self): 383 | """ 384 | Evaporates pheromone from each patch according to param self.evaporation 385 | 386 | :return: None (environment properties are changed as side effect) 387 | """ 388 | for patch in self.patches.keys(): 389 | if self.patches[patch]['chemical'] > 0: 390 | self.patches[patch]['chemical'] *= self.evaporation 391 | 392 | def walk(self, turtle: dict[str: tuple[int, int]], _id: int): 393 | """ 394 | Action 0: move in random direction (8 sorrounding cells) 395 | 396 | :param _id: the id of the turtle to move 397 | :param turtle: the turtle to move (dict mapping 'pos' to position as x,y) 398 | :return: None (pos is updated after movement as side-effect) 399 | """ 400 | choice = [self.patch_size, -self.patch_size, 0] 401 | x, y = turtle['pos'] 402 | self.patches[turtle['pos']]['turtles'].remove(_id) 403 | x2, y2 = x + np.random.choice(choice), y + np.random.choice(choice) 404 | x2, y2 = self._wrap(x2, y2) 405 | turtle['pos'] = (x2, y2) 406 | self.patches[turtle['pos']]['turtles'].append(_id) 407 | 408 | def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int): 409 | """ 410 | Action 2: move turtle 
towards greatest pheromone found
411 |         :param _id: the id of the turtle to move
412 |         :param ph_coords: the position where max pheromone has been sensed
413 |         :param turtle: the turtle looking for pheromone
414 |         :return: None (pos is updated after movement as side-effect)
415 |         """
416 |         x, y = turtle['pos']
417 |         self.patches[turtle['pos']]['turtles'].remove(_id)
418 |         if ph_coords[0] > x and ph_coords[1] > y:  # top right
419 |             x += self.patch_size
420 |             y += self.patch_size
421 |         elif ph_coords[0] < x and ph_coords[1] < y:  # bottom left
422 |             x -= self.patch_size
423 |             y -= self.patch_size
424 |         elif ph_coords[0] > x and ph_coords[1] < y:  # bottom right
425 |             x += self.patch_size
426 |             y -= self.patch_size
427 |         elif ph_coords[0] < x and ph_coords[1] > y:  # top left
428 |             x -= self.patch_size
429 |             y += self.patch_size
430 |         elif ph_coords[0] == x and ph_coords[1] < y:  # below me
431 |             y -= self.patch_size
432 |         elif ph_coords[0] == x and ph_coords[1] > y:  # above me
433 |             y += self.patch_size
434 |         elif ph_coords[0] > x and ph_coords[1] == y:  # right
435 |             x += self.patch_size
436 |         elif ph_coords[0] < x and ph_coords[1] == y:  # left
437 |             x -= self.patch_size
438 |         else:  # my patch
439 |             pass
440 |         x, y = self._wrap(x, y)
441 |         turtle['pos'] = (x, y)
442 |         self.patches[turtle['pos']]['turtles'].append(_id)
443 | 
444 |     def _find_max_pheromone(self, pos: tuple[int, int]):
445 |         """
446 |         Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
447 |         Following pheromone mode is controlled by param self.follow_mode:
448 |         'det' = follow greatest pheromone
449 |         'prob' = follow greatest pheromone probabilistically (pheromone strength as weight)
450 | 
451 |         :param pos: the x,y position of the turtle looking for pheromone
452 |         :return: the maximum pheromone level found and its x,y position
453 |         """
454 |         if self.follow_mode == "prob":
455 |             population = [k for k in self.smell_patches[pos]]
456 |             weights = [self.patches[k]['chemical'] for k in self.smell_patches[pos]]
457 |             if all([w == 0 for w in weights]):
458 |                 winner = population[np.random.choice(len(population))]
459 |             else:
460 |                 winner = random.choices(population, weights=weights, k=1)[0]
461 |             max_ph = self.patches[winner]['chemical']
462 |         else:
463 |             max_ph = -1
464 |             max_pos = [pos]
465 |             for p in self.smell_patches[pos]:
466 |                 chem = self.patches[p]['chemical']
467 |                 if chem > max_ph:
468 |                     max_ph = chem
469 |                     max_pos = [p]
470 |                 elif chem == max_ph:
471 |                     max_pos.append(p)
472 |             winner = max_pos[np.random.choice(len(max_pos))]
473 | 
474 |         return max_ph, winner
475 | 
476 |     def _compute_cluster(self, current_agent):
477 |         """
478 |         Computes the size of the cluster the learner turtle is within, given 'cluster_radius'
479 | 
480 |         :return: the cluster size (an integer count of nearby turtles)
481 |         """
482 |         self.agent = current_agent
483 |         cluster = 1
484 |         for p in self.cluster_patches[self.learners[self.agent]['pos']]:
485 |             cluster += len(self.patches[p]['turtles'])
486 | 
487 |         return cluster
488 | 
489 |     def avg_cluster(self):
490 |         """
491 |         Compute the average cluster size across learners
492 |         :return: avg cluster size
493 |         """
494 |         cluster_sizes = []  # records each cluster found (as a sorted list of turtle ids)
495 |         for l in self.learners:
496 |             cluster = []  # keeps track of which turtles are in this cluster
497 |             for p in self.cluster_patches[self.learners[l]['pos']]:
498 |                 for t in self.patches[p]['turtles']:
499 |                     cluster.append(t)
500 |             cluster.sort()
501 |             if cluster not in cluster_sizes:
502 |                 cluster_sizes.append(cluster)
503 | 
504 |         # cleaning process: compare clusters (within the same episode) and, if two share more than 90% of their turtles, remove one of them
505 |         for cluster in list(cluster_sizes):  # iterate over copies, since cluster_sizes is modified below
506 |             for cl in list(cluster_sizes):
507 |                 if cl != cluster:
508 |                     intersection = list(set(cluster) & set(cl))
509 |                     if len(intersection) > len(cluster) * 0.90:
510 |                         cluster_sizes.remove(cl)
511 | 
512 |         # compute avg_cluster_size
513 |         somma = 0
514 |         for cluster in cluster_sizes:
515 |             somma += len(cluster)
516 |         avg_cluster_size = somma / len(cluster_sizes)
517 | 
518 |         return avg_cluster_size
519 | 
520 |     def _check_chemical(self, current_agent):
521 |         """
522 |         Checks whether there is pheromone on the patch where the learner turtle is
523 | 
524 |         :return: a boolean
525 |         """
526 |         self.agent = current_agent
527 |         return self.patches[self.learners[self.agent]['pos']][
528 |                    'chemical'] > self.sniff_threshold
529 | 
530 |     # not a real reward function
531 |     def test_reward(self, current_agent):  # inverts the reward logic, GOAL: check for any strange behaviour
532 |         """
533 |         :return: the reward
534 |         """
535 |         self.agent = current_agent
536 |         chem = 0
537 |         for p in self.patches.values():
538 |             if self.agent in p['turtles']:
539 |                 chem = p['chemical']
540 |         if chem >= 5:
541 |             cur_reward = -1000
542 |         else:
543 |             cur_reward = 100
544 | 
545 |         self.rewards[self.agent].append(cur_reward)
546 |         return cur_reward
547 | 
548 |     def reward_cluster_punish_time(self, current_agent):  # DOC NetLogo rewardFunc7
549 |         """
550 |         Reward is (positive) proportional to cluster size (quadratic) and (negative) proportional to time spent outside
551 |         clusters
552 | 
553 |         :return: the reward
554 |         """
555 |         self.agent = current_agent
556 |         cluster = self._compute_cluster(self.agent)
557 |         if cluster >= self.cluster_threshold:
558 |             self.cluster_ticks[self.agent] += 1
559 | 
560 |         cur_reward = ((cluster ** 2) / self.cluster_threshold) * self.reward + (
561 |                 ((self.episode_ticks - self.cluster_ticks[self.agent]) / self.episode_ticks) * self.penalty)
562 | 
563 |         self.rewards[self.agent].append(cur_reward)
564 |         return cur_reward
565 | 
566 |     def reward_cluster_and_time_punish_time(self, current_agent):  # DOC NetLogo rewardFunc8
567 |         """
568 |         Reward grows with time spent within clusters and with cluster size, and shrinks with time spent outside clusters
569 |         :return: the reward
570 |         """
571 |         self.agent = current_agent
572 |         cluster = self._compute_cluster(self.agent)
573 |         if cluster >= self.cluster_threshold:
574 |             self.cluster_ticks[self.agent] += 1
575 | 
576 |         cur_reward = (self.cluster_ticks[self.agent] / self.episode_ticks) * self.reward + \
577 |                      (cluster / self.cluster_threshold) * (self.reward ** 2) + \
578 |                      (((self.episode_ticks - self.cluster_ticks[self.agent]) / self.episode_ticks) * self.penalty)
579 | 
580 |         self.rewards[self.agent].append(cur_reward)
581 |         return cur_reward
582 | 
583 |     def reset(self):
584 |         # empty stuff
585 |         pop_tot = self.population + self.learner_population
586 |         self.rewards = {i: [] for i in range(self.population, pop_tot)}
587 |         self.cluster_ticks = {i: 0 for i in range(self.population, pop_tot)}
588 |         self.obs_dict = {a: BooleanSpace(size=2) for a in self.agents}
589 |         # re-position learner turtles
590 |         for l in self.learners:
591 |             self.patches[self.learners[l]['pos']]['turtles'].remove(l)
592 | 
self.learners[l]['pos'] = self.coords[np.random.randint(len(self.coords))] 593 | self.patches[self.learners[l]['pos']]['turtles'].append(l) # DOC id of learner turtle 594 | # re-position NON learner turtles 595 | for t in self.turtles: 596 | self.patches[self.turtles[t]['pos']]['turtles'].remove(t) 597 | self.turtles[t]['pos'] = self.coords[np.random.randint(len(self.coords))] 598 | self.patches[self.turtles[t]['pos']]['turtles'].append(t) 599 | # patches-own [chemical] - amount of pheromone in the patch 600 | for p in self.patches: 601 | self.patches[p]['chemical'] = 0.0 602 | 603 | self._agent_selector.reinit(self.agents) 604 | self.agent_selection = self._agent_selector.next() 605 | 606 | # return self.obs_dict[self.agent], 0, False, {} 607 | 608 | def render(self, **kwargs): 609 | if self.gui: 610 | 611 | for event in pygame.event.get(): 612 | if event.type == pygame.QUIT: # window closed -> program quits 613 | pygame.quit() 614 | 615 | if self.first_gui: 616 | self.first_gui = False 617 | pygame.init() 618 | pygame.display.set_caption("SLIME") 619 | 620 | self.screen.fill(BLACK) 621 | # draw patches 622 | for p in self.patches: 623 | chem = round(self.patches[p]['chemical']) * self.shade_strength 624 | pygame.draw.rect(self.screen, (0, chem if chem <= 255 else 255, 0), 625 | pygame.Rect(p[0] - self.offset, p[1] - self.offset, self.patch_size, self.patch_size)) 626 | if self.show_chem_text and (not sys.gettrace() is None or 627 | self.patches[p][ 628 | 'chemical'] >= self.sniff_threshold): # if debugging show text everywhere, even 0 629 | text = self.chemical_font.render(str(round(self.patches[p]['chemical'], 1)), True, GREEN) 630 | self.screen.blit(text, text.get_rect(center=p)) 631 | 632 | # draw learners 633 | for learner in self.learners.values(): 634 | pygame.draw.circle(self.screen, RED, (learner['pos'][0], learner['pos'][1]), self.turtle_size // 2) 635 | # draw NON learners 636 | for turtle in self.turtles.values(): 637 | pygame.draw.circle(self.screen, BLUE, (turtle['pos'][0], turtle['pos'][1]), self.turtle_size // 2) 638 | 639 | for p in self.patches: 640 | if len(self.patches[p]['turtles']) > 1: 641 | text = self.cluster_font.render(str(len(self.patches[p]['turtles'])), True, 642 | RED if -1 in self.patches[p]['turtles'] else WHITE) 643 | self.screen.blit(text, text.get_rect(center=p)) 644 | 645 | self.clock.tick(self.fps) 646 | pygame.display.flip() 647 | return pygame.surfarray.array3d(self.screen) 648 | 649 | def close(self): 650 | if self.gui: 651 | if self.screen is not None: 652 | pygame.display.quit() 653 | pygame.quit() 654 | 655 | 656 | def get_neighborood_chemical(self, agent, as_vectors=False): 657 | agent_pos = self.learners[agent]["pos"] 658 | smell_patches = self.smell_patches[agent_pos] 659 | 660 | output_mask = [] 661 | for patch in smell_patches: 662 | output_mask.append(self.patches[patch]["chemical"] - self.patches[agent_pos]["chemical"]) if as_vectors else output_mask.append(self.patches[patch]["chemical"]) 663 | 664 | return np.array([output_mask], dtype=np.float32) 665 | 666 | 667 | if __name__ == "__main__": 668 | PARAMS_FILE = "../agents/multi-agent-params.json" 669 | EPISODES = 5 670 | LOG_EVERY = 1 671 | 672 | with open(PARAMS_FILE) as f: 673 | params = json.load(f) 674 | if params["gui"]: 675 | render = "human" 676 | else: 677 | render = "server" 678 | env = Slime(render_mode=render, **params) 679 | 680 | for ep in range(1, EPISODES + 1): 681 | env.reset() 682 | print( 683 | f"-------------------------------------------\nEPISODE: 
{ep}\n-------------------------------------------") 684 | for tick in range(params['episode_ticks']): 685 | for agent in env.agent_iter(max_iter=params["learner_population"]): 686 | observation, reward, done, info = env.last(agent) 687 | env.step(env.action_space(agent).sample()) 688 | # env.evaporate_chemical() 689 | env.move() 690 | env._evaporate() 691 | env._diffuse() 692 | env.render() 693 | env.close() --------------------------------------------------------------------------------
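
For reference, reward_cluster_and_time_punish_time (used as the step reward in both environments) sums three terms: time spent inside clusters, current cluster size, and a penalty for time spent outside clusters. The snippet below is a minimal standalone sketch of that computation, assuming the parameter values from slime_environments/agents/single-agent-params.json (rew=100, penalty=-1, cluster_threshold=30, episode_ticks=500); the function name and the example numbers are illustrative only and are not part of the codebase.

# Minimal sketch of the cluster-and-time reward (see reward_cluster_and_time_punish_time above).
def cluster_and_time_reward(cluster_size: int,
                            cluster_ticks: int,
                            episode_ticks: int = 500,
                            rew: float = 100,
                            penalty: float = -1,
                            cluster_threshold: int = 30) -> float:
    # fraction of the episode spent inside a cluster, scaled by the base reward
    time_in_cluster = (cluster_ticks / episode_ticks) * rew
    # current cluster size relative to the threshold, scaled by the squared base reward
    cluster_term = (cluster_size / cluster_threshold) * (rew ** 2)
    # fraction of the episode spent outside clusters, scaled by the (negative) penalty
    time_outside = ((episode_ticks - cluster_ticks) / episode_ticks) * penalty
    return time_in_cluster + cluster_term + time_outside

# e.g. a learner that has spent 50 of 500 ticks in a cluster of size 35:
print(cluster_and_time_reward(cluster_size=35, cluster_ticks=50))  # ~11675.77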
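Both environments build their patch grid from patch centres spaced PATCH_SIZE pixels apart (the centre coordinates double as patch IDs) and wrap positions around a torus via _wrap. The sketch below reconstructs that geometry on a toy 4x3 grid; W, H and PATCH_SIZE here are illustrative values, not the ones from the params files.

# Toy reconstruction of the patch-centre grid and torus wrapping used above
# (see the coords loop in __init__ and the _wrap method).
PATCH_SIZE = 20
W, H = 4, 3
W_pixels, H_pixels = W * PATCH_SIZE, H * PATCH_SIZE
offset = PATCH_SIZE // 2

# patch centres, also used as patch IDs: (10, 10), (10, 30), ..., (70, 50)
coords = [(x, y)
          for x in range(offset, W_pixels - offset + 1, PATCH_SIZE)
          for y in range(offset, H_pixels - offset + 1, PATCH_SIZE)]

def wrap(x: int, y: int) -> tuple[int, int]:
    # same wrapping rule as Slime._wrap: coordinates below 0 or above the window size re-enter from the opposite side
    if x < 0:
        x = W_pixels + x
    elif x > W_pixels:
        x = x - W_pixels
    if y < 0:
        y = H_pixels + y
    elif y > H_pixels:
        y = y - H_pixels
    return x, y

print(len(coords))    # 12 patches for a 4x3 grid
print(wrap(-10, 70))  # (70, 10): one patch left of the first column wraps to the last column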
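When follow_mode is 'prob', _find_max_pheromone picks a destination patch at random with probability proportional to its pheromone level, falling back to a uniform choice when every patch in the smell area is empty. Below is a minimal sketch of that selection rule on made-up data; the patch coordinates and chemical amounts are illustrative only.

import random

import numpy as np

# Illustrative pheromone levels for the patches inside a turtle's smell area
# (keys are patch centres, values are 'chemical' amounts).
smell_area = {(10, 10): 0.0, (30, 10): 2.0, (10, 30): 6.0, (30, 30): 0.0}

def pick_patch_prob(area: dict[tuple[int, int], float]) -> tuple[int, int]:
    population = list(area.keys())
    weights = list(area.values())
    if all(w == 0 for w in weights):
        # no pheromone anywhere: fall back to a uniform random patch
        return population[np.random.choice(len(population))]
    # pheromone-weighted choice, as in _find_max_pheromone with follow_mode='prob'
    return random.choices(population, weights=weights, k=1)[0]

# (10, 30) is returned roughly 75% of the time (6 / (6 + 2))
print(pick_patch_prob(smell_area))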