├── elsciRL ├── agents │ ├── __init__.py │ ├── clean_rl │ │ └── __init__.py │ ├── LLM_agents │ │ └── agent_modelfiles │ │ │ └── llama3_2.modelfile │ ├── random_agent.py │ ├── agent_abstract.py │ ├── stable_baselines │ │ ├── SB3_DQN.py │ │ ├── SB3_PPO.py │ │ └── SB3_A2C.py │ └── DQN.py ├── examples │ ├── placeholder.png │ ├── sailing_setup.png │ ├── local_configs │ │ ├── gym_frozenlake_config_local.py │ │ └── sailing_config_local.py │ ├── Readme.md │ ├── experiment_config.py │ ├── adapters │ │ ├── gym_frozenlake_default.py │ │ ├── gym_frozenlake_language.py │ │ ├── elsciRL_sailing_default.py │ │ └── elsciRL_sailing_language.py │ ├── environments │ │ ├── gym_frozenlake.py │ │ └── elsciRL_sailing.py │ └── DemoExperiment.py ├── analysis │ ├── tabular_output.py │ ├── combined_tabular_results.py │ └── convergence_measure.py ├── experiments │ ├── experiment_utils │ │ ├── config_utils.py │ │ ├── render_current_results.py │ │ ├── policy_agent_factory.py │ │ ├── result_manager.py │ │ ├── env_manager.py │ │ └── agent_factory.py │ └── training_procedures │ │ ├── policy_gradient.py │ │ └── default_exp_training.py ├── adapters │ ├── __init__.py │ ├── LLM_state_generators │ │ ├── base_prompt.py │ │ └── text_gpt-4.1.py │ └── LLM_logic_generators │ │ ├── adapter_prompt.py │ │ └── ollama_adapter_generator.py ├── application_suite │ ├── search_agent.py │ ├── experiment_agent.py │ └── CACHE_README.md ├── encoders │ ├── encoder_abstract.py │ ├── __init__.py │ ├── poss_actions_encoded.py │ ├── observable_objects_encoded.py │ ├── prior_actions_encoded.py │ ├── poss_state_encoded.py │ └── language_transformers │ │ └── MiniLM_L6v2.py ├── environment_setup │ ├── instruction_reward_wrapper.py │ ├── imports.py │ ├── results_table.py │ ├── elsciRL_info.py │ └── gym_translator.py ├── GUI │ ├── templates │ │ └── _generic_agent_param_form.html │ ├── prerender_encoder.py │ ├── LLM_tools │ │ └── LLM_utils.py │ └── static │ │ └── app_setup.md ├── instruction_following │ ├── instr_utils │ │ └── elsciRL_instr_input.py │ └── LLM_instr_planner │ │ └── LLM_instr_validator.py ├── __init__.py ├── config_local.py ├── config.py └── interaction_loops │ ├── state_search.py │ ├── policy_gradient.py │ └── standard_gym.py ├── .github └── FUNDING.yml ├── requirements.txt ├── pyelsciRL.toml ├── setup.py ├── .gitignore └── tests └── test_policy_gradient_classroom.py /elsciRL/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: pdfosborne 4 | -------------------------------------------------------------------------------- /elsciRL/agents/clean_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppo import CleanRLPPO 2 | 3 | __all__ = ["CleanRLPPO"] 4 | -------------------------------------------------------------------------------- /elsciRL/examples/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdfosborne/elsciRL/HEAD/elsciRL/examples/placeholder.png -------------------------------------------------------------------------------- /elsciRL/examples/sailing_setup.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pdfosborne/elsciRL/HEAD/elsciRL/examples/sailing_setup.png
--------------------------------------------------------------------------------
/elsciRL/agents/LLM_agents/agent_modelfiles/llama3_2.modelfile:
--------------------------------------------------------------------------------
1 | FROM llama3.2
2 | 
3 | # Set temperature to 0 for deterministic responses
4 | PARAMETER temperature 0
5 | 
6 | # Set context length to 4000 tokens
7 | PARAMETER num_ctx 4000
8 | 
9 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | flask
2 | numpy
3 | pandas
4 | matplotlib
5 | seaborn
6 | scipy>=1.10.1
7 | torch
8 | tqdm
9 | httpimport
10 | sentence-transformers
11 | gymnasium
12 | stable-baselines3
13 | ollama
14 | openai
15 | markdown
16 | pyboy
--------------------------------------------------------------------------------
/elsciRL/examples/local_configs/gym_frozenlake_config_local.py:
--------------------------------------------------------------------------------
1 | LocalConfigData ={
2 |     "adapter_select": ["Default", "Language"],
3 |     "training_action_cap": 100,
4 |     "testing_action_cap":100,
5 |     "reward_signal": [1,-0.01,-0.1],
6 |     "sub_goal": "None"
7 | }
--------------------------------------------------------------------------------
/elsciRL/examples/Readme.md:
--------------------------------------------------------------------------------
1 | # elsciRL Examples
2 | 
3 | These are designed to be run quickly to test your installation.
4 | 
5 | After installing elsciRL, simply use the following Python commands:
6 | 
7 | ```python
8 | from elsciRL.examples.DemoExperiment import DemoExperiment
9 | 
10 | exp = DemoExperiment()
11 | 
12 | exp.run()
13 | exp.evaluate()
14 | ```
--------------------------------------------------------------------------------
/elsciRL/examples/local_configs/sailing_config_local.py:
--------------------------------------------------------------------------------
1 | LocalConfigData = {
2 |     "env_select":"simple_river",
3 |     "adapter_select": ["Default", "Language"],
4 |     "training_action_cap": 100,
5 |     "testing_action_cap":100,
6 |     "reward_signal": [0.5,0,-0.1],
7 |     "sub_goal": "None",
8 |     "supervised_rewards":"False",
9 |     "y_limit":25,
10 |     "obs_precision":2
11 | }
--------------------------------------------------------------------------------
/elsciRL/analysis/tabular_output.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | 
4 | class TabularOutput:
5 |     def __init__(self, results_data, save_dir):
6 |         self.results_data = results_data
7 |         self.save_dir = save_dir
8 |         self.num_episodes = np.max(results_data['episode'])
9 | 
10 |     def save_results(self):
11 |         pd.DataFrame(self.results_data).to_csv(self.save_dir+'/results.csv')
--------------------------------------------------------------------------------
/pyelsciRL.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "elscirl"
3 | version = "1.0.0"
4 | authors = [
5 |   { name="Philip Osborne", email="pdfosborne@gmail.com" },
6 | ]
7 | description = "Applying the elsciRL architecture to Reinforcement Learning problems."
8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | classifiers = [ 11 | "Programming Language :: Python :: 3", 12 | "License :: OSI Approved :: Apache-2.0 license", 13 | "Operating System :: OS Independent", 14 | ] 15 | 16 | [project.urls] 17 | "Homepage" = "https://github.com/pdfosborne/elscirl" 18 | "Bug Tracker" = "https://github.com/pdfosborne/elscirl/issues" -------------------------------------------------------------------------------- /elsciRL/examples/experiment_config.py: -------------------------------------------------------------------------------- 1 | ExperimentConfigData = { 2 | "name": "Example Experiment", 3 | "problem_type": "Examples", 4 | 5 | "number_training_episodes": 100, 6 | "number_training_repeats": 5, 7 | "number_training_seeds": 1, 8 | 9 | "test_agent_type":"best", 10 | "number_test_episodes": 25, 11 | "number_test_repeats": 5, 12 | 13 | "agent_select": ["Qlearntab", "Qlearntab"], 14 | "agent_parameters":{ 15 | "Qlearntab":{ 16 | "alpha": 0.1, 17 | "gamma": 0.95, 18 | "epsilon": 0.2, 19 | "epsilon_step":0.01 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/config_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | def ensure_dir(path): 5 | if not os.path.exists(path): 6 | os.makedirs(path) 7 | 8 | 9 | def load_config(config_path): 10 | with open(config_path, 'r') as f: 11 | if config_path.endswith('.json'): 12 | return json.load(f) 13 | # Add more config formats if needed 14 | raise ValueError("Unsupported config file format.") 15 | 16 | 17 | def merge_configs(config1, config2): 18 | # Simple dict merge, can be improved for deep merge 19 | merged = config1.copy() 20 | merged.update(config2) 21 | return merged 22 | -------------------------------------------------------------------------------- /elsciRL/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | from elsciRL.agents.agent_abstract import Agent 3 | import torch 4 | from torch import Tensor 5 | 6 | class RandomAgent(Agent): 7 | """This is simply a random decision maker, does not learn.""" 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def policy(self, state: Tensor, legal_actions: list) -> str: 12 | action = random.choice(legal_actions) 13 | return action 14 | 15 | def learn(self, state: Tensor, next_state: Tensor, r_p: float, 16 | action_code: str) -> float: 17 | # Do nothing. 18 | return None 19 | 20 | def q_result(self): 21 | """Random agent has no knowledge.""" 22 | total_q = 0 23 | mean_q = 0 24 | return total_q, mean_q 25 | -------------------------------------------------------------------------------- /elsciRL/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Any 3 | 4 | class StateAdapter(ABC): 5 | def __init__(self, raw_state): 6 | super().__init__() 7 | # Define the fields that describe the state features: 8 | self.state: list = self._read(raw_state) 9 | 10 | @abstractmethod 11 | def _read(raw_state) -> list: 12 | # Read the data. 13 | # fill in the feature fields 14 | raise NotImplementedError 15 | 16 | def adapter(self): 17 | "Returns the adapted form, may require input flag for encoded or non-encoded output." 
18 | 19 | 20 | def sample(self): 21 | """Returns a sample of an adapted state form (typically initial position of the environment).""" 22 | 23 | 24 | -------------------------------------------------------------------------------- /elsciRL/application_suite/search_agent.py: -------------------------------------------------------------------------------- 1 | class DefaultAgentConfig: 2 | def __init__(self): 3 | self.data ={ 4 | "name": "Default", 5 | "problem_type": "Default", 6 | 7 | "number_training_episodes": 1000, 8 | "number_training_repeats": 5, 9 | "number_training_seeds": 1, 10 | 11 | "test_agent_type":"best", 12 | "number_test_episodes": 200, 13 | "number_test_repeats": 10, 14 | 15 | "agent_select": ["Qlearntab"], 16 | "adapter_select": ["default"], 17 | "agent_parameters":{ 18 | "Qlearntab":{ 19 | "alpha": 0.1, 20 | "gamma": 0.95, 21 | "epsilon": 1, 22 | "epsilon_step":0 23 | }, 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /elsciRL/agents/agent_abstract.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Iterable, Hashable, Any 3 | from torch import Tensor 4 | 5 | class Agent(ABC): 6 | @abstractmethod 7 | def policy(self, **kwargs) -> str: 8 | pass 9 | 10 | def learn(self, **kwargs) -> str: 11 | pass 12 | 13 | class QLearningAgent(Agent): 14 | def policy(self, state:Tensor, game_over:bool, 15 | legal_actions:list, **kwargs) -> Hashable: 16 | pass 17 | 18 | def learn(self, state:Tensor, action:Hashable, next_state:Iterable[Any], 19 | immediate_reward:float, **kwargs): 20 | pass 21 | 22 | 23 | class LLMAgentAbstract(Agent): 24 | def policy(self, state:str, legal_actions:list, **kwargs) -> str: 25 | pass 26 | 27 | def learn(self, state:str, action:str, next_state:str, reward:float, **kwargs) -> str: 28 | pass 29 | 30 | -------------------------------------------------------------------------------- /elsciRL/encoders/encoder_abstract.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import List, Dict 4 | from abc import ABC, abstractmethod 5 | from torch import Tensor 6 | 7 | class Encoder(ABC): 8 | @abstractmethod 9 | def encode(self, *args, **kwargs) -> Tensor: 10 | pass 11 | 12 | class StateEncoder(Encoder): 13 | tensor_cache: Dict[int, Tensor] = dict() 14 | tensor_cache_index: int = 0 15 | 16 | @staticmethod 17 | def cache_insert(t: Tensor): 18 | StateEncoder.tensor_cache[StateEncoder.tensor_cache_index] = t 19 | StateEncoder.tensor_cache_index += 1 20 | 21 | @staticmethod 22 | def cache_retrieve(offset: int, index: int): 23 | return StateEncoder.tensor_cache[offset + index] 24 | 25 | 26 | def encode(self, state:list = None, legal_actions:list = None, episode_action_history:str = None) -> Tensor: 27 | pass 28 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_state_generators/base_prompt.py: -------------------------------------------------------------------------------- 1 | elsciRL_base_prompt = """ 2 | You are a helpful assistant that needs to describe the current state of a reinforcement learning environment to help an agent understand the context of the problem and how to act optimally. 3 | 4 | The state can be text but is typically a list of numbers, you will be provided with prior actions and their outcome states and should use this information to describe the current state. 
5 | 6 | If no actions are provided, you should still describe the current state as best as you can. 7 | 8 | You will be provided with a list of legal actions that the agent can take in the current state, you should describe these actions in a way that is useful for the agent to understand what it can do. 9 | 10 | You do not need to provide any details about what the agent should do, just describe the current state and the legal actions available to the agent in a single paragraph with less than 200 words. 11 | 12 | 13 | """ -------------------------------------------------------------------------------- /elsciRL/environment_setup/instruction_reward_wrapper.py: -------------------------------------------------------------------------------- 1 | """Gym wrapper utilities for instruction-following reward shaping.""" 2 | from __future__ import annotations 3 | 4 | from typing import Callable, Dict, Optional 5 | 6 | import numpy as np 7 | 8 | from elsciRL.environment_setup.gym_wrapper_abstract import RewardWrapper 9 | 10 | 11 | class InstructionRewardWrapper(RewardWrapper): 12 | """Adds adapter-derived instruction rewards to a Gym environment.""" 13 | 14 | def __init__(self, env, reward_fn: Optional[Callable[[np.ndarray | None, Dict], float]] = None): 15 | super().__init__(env) 16 | self.reward_fn = reward_fn 17 | 18 | def reward(self, reward): 19 | if self.reward_fn is None: 20 | return reward 21 | obs = getattr(self.env, "last_obs", None) 22 | info = getattr(self.env, "last_info", {}) 23 | shaped_reward = self.reward_fn(obs, info) 24 | if shaped_reward is None: 25 | return reward 26 | return reward + shaped_reward 27 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/render_current_results.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def render_current_result(training_setup, current_environment, current_agent, local_save_dir): 4 | """Apply fixed policy to render current decision making for limited number of episodes.""" 5 | # Override input training setups with previously saved 6 | 7 | test_setup_info = training_setup.copy() 8 | 9 | test_setup_info['train'] = False # Testing Phase 10 | test_setup_info['training_results'] = False 11 | test_setup_info['observed_states'] = False 12 | test_setup_info['experience_sampling'] = False 13 | print("----------") 14 | print("Rendering trained agent's policy:") 15 | 16 | env = current_environment 17 | # --- 18 | env.number_episodes = 1 # Only render 1 episode 19 | env.agent = current_agent 20 | env.agent.epsilon = 0 # Remove random actions 21 | # --- 22 | # Render results 23 | if not os.path.exists(local_save_dir): 24 | os.mkdir(local_save_dir) 25 | env.episode_loop(render=True, render_save_dir=local_save_dir) -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/policy_agent_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | 3 | from elsciRL.agents.stable_baselines.SB3_PPO import SB_PPO 4 | from elsciRL.agents.stable_baselines.SB3_A2C import SB_A2C 5 | from elsciRL.agents.stable_baselines.SB3_DQN import SB_DQN 6 | from elsciRL.agents.clean_rl.ppo import CleanRLPPO 7 | 8 | 9 | class PolicyAgentFactory: 10 | """Factory for Gym/PyTorch policy-gradient agents (SB3-backed).""" 11 | 12 | def __init__(self): 13 | self.agent_types: Dict[str, Type] = { 14 | "SB3_PPO": SB_PPO, 15 | "SB3_A2C": 
SB_A2C, 16 | "SB3_DQN": SB_DQN, 17 | "PPO": CleanRLPPO, 18 | } 19 | 20 | def register_agent(self, name: str, agent_cls: Type): 21 | self.agent_types[name] = agent_cls 22 | 23 | def create(self, agent_type: str, agent_parameters: Dict, env): 24 | if agent_type not in self.agent_types: 25 | raise ValueError(f"Unknown policy agent type: {agent_type}") 26 | agent_cls = self.agent_types[agent_type] 27 | # Most SB3 wrappers accept the env kwarg directly. 28 | return agent_cls(env=env, **agent_parameters) 29 | -------------------------------------------------------------------------------- /elsciRL/GUI/templates/_generic_agent_param_form.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/result_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | class ResultManager: 5 | """Handles saving, loading, and reporting of results.""" 6 | def __init__(self, analysis): 7 | self.analysis = analysis 8 | 9 | def save_results(self, results, save_dir, filename): 10 | os.makedirs(save_dir, exist_ok=True) 11 | path = os.path.join(save_dir, filename) 12 | results.to_csv(path) 13 | 14 | def load_results(self, path): 15 | # Assumes CSV for now 16 | import pandas as pd 17 | return pd.read_csv(path) 18 | 19 | def train_report(self, training_results, save_dir, show_figures): 20 | return self.analysis.train_report(training_results, save_dir, show_figures) 21 | 22 | def test_report(self, testing_results, save_dir, show_figures): 23 | return self.analysis.test_report(testing_results, save_dir, show_figures) 24 | 25 | def training_variance_report(self, save_dir, show_figures): 26 | return self.analysis.training_variance_report(save_dir, show_figures) 27 | 28 | def testing_variance_report(self, save_dir, show_figures): 29 | return self.analysis.testing_variance_report(save_dir, show_figures) 30 | -------------------------------------------------------------------------------- /elsciRL/instruction_following/instr_utils/elsciRL_instr_input.py: -------------------------------------------------------------------------------- 1 | class elsciRLInput: 2 | def __init__(self, description_lookup:dict=None): 3 | self.description_lookup = description_lookup 4 | # New: store descriptions provided so the user doesn't need to provide multiple times 5 | self.descriptions_stored:dict={} 6 | 7 | def user_input(self): 8 | instructions = [] 9 | instruction_descriptions = [] 10 | while True: 11 | instr = input("Please provide the current instruction... 
([e/exit] to end path)")
12 |             if (instr == "e")|(instr=="exit"):
13 |                 break
14 | 
15 |             if not self.description_lookup:
16 |                 if instr not in self.descriptions_stored:
17 |                     description = input("Please provide a description of the instruction...")
18 |                 else:
19 |                     print("Instruction description provided previously.")
20 |                     description = self.descriptions_stored[instr]
21 |                 if description == "None":
22 |                     description = instr
23 |             else:
24 |                 # Lookup provided at init: use it, falling back to the instruction text itself
25 |                 description = self.description_lookup.get(instr, instr)
26 | 
27 |             instructions.append(instr)
28 |             instruction_descriptions.append(description)
29 |             self.descriptions_stored[instr] = description
30 | 
31 | 
32 |         return instructions, instruction_descriptions
--------------------------------------------------------------------------------
/elsciRL/application_suite/experiment_agent.py:
--------------------------------------------------------------------------------
1 | class DefaultAgentConfig:
2 |     def __init__(self):
3 |         self.data ={
4 |             "name": "Default",
5 |             "problem_type": "Default",
6 | 
7 |             "instruction_chain": True,
8 |             "instruction_chain_how": "continuous",
9 | 
10 |             "number_training_episodes": 1000,
11 |             "number_training_repeats": 5,
12 |             "number_training_seeds": 1,
13 | 
14 |             "test_agent_type":"best",
15 |             "number_test_episodes": 200,
16 |             "number_test_repeats": 10,
17 | 
18 |             "agent_select": ["Qlearntab"],
19 |             "adapter_select": ["default"],
20 |             "agent_parameters":{
21 |                 "Qlearntab":{
22 |                     "alpha": 0.1,
23 |                     "gamma": 0.95,
24 |                     "epsilon": 1,
25 |                     "epsilon_step":0
26 |                 },
27 |                 "DQN":{
28 |                     "learning_rate": 0.001,
29 |                     "gamma": 0.99,
30 |                     "epsilon": 1.0,
31 |                     "epsilon_min": 0.01,
32 |                     "epsilon_decay": 0.995,
33 |                     "memory_size": 10000,
34 |                     "batch_size": 64,
35 |                     "target_update": 10,
36 |                     "hidden_size": 128
37 |                 },
38 |             }
39 |         }
40 | 
--------------------------------------------------------------------------------
/elsciRL/experiments/experiment_utils/env_manager.py:
--------------------------------------------------------------------------------
1 | from elsciRL.environment_setup.gym_translator import EngineToGym
2 | 
3 | class EnvManager:
4 |     """Handles environment setup and management."""
5 |     def __init__(self, interaction_loop_class, adapters):
6 |         self.interaction_loop_class = interaction_loop_class
7 |         self.adapters = adapters
8 | 
9 |     def create_env(self, Engine, Adapters, local_setup_info):
10 |         return self.interaction_loop_class(Engine=Engine, Adapters=Adapters, local_setup_info=local_setup_info)
11 | 
12 |     def create_gym_env(self, Engine, Adapter, setup_info, wrappers=None):
13 |         """Create a Gym environment from an elsciRL Engine and Adapter using gym_translator.
14 | 
15 |         Adapter can be either the adapter class itself or the lookup key registered in
16 |         ``self.adapters``. Optional wrappers can be provided to post-process the created
17 |         environment (e.g., to add reward shaping).
18 | """ 19 | adapter_cls = Adapter 20 | if not callable(Adapter): 21 | adapter_cls = self.adapters.get(Adapter) 22 | if adapter_cls is None: 23 | raise ValueError(f"Adapter '{Adapter}' not found when creating Gym environment.") 24 | 25 | gym_env = EngineToGym() 26 | gym_env.load(Engine, Adapter=adapter_cls, setup_info=setup_info) 27 | 28 | if wrappers: 29 | for wrapper in wrappers: 30 | gym_env = wrapper(gym_env) 31 | return gym_env 32 | -------------------------------------------------------------------------------- /elsciRL/__init__.py: -------------------------------------------------------------------------------- 1 | # Try to import modules, handle missing dependencies gracefully 2 | try: 3 | from .examples.DemoExperiment import DemoExperiment as Demo 4 | except ImportError as e: 5 | print(f"Warning: Could not import DemoExperiment: {e}") 6 | Demo = None 7 | 8 | try: 9 | from .GUI.app import app as App 10 | except ImportError as e: 11 | print(f"Warning: Could not import GUI app: {e}") 12 | App = None 13 | 14 | try: 15 | from .GUI.prerender import Prerender as get_prerender_data 16 | except ImportError as e: 17 | print(f"Warning: Could not import Prerender: {e}") 18 | get_prerender_data = None 19 | 20 | try: 21 | from .experiments.standard import Experiment as STANDARD_RL 22 | except ImportError as e: 23 | print(f"Warning: Could not import STANDARD_RL: {e}") 24 | STANDARD_RL = None 25 | 26 | try: 27 | from .instruction_following.elsciRL_instruction_search import elsciRLSearch as elsciRL_SEARCH 28 | except ImportError as e: 29 | print(f"Warning: Could not import elsciRL_SEARCH: {e}") 30 | elsciRL_SEARCH = None 31 | 32 | try: 33 | from .instruction_following.elsciRL_instruction_following import elsciRLOptimize as elsciRL_OPTIMIZE 34 | except ImportError as e: 35 | print(f"Warning: Could not import elsciRL_OPTIMIZE: {e}") 36 | elsciRL_OPTIMIZE = None 37 | 38 | try: 39 | from .analysis.combined_variance_visual import combined_variance_analysis_graph as COMBINED_VARIANCE_ANALYSIS_GRAPH 40 | except ImportError as e: 41 | print(f"Warning: Could not import COMBINED_VARIANCE_ANALYSIS_GRAPH: {e}") 42 | COMBINED_VARIANCE_ANALYSIS_GRAPH = None 43 | -------------------------------------------------------------------------------- /elsciRL/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List, Dict, Iterable 5 | from abc import ABC, abstractmethod 6 | from elsciRL.adapters import StateAdapter 7 | from torch import Tensor 8 | 9 | 10 | class Encoder(ABC): 11 | @abstractmethod 12 | def encode(self, *args, **kwargs) -> Tensor: 13 | pass 14 | 15 | class StateEncoder(Encoder): 16 | tensor_cache: Dict[int, Tensor] = dict() 17 | tensor_cache_index: int = 0 18 | 19 | @staticmethod 20 | def cache_insert(t: Tensor): 21 | StateEncoder.tensor_cache[StateEncoder.tensor_cache_index] = t 22 | StateEncoder.tensor_cache_index += 1 23 | 24 | @staticmethod 25 | def cache_retrieve(offset: int, index: int): 26 | return StateEncoder.tensor_cache[offset + index] 27 | 28 | # index_objects are the complete list of adapter specific elements used to define the encoder's index 29 | def encode(self, index_objects:list=None, state:list = None, legal_actions:list = None, prior_action:str = None, 30 | opponent_action:str = None, indexed: bool = False) -> Tensor: 31 | pass 32 | 33 | 34 | class EncodedState(ABC): 35 | @abstractmethod 36 | def data() -> Iterable: 37 | raise NotImplementedError 38 
| 39 |
40 | class StateConverter(ABC):
41 |     def __init__(self, adapter: StateAdapter):
42 |         super().__init__()
43 |         # Calls the conversion procedure on the adapter's state features
44 |         self.data: EncodedState = self.convert(adapter.state)
45 | 
46 | 
47 |     def convert(self, state: list) -> EncodedState:
48 |         pass
--------------------------------------------------------------------------------
/elsciRL/encoders/poss_actions_encoded.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from typing import List
3 | from torch import Tensor
4 | import numpy as np
5 | 
6 | #from elsciRL.encoders.encoder_abstract import StateEncoder
7 | class PossibleActionsEncoder():
8 |     def __init__(self, all_possible_actions):
9 |         self.all_possible_actions = all_possible_actions
10 |         device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter
11 |         self.vectors: Tensor = torch.cat([torch.eye(len(self.all_possible_actions)), torch.zeros(1, len(self.all_possible_actions))]).to(device)
12 | 
13 |         self.all_possible_actions_dict_init = {}
14 |         for action in self.all_possible_actions:
15 |             self.all_possible_actions_dict_init[action] = 0
16 | 
17 |         self.name = "PossibleActionsEncoder"
18 |         self.input_type = "list"
19 |         self.output_type = "tensor"
20 |         self.output_dim = len(self.all_possible_actions)**2
21 | 
22 |     def encode(self, state: List[str] = None, legal_actions:list = None, episode_action_history:list = None,
23 |                indexed: bool = False) -> Tensor:
24 |         """Vector of possible actions."""
25 |         # Binary vector over all known actions to denote whether each is currently legal
26 |         all_possible_actions = self.all_possible_actions_dict_init.copy()
27 |         for a,action in enumerate(legal_actions):
28 |             all_possible_actions[action] = int(1)
29 | 
30 |         state_encoded = torch.tensor(list(all_possible_actions.values()))
31 |         if (not indexed):
32 |             state_encoded = self.vectors[state_encoded].flatten()
33 | 
34 |         return state_encoded
--------------------------------------------------------------------------------
/elsciRL/examples/adapters/gym_frozenlake_default.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 | import pandas as pd
3 | import torch
4 | from torch import Tensor
5 | # StateAdapter includes static methods for adapters
6 | from elsciRL.encoders.poss_state_encoded import StateEncoder
7 | 
8 | class DefaultAdapter:
9 |     _cached_state_idx: Dict[str, int] = dict()
10 | 
11 |     def __init__(self, setup_info:dict={}):
12 |         # NOTE: Update this based on the current problem, each requires preset
13 |         # knowledge of all possible states/actions/objects
14 |         # - Possible States
15 |         # - Possible Actions
16 |         # - Prior Actions
17 |         # - Possible Objects
18 | 
19 |         # Initialise encoder based on all possible env states
20 |         all_possible_states = [i for i in range(4*4)]
21 |         self.encoder = StateEncoder(all_possible_states)
22 | 
23 |     def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor:
24 |         """Encode the FrozenLake grid position into a tensor form for agents."""
25 | 
26 |         # Encode to Tensor for agents
27 |         if encode:
28 |             state_encoded = self.encoder.encode(state=state)
29 |         else:
30 |             state_encoded = state
31 | 
32 |         if (indexed):
33 |             state_indexed = list()
34 |             for sent in state:
35 |                 if (sent not in DefaultAdapter._cached_state_idx):
36 |                     DefaultAdapter._cached_state_idx[sent] =
len(DefaultAdapter._cached_state_idx) 37 | state_indexed.append(DefaultAdapter._cached_state_idx[sent]) 38 | 39 | state_encoded = torch.tensor(state_indexed) 40 | 41 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/examples/environments/gym_frozenlake.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | class Engine: 4 | """Defines the environment function from the generator engine. 5 | Expects the following: 6 | - reset() to reset the env a start position(s) 7 | - step() to make an action and update the game state 8 | - legal_moves_generator() to generate the list of legal moves 9 | """ 10 | def __init__(self, local_setup_info:dict={}) -> None: 11 | """Initialize Engine""" 12 | self.Environment = gym.make('FrozenLake-v1', desc=None, map_name="4x4", 13 | is_slippery=True, 14 | render_mode='rgb_array') 15 | 16 | def reset(self, start_obs:str=None): 17 | """Fully reset the environment.""" 18 | obs, info = self.Environment.reset() 19 | return obs 20 | 21 | 22 | def step(self, state:any, action:any): 23 | """Enact an action.""" 24 | # In problems where the agent can choose to reset the env 25 | if (state=="ENV_RESET")|(action=="ENV_RESET"): 26 | self.reset() 27 | 28 | obs, reward, terminated, truncated, info = self.Environment.step(action) 29 | return obs, reward, terminated, info 30 | 31 | def legal_move_generator(self, obs:any=None): 32 | """Define legal moves at each position""" 33 | legal_moves = [0,1,2,3] 34 | return legal_moves 35 | 36 | def render(self): 37 | """Render an image or text of the environment.""" 38 | return self.Environment.render() 39 | 40 | def close(self): 41 | """Close/Exit the environment.""" 42 | self.Environment.close() 43 | print("Environment Closed") 44 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_logic_generators/adapter_prompt.py: -------------------------------------------------------------------------------- 1 | adapter_prompt = """ 2 | Your role is to generate pseudocode for an adapter function that will be used to transform the state of an environment into a form that can be used by an agent. 3 | 4 | Adapters unify problems into a standard form so any agent in the elsciRL library can be used. 5 | 6 | In short, it transforms the state to a new form, optionally adding more context and then outputting a tensor. 7 | 8 | inputs: state, legal moves, action history for episode 9 | outputs: tensor for the encoded form of the adapted state 10 | 11 | # numeric adapter (numeric.py) 12 | class DefaultAdapter(setup_info): 13 | def __init__(): 14 | # Determine discrete environment size: e.g. 
"4x4" => 16 positions 15 | # Initialize a StateEncoder for these positions 16 | # Optionally define an observation space (e.g., Discrete) needed for Gym agents 17 | 18 | def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False): 19 | # If encode=True, convert the numeric state to a tensor (StateEncoder) 20 | # If indexed=True, map states to integer IDs 21 | 22 | return tensor(state_encoded) 23 | 24 | # language adapter (language.py) 25 | class LanguageAdapter(setup_info): 26 | def __init__(): 27 | # Build obs_mapping dictionary describing each state as text 28 | # Initialize LanguageEncoder 29 | 30 | def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False): 31 | # Convert numeric state ID to a text description (obs_mapping) 32 | # Optionally encode the text into a tensor (LanguageEncoder) 33 | # Optionally map each unique description to an indexed ID 34 | 35 | return tensor(state_encoded) 36 | 37 | """ -------------------------------------------------------------------------------- /elsciRL/encoders/observable_objects_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | from torch import Tensor 4 | 5 | from elsciRL.encoders.encoder_abstract import StateEncoder 6 | 7 | class ObjectEncoder(): 8 | def __init__(self, local_objects): 9 | """Encoder for default state representation produced by the environment/engine.""" 10 | self.local_objects = {obj: i for i, obj in enumerate(local_objects)} 11 | device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 12 | self.vectors: Tensor = torch.cat([torch.eye(len(self.local_objects)), torch.zeros(1, len(self.local_objects))]).to(device) # tensor needs to be defined to len(local_object) 13 | self.name = "ObjectEncoder" 14 | self.input_type = "list" 15 | self.output_type = "tensor" 16 | self.output_dim = len(self.local_objects)**2 17 | 18 | def encode(self, state:list = None, legal_actions:list = None, episode_action_history:list = None, 19 | indexed: bool = False) -> Tensor: 20 | """ NO CHANGE - Board itself is used as state as is and simply converted to a vector""" 21 | # Goes through every item in state and labels based on the known objects available in the environment 22 | # New vector encoded form, for Chess: 64x12 flattened into 768x1 int vector to denote object occurance 23 | # NOT BINARY vector, value is the occurance of each object type. 24 | # -> In chess this happens to be [1 or 0] because you cant have more than one piece in each position. 
25 | state_encoded: Tensor = torch.tensor([self.local_objects.get(state_pos, len(self.local_objects)) for state_pos in state], 26 | device=self.vectors.device) 27 | 28 | if (not indexed): 29 | state_encoded = self.vectors[state_encoded].flatten() 30 | 31 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/encoders/prior_actions_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | from torch import Tensor 4 | import numpy as np 5 | 6 | #from elsciRL.encoders.encoder_abstract import StateEncoder 7 | class PriorActionsEncoder(): 8 | def __init__(self, all_possible_actions): 9 | self.all_possible_actions = all_possible_actions 10 | device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 11 | self.vectors: Tensor = torch.cat([torch.eye(len(self.all_possible_actions)), torch.zeros(1, len(self.all_possible_actions))]).to(device) 12 | 13 | self.all_possible_actions_dict_init = {} 14 | for action in self.all_possible_actions: 15 | self.all_possible_actions_dict_init[action] = int(0) 16 | 17 | self.name = "PriorActionsEncoder" 18 | self.input_type = "list" 19 | self.output_type = "tensor" 20 | self.output_dim = len(self.all_possible_actions)**2 21 | 22 | def encode(self, state: List[str] = None, legal_actions:list = None, episode_action_history:list = None, 23 | indexed: bool = False) -> Tensor: 24 | """Vector of prio actions in game so far, similar to blindfold chess.""" 25 | # STATE ENCODER 26 | # - Updated to use all possible actions for consistency with poss action encoder and generally more suitable 27 | # - Chess has loads of possible actions which is somewhat unique to the problem 28 | # - BUT order must be preserved in the prior action encoder 29 | all_possible_actions = self.all_possible_actions_dict_init.copy() 30 | for a,action in enumerate(episode_action_history): 31 | all_possible_actions[action] = int(a) 32 | 33 | state_encoded = torch.tensor(list(all_possible_actions.values())) 34 | if (not indexed): 35 | state_encoded = self.vectors[state_encoded].flatten() 36 | 37 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/examples/adapters/gym_frozenlake_language.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import pandas as pd 3 | import torch 4 | from torch import Tensor 5 | # StateAdapter includes static methods for adapters 6 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 7 | 8 | class LanguageAdapter: 9 | _cached_state_idx: Dict[str, int] = dict() 10 | 11 | def __init__(self, setup_info:dict={}): 12 | # Language encoder doesn't require any preset knowledge of env to use 13 | self.encoder = LanguageEncoder() 14 | self.obs_mapping = {0:'You are at the start position.', 1:'You are on ice.', 2:'You are on ice.', 3:'You are on ice.', 15 | 4:'You are on ice.', 5:'You fell through a hole in the ice!', 6:'You are on ice.', 7:'You fell through a hole in the ice!', 16 | 8:'You are on ice.', 9:'You are on ice.', 10:'You are on ice.', 11:'You fell through a hole in the ice!', 17 | 12:'You fell through a hole in the ice!', 13:'You are on ice.', 14:'You are on ice.', 15:'You found the chest!'} 18 | self.key_found = False 19 | 20 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, 
indexed: bool = False) -> Tensor: 21 | """ Use Language name for every piece name for current board position """ 22 | # --- 23 | # Convert to lanugage 24 | state = self.obs_mapping[state] 25 | # --- 26 | 27 | # Encode to Tensor for agents 28 | if encode: 29 | state_encoded = self.encoder.encode(state=state) 30 | else: 31 | state_encoded = state 32 | 33 | if (indexed): 34 | state_indexed = list() 35 | for sent in state: 36 | if (sent not in LanguageAdapter._cached_state_idx): 37 | LanguageAdapter._cached_state_idx[sent] = len(LanguageAdapter._cached_state_idx) 38 | state_indexed.append(LanguageAdapter._cached_state_idx[sent]) 39 | 40 | state_encoded = torch.tensor(state_indexed) 41 | 42 | return state_encoded -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # from distutils.core import setup 2 | from setuptools import setup, find_packages 3 | 4 | setup( 5 | name='elsciRL', 6 | version='0.4.0', 7 | packages=[ 8 | 'elsciRL', 9 | 'elsciRL.adapters', 10 | 'elsciRL.adapters.LLM_state_generators', 11 | 'elsciRL.agents', 12 | 'elsciRL.agents.LLM_agents', 13 | 'elsciRL.agents.stable_baselines', 14 | 'elsciRL.analysis', 15 | 'elsciRL.application_suite', 16 | 'elsciRL.encoders', 17 | 'elsciRL.encoders.language_transformers', 18 | 'elsciRL.environment_setup', 19 | 'elsciRL.evaluation', 20 | 'elsciRL.examples', 21 | 'elsciRL.examples.adapters', 22 | 'elsciRL.examples.environments', 23 | 'elsciRL.examples.local_configs', 24 | 'elsciRL.experiments', 25 | 'elsciRL.experiments.experiment_utils', 26 | 'elsciRL.experiments.training_procedures', 27 | 'elsciRL.GUI', 28 | 'elsciRL.GUI.static', 29 | 'elsciRL.GUI.templates', 30 | 'elsciRL.experiments', 31 | 'elsciRL.instruction_following', 32 | 'elsciRL.instruction_following.LLM_instr_planner', 33 | 'elsciRL.instruction_following.instr_utils', 34 | 'elsciRL.interaction_loops', 35 | 'elsciRL.published_experiments', 36 | ], 37 | package_data={ 38 | 'elsciRL.GUI.templates': ['index.html', '_generic_agent_param_form.html'], 39 | 'elsciRL.GUI.static': ['styles.css', 'app_setup.md'], 40 | }, 41 | include_package_data=True, 42 | url='https://github.com/pdfosborne/elsciRL', 43 | license='Apache-2.0 license', 44 | author='Philip Osborne', 45 | author_email='pdfosborne@gmail.com', 46 | description='Apply language solutions to Reinforcement Learning problems.', 47 | install_requires=[ 48 | 'numpy', 49 | 'pandas', 50 | 'matplotlib', 51 | 'seaborn', 52 | 'scipy>=1.10.1', 53 | 'torch', 54 | 'tqdm', 55 | 'httpimport', 56 | 'sentence-transformers', 57 | 'gymnasium', 58 | 'stable-baselines3', 59 | 'flask', 60 | 'ollama', 61 | 'markdown', 62 | ] 63 | ) 64 | -------------------------------------------------------------------------------- /elsciRL/encoders/poss_state_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List, Any 3 | from torch import Tensor 4 | from tqdm import tqdm 5 | from elsciRL.encoders.encoder_abstract import StateEncoder 6 | 7 | class StateEncoder(StateEncoder): 8 | def __init__(self, num_states): 9 | """Encoder for default state representation produced by the environment/engine.""" 10 | # Create dict lookup 11 | # - get binary list that indexes the state e.g. 
0_0 -> [1,0,0,0] or 0_3 -> [0,0,0,1] 12 | # UPDATED - Now uses torch.nn.functional.one_hot for one-hot encoding 13 | # Using one-hot encoder is incredibly inefficient for large state spaces 14 | # Instead, we consider using an index-based encoding where each unique state is assigned a unique index. 15 | self.device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 16 | self.vectors: Tensor = torch.cat([torch.eye(num_states), torch.zeros(1,num_states)]).to(self.device) # tensor needs to be defined to len(local_object) 17 | self.name = "StateEncoder" 18 | self.input_type = "list" 19 | self.output_type = "tensor" 20 | self.output_dim = num_states 21 | 22 | self.encoder = {} 23 | self.encoder_idx = 0 24 | self.num_states = num_states 25 | 26 | def encode(self, state:Any = None, legal_actions:list = None, episode_action_history:list = None, 27 | indexed: bool = False) -> Tensor: 28 | """ Set of all possible states are simply converted to a vector""" 29 | # One hot encode the state if it is not already indexed 30 | if state not in self.encoder: 31 | state_encoded = self.encoder_idx # Use the index as the state encoded value 32 | # Store the encoded state in the encoder dictionary 33 | self.encoder[state] = state_encoded 34 | # Increment the encoder index for the next unique state 35 | self.encoder_idx += 1 36 | else: 37 | state_encoded = self.encoder[state] 38 | 39 | # If indexed, use one-hot encoding 40 | # If not indexed, use the unique index to retrieve the vector 41 | if indexed: 42 | state_encoded = torch.nn.functional.one_hot(torch.tensor(state_encoded), num_classes=self.num_states).float().to(self.device) 43 | else: 44 | state_encoded = self.vectors[int(state_encoded)].flatten() 45 | 46 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/config_local.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | # Define Agent's parameters for problem 5 | # Opponent is considered a 'local' specification as benchmarks vary between setting 6 | 7 | class LocalConfig: 8 | def __init__(self, config_file_path: str): 9 | if (config_file_path): 10 | with open(config_file_path) as config_file: 11 | self.data = json.load(config_file) 12 | self.config_file_path = config_file_path 13 | 14 | else: 15 | self.data = dict() 16 | self.config_path = "" 17 | logging.info("No arguments given, using default configuration...") 18 | 19 | def __getitem__(self, key: str): 20 | item = None 21 | 22 | if (key in self.__dict__): 23 | item = self.__dict__[key] 24 | else: 25 | item = self.data[key] 26 | 27 | return item 28 | 29 | #TODO this is not universal at all !!! 
30 | class ProblemConfig(LocalConfig): 31 | """Local Config is used to define any problem specific parameters.""" 32 | def __init__(self, config_path: str): 33 | super(ProblemConfig, self).__init__(config_path) 34 | # State form 35 | self.adapter_select = self.data.get("adapter_select", [""]) 36 | # Enabled agent to be trained against multiple opponents in order provided 37 | self.training_opponent_agent = self.data.get( 38 | "training_opponent_agent", "") 39 | self.testing_opponent_agent = self.data.get( 40 | "testing_opponent_agent", "") 41 | 42 | self.training_setup = self.data.get("training_setup",'default') 43 | self.testing_setup = self.data.get("testing_setup",'default') 44 | 45 | self.training_action_cap = self.data.get("training_action_cap",1000) # Arbitrary number to ensure games dont last forever 46 | self.testing_action_cap = self.data.get("testing_action_cap",1000) # Arbitrary number to ensure games dont last forever 47 | # Reward Signal, should be consistent between all agent being compared 48 | self.reward_signal = self.data.get("reward_signal",[1,-0.1,0,0] )# [Value of winning, Value for draw, Value for each action, Value for reaching new state] 49 | # Sub-Goal Defined 50 | self.sub_goal = self.data.get("sub_goal",None) 51 | 52 | class ConfigSetup(LocalConfig): 53 | def __init__(self, config_dir: str): 54 | super(ConfigSetup, self).__init__(config_dir) 55 | self.state_configs = ProblemConfig(os.path.join(config_dir)) -------------------------------------------------------------------------------- /elsciRL/environment_setup/imports.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from elsciRL.agents.agent_abstract import Agent, QLearningAgent, LLMAgentAbstract 3 | 4 | class ImportHelper: 5 | def __init__(self, local_setup_info:dict={}) -> None: 6 | self.setup_info = local_setup_info 7 | 8 | def agent_info(self, STATE_ADAPTER_TYPES:dict={}): 9 | agent: Agent | QLearningAgent | LLMAgentAbstract = self.setup_info['agent'] 10 | agent_type: str = self.setup_info['agent_type'] 11 | agent_name: str = self.setup_info['agent_name'] 12 | if self.setup_info['adapter_select'] in STATE_ADAPTER_TYPES: 13 | agent_state_adapter = STATE_ADAPTER_TYPES[self.setup_info['adapter_select']](setup_info=self.setup_info) 14 | else: 15 | print(f"Adapter {self.setup_info['adapter_select']} not found in STATE_ADAPTER_TYPES.") 16 | print(STATE_ADAPTER_TYPES) 17 | agent_state_adapter = '' 18 | return agent, agent_type, agent_name, agent_state_adapter 19 | 20 | def parameter_info(self): 21 | num_train_episodes: int = self.setup_info['number_training_episodes'] 22 | num_test_episodes: int = self.setup_info['number_test_episodes'] 23 | try: 24 | training_action_cap: int = self.setup_info['training_action_cap'] 25 | testing_action_cap: int = self.setup_info['testing_action_cap'] 26 | except: 27 | if 'action_limit' in self.setup_info: 28 | training_action_cap: int = self.setup_info['action_limit'] 29 | testing_action_cap: int = self.setup_info['action_limit'] 30 | elif 'action_cap' in self.setup_info: 31 | training_action_cap: int = self.setup_info['action_cap'] 32 | testing_action_cap: int = self.setup_info['action_cap'] 33 | else: 34 | print('No action cap specified, using default values') 35 | training_action_cap: int = 1000 36 | testing_action_cap: int = 1000 37 | reward_signal: List[int] = self.setup_info['reward_signal'] 38 | 39 | return num_train_episodes, num_test_episodes, training_action_cap, testing_action_cap, reward_signal 40 | 41 
| def training_flag(self): 42 | train: bool = self.setup_info['train'] 43 | return train 44 | 45 | def live_env_flag(self): 46 | live_env: bool = self.setup_info['live_env'] 47 | observed_states: bool = self.setup_info['observed_states'] 48 | #experience_sampling: bool = self.setup_info['experience_sampling'] 49 | return live_env, observed_states -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_DQN.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import DQN 7 | from stable_baselines3.common.evaluation import evaluate_policy 8 | from PIL import Image # Used to generate GIF 9 | 10 | class SB_DQN(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate:float=0.0001, buffer_size:int=1000000): 12 | self.epsilon: int = 0 # Not used currently but required for compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" 14 | self.dqn = DQN(policy, env, verbose=0, device=self.device, 15 | learning_rate=learning_rate, buffer_size=buffer_size) 16 | if torch.cuda.is_available(): 17 | print("---- Using GPU ----") 18 | print("Device:", self.dqn.device) 19 | 20 | def policy(self, state: any) -> str: 21 | return self.dqn.predict(state) 22 | 23 | def learn(self, total_steps:int=100) -> float: 24 | self.dqn.learn(total_timesteps=total_steps) 25 | 26 | def test(self, env, render:bool=False): 27 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 28 | vec_env = self.dqn.get_env() 29 | obs = vec_env.reset() 30 | 31 | actions = [] 32 | states = [] 33 | 34 | done = False 35 | render_stack = [] 36 | if render: 37 | render_stack.append( 38 | Image.fromarray(vec_env.render().astype('uint8')) 39 | ) 40 | while not done: 41 | action, _state = self.dqn.predict(obs, deterministic=True) 42 | if isinstance(action, np.int64): 43 | actions.append(action.item()) 44 | else: 45 | actions.append(action[0]) 46 | #actions.append(action[0]) 47 | 48 | obs, r, done, info = vec_env.step(action) 49 | states.append(info[0]['obs']) 50 | if render: 51 | render_stack.append( 52 | Image.fromarray(vec_env.render().astype('uint8')) 53 | ) 54 | 55 | #vec_env.render("human") 56 | episode_reward = info[0]['episode']['r'] 57 | if episode_reward > 0.5: 58 | print("----> ", episode_reward) 59 | 60 | return episode_reward, actions, states, render_stack 61 | 62 | def q_result(self): 63 | results = [0,0] 64 | total_q = results[0] 65 | mean_q = results[1] 66 | return total_q, mean_q 67 | 68 | def clone(self): 69 | clone = pickle.loads(pickle.dumps(self)) 70 | return clone -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_PPO.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import PPO 7 | from stable_baselines3.common.evaluation import evaluate_policy 8 | from PIL import Image # Used to generate GIF 9 | 10 | class SB_PPO(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate:float=0.0003, n_steps:int=2048): 12 | self.epsilon: int = 0 # Not used currently but required for 
compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" 14 | self.ppo = PPO(policy, env, verbose=0, device=self.device, 15 | learning_rate=learning_rate, n_steps=n_steps) 16 | if torch.cuda.is_available(): 17 | print("---- Using GPU ----") 18 | print("Device:", self.ppo.device) 19 | 20 | def policy(self, state: any) -> str: 21 | return self.ppo.predict(state) 22 | 23 | def learn(self, total_steps:int=100) -> float: 24 | self.ppo.learn(total_timesteps=total_steps) 25 | 26 | def test(self, env, render:bool=False): 27 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 28 | vec_env = self.ppo.get_env() 29 | obs = vec_env.reset() 30 | 31 | actions = [] 32 | states = [] 33 | 34 | done = False 35 | render_stack = [] 36 | if render: 37 | render_stack.append( 38 | Image.fromarray(vec_env.render().astype('uint8')) 39 | ) 40 | while not done: 41 | action, _state = self.ppo.predict(obs, deterministic=True) 42 | if isinstance(action, np.int64): 43 | actions.append(action.item()) 44 | else: 45 | actions.append(action[0]) 46 | #actions.append(action[0]) 47 | 48 | obs, r, done, info = vec_env.step(action) 49 | states.append(info[0]['obs']) 50 | if render: 51 | render_stack.append( 52 | Image.fromarray(vec_env.render().astype('uint8')) 53 | ) 54 | 55 | #vec_env.render("human") 56 | episode_reward = info[0]['episode']['r'] 57 | if episode_reward > 0.5: 58 | print("----> ", episode_reward) 59 | 60 | return episode_reward, actions, states, render_stack 61 | 62 | def q_result(self): 63 | results = [0,0] 64 | total_q = results[0] 65 | mean_q = results[1] 66 | return total_q, mean_q 67 | 68 | def clone(self): 69 | clone = pickle.loads(pickle.dumps(self)) 70 | return clone 71 | 72 | -------------------------------------------------------------------------------- /elsciRL/encoders/language_transformers/MiniLM_L6v2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from multiprocessing.spawn import import_main_path 4 | from typing import Dict, List, Tuple 5 | from collections import Counter 6 | from gymnasium.spaces import Box 7 | 8 | from torch import Tensor 9 | from elsciRL.encoders.encoder_abstract import StateEncoder 10 | 11 | # Language Encoder 12 | from sentence_transformers import SentenceTransformer 13 | 14 | 15 | 16 | class LanguageEncoder(StateEncoder): 17 | """Required Language Model included in requisite packages.""" 18 | _cached_enc: Dict[str, Tensor] = dict() 19 | _cached_freq: Counter = Counter() 20 | 21 | def __init__(self, device: str = None): 22 | autodev = "cuda" if torch.cuda.is_available() else "cpu" 23 | self.device = device if device else autodev 24 | self.sentence_model: SentenceTransformer = SentenceTransformer('all-MiniLM-L6-v2', device=self.device) 25 | low_array = [-1 for i in range(384)] 26 | high_array = [1 for i in range(384)] 27 | self.observation_space = Box(low=np.array(low_array), high=np.array(high_array), dtype=np.float32) 28 | self.name = "MiniLM_L6v2" 29 | self.input_type = "text" 30 | self.output_type = "tensor" 31 | self.output_dim = 384 32 | 33 | def encode(self, state: str|List[str], legal_actions:list = None, episode_action_history:list = None, 34 | indexed: bool = False, progress_bar:bool=False) -> Tensor: 35 | 36 | # I think typing is overriding the input type anyway -> need to ensure sentences are split up 37 | if type(state) == str: 38 | state = [state] 39 | # state = state.split(".") 40 | # state = [s for s in state if s.strip()] 41 | if 
(len(state) == 0): 42 | state = [""] 43 | to_encode = [sent for sent in state if sent not in LanguageEncoder._cached_enc] 44 | if (to_encode): 45 | # Show progress bar if state is a list of strings 46 | encoded = self.sentence_model.encode(to_encode, batch_size=256, convert_to_tensor=True, show_progress_bar=progress_bar) 47 | LanguageEncoder._cached_enc.update({to_encode[i]: encoded[i] for i in range(len(to_encode))}) 48 | 49 | LanguageEncoder._cached_freq.update(state) 50 | LanguageEncoder._cached_freq.subtract(LanguageEncoder._cached_freq.keys()) 51 | state_encoded = torch.stack([LanguageEncoder._cached_enc[sent] for sent in state]) 52 | 53 | if (len(LanguageEncoder._cached_freq) > 10000): 54 | for key, freq in list(reversed(LanguageEncoder._cached_freq.most_common()))[:2000]: 55 | del LanguageEncoder._cached_enc[key] 56 | del LanguageEncoder._cached_freq[key] 57 | 58 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_A2C.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import A2C 7 | from PIL import Image # Used to generate GIF 8 | 9 | 10 | class SB_A2C(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate=0.0007, n_steps=500): 12 | self.epsilon: int = 0 # Not used currently but required for compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" # A2C is meant to be run primarily on the CPU, especially when you are not using a CNN. 14 | self.a2c = A2C(policy, env, verbose=0, device="cpu", 15 | learning_rate=learning_rate, n_steps=n_steps) 16 | if torch.cuda.is_available(): 17 | print("---- A2C is meant to be run primarily on the CPU ----") 18 | print("Device:", self.a2c.device) 19 | 20 | def policy(self, state: any) -> str: 21 | # TODO: make sure output is int 22 | return self.a2c.predict(state) 23 | 24 | def learn(self, total_steps:int=100) -> float: 25 | self.a2c.learn(total_timesteps=total_steps) 26 | 27 | def test(self, env, render:bool=False): 28 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 29 | # Using environment from agent may limit episodes based on prior experience 30 | #vec_env = self.a2c.get_env() 31 | 32 | vec_env = env 33 | obs, info = vec_env.reset() 34 | 35 | actions = [] 36 | states = [] 37 | 38 | done = False 39 | episode_reward = 0 40 | render_stack = [] 41 | if render: 42 | render_stack.append( 43 | Image.fromarray(vec_env.render().astype('uint8')) 44 | ) 45 | while not done: 46 | action, _state = self.a2c.predict(obs, deterministic=True) 47 | if isinstance(action, np.int64): 48 | actions.append(action.item()) 49 | else: 50 | actions.append(action) 51 | # actions.append(int(action)) 52 | obs, r, done, truncated, info = vec_env.step(action) 53 | episode_reward += r 54 | if render: 55 | render_stack.append(Image.fromarray(vec_env.render().astype('uint8'))) 56 | 57 | #states.append(info[0]['obs']) 58 | states.append(info['obs']) 59 | #vec_env.render("human") 60 | 61 | #episode_reward = info[0]['episode']['r'] 62 | if episode_reward > 0.5: 63 | print("----> ", episode_reward) 64 | 65 | return episode_reward, actions, states, render_stack 66 | 67 | def q_result(self): 68 | results = [0,0] 69 | total_q = results[0] 70 | mean_q = results[1] 71 | return total_q, mean_q 72 | 73 | 
def clone(self): 74 | clone = pickle.loads(pickle.dumps(self)) 75 | return clone 76 | 77 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/agent_factory.py: -------------------------------------------------------------------------------- 1 | class AgentFactory: 2 | """Factory for creating agent instances based on type name and parameters.""" 3 | def __init__(self, adapters, setup_info): 4 | from elsciRL.agents.table_q_agent import TableQLearningAgent 5 | from elsciRL.agents.DQN import DQNAgent 6 | from elsciRL.agents.LLM_agents.ollama_agent import LLMAgent as OllamaAgent 7 | self.adapters = adapters 8 | self.agent_types = { 9 | "Qlearntab": TableQLearningAgent, 10 | "DQN": DQNAgent, 11 | "LLM_Ollama": OllamaAgent, 12 | } 13 | self.setup_info = setup_info 14 | 15 | def register_agent(self, name, agent_class): 16 | self.agent_types[name] = agent_class 17 | 18 | def create(self, agent_type, agent_parameters, engine=None, adapter=None): 19 | if agent_type == "DQN": 20 | if adapter: 21 | adapter_sample = self.adapters[adapter](setup_info=self.setup_info) 22 | # Set input_size from adapter 23 | try: 24 | input_size = adapter_sample.input_dim 25 | print(f"Using input_dim from adapter {adapter}: {input_size}") 26 | except Exception: 27 | try: 28 | input_size = adapter_sample.encoder.output_dim 29 | print(f"Using encoder output_dim from encoder {adapter_sample.encoder}: {input_size}") 30 | except Exception: 31 | try: 32 | input_size = adapter_sample.LLM_adapter.encoder.output_dim 33 | print(f"Using LLM_adapter encoder output_dim from LLM adapter {adapter_sample.LLM_adapter}: {input_size}") 34 | except Exception: 35 | print(f"Adapter {adapter} does not have input_dim specified.") 36 | raise ValueError(f"No input dim size found in adapter: {adapter}") 37 | 38 | if engine: 39 | print(engine) 40 | engine_sample = engine(local_setup_info=self.setup_info) 41 | try: 42 | output_size = engine_sample.output_size 43 | except Exception: 44 | try: 45 | output_size = engine_sample.output_dim 46 | except Exception: 47 | try: 48 | output_size = engine_sample.output_dim_size 49 | except Exception: 50 | print(f"Engine {engine} does not contain output dim size for DQN agent, using default 1,000.") 51 | output_size = 1000 52 | # Order must match DQN input 53 | temp_dict = {'input_size': input_size, 'output_size': output_size} 54 | temp_dict.update(agent_parameters) 55 | else: 56 | # For other agents, we assume the parameters are already in the correct format 57 | temp_dict = agent_parameters 58 | if agent_type not in self.agent_types: 59 | raise ValueError(f"Unknown agent type: {agent_type}") 60 | return self.agent_types[agent_type](**temp_dict) 61 | -------------------------------------------------------------------------------- /elsciRL/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | 5 | 6 | class Config: 7 | def __init__(self, config_file_path: str): 8 | if config_file_path: 9 | with open(config_file_path) as config_file: 10 | self.data = json.load(config_file) 11 | self.config_file_path = config_file_path 12 | 13 | else: 14 | self.data = dict() 15 | self.config_path = "" 16 | logging.info("No arguments given, using default configuration...") 17 | 18 | def __getitem__(self, key: str): 19 | item = None 20 | 21 | if key in self.__dict__: 22 | item = self.__dict__[key] 23 | else: 24 | item = self.data[key] 25 | 26 | return item 27 | 28 | 29 | class 
ExperimentConfig(Config): 30 | def __init__(self, config_path: str): 31 | super(ExperimentConfig, self).__init__(config_path) 32 | 33 | # Name setup 34 | self.name = self.data.get( 35 | "name", os.path.split(self.config_file_path)[-1].replace(".json", "") 36 | ) 37 | # Define Problem Type Choice 38 | self.problem_type = self.data.get("problem_type", "") 39 | # Specify local config choices to select agents of interest 40 | self.agent_select = self.data.get("agent_select", ["Qlearntab"]) 41 | 42 | # ---> We then parse these three inputs to obtain the local config setup info 43 | # ---> Ideally input is a dict input: setups = { 'Setup1':{"Adapter":"Engine", "Encoder":"Yes", "Agent":"TabQ"},... } 44 | 45 | # Training repeated 46 | self.num_training_episodes = self.data.get("num_training_episodes", 1000) 47 | self.number_training_repeats = self.data.get("number_training_repeats", 5) 48 | 49 | # Testing repeated 50 | self.number_test_episodes = self.data.get("number_test_episodes", 100) 51 | self.number_test_repeats = self.data.get("number_test_repeats", 5) 52 | self.test_agent_type = self.data.get("test_agent_type", "best") 53 | 54 | # Tab Q Agent parameters 55 | self.alpha = self.data.get("alpha", [0.05]) 56 | self.gamma = self.data.get("gamma", [0.95]) 57 | self.epsilon = self.data.get("epsilon", [0.05]) 58 | # Neural Agent Parameters 59 | self.input_type = "lm" 60 | self.input_size = self.data.get("input_size", [384]) 61 | self.sent_hidden_dim = self.data.get("sent_hidden_dim", [10]) 62 | self.hidden_dim = self.data.get("hidden_dim", [128]) 63 | self.num_hidden = self.data.get("num_hidden", [2]) 64 | self.sequence_size = self.data.get("sequence_size", [20]) 65 | self.memory_size = self.data.get("memory_size", [2000]) 66 | self.target_replace_iter = self.data.get("target_replace_iter", [100]) 67 | self.learning_rate = self.data.get("learning_rate", [0.001]) 68 | self.batch_size = self.data.get("batch_size", [1]) 69 | 70 | self.number_test_episodes = self.data.get("number_test_episodes", 250) 71 | self.number_test_repeats = self.data.get("number_test_repeats", 5) 72 | 73 | 74 | class TestingSetupConfig(Config): 75 | def __init__(self, config_dir: str): 76 | super(TestingSetupConfig, self).__init__(config_dir) 77 | self.state_configs = ExperimentConfig(os.path.join(config_dir)) 78 | -------------------------------------------------------------------------------- /elsciRL/GUI/prerender_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import Tensor 4 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder as MiniLM_L6v2 5 | 6 | # Get search method 7 | import os 8 | import json 9 | from datetime import datetime 10 | 11 | 12 | def encode_prerender_data(observed_states:dict|str=None, 13 | save_dir:str=None, 14 | encoder:str ='MiniLM_L6v2') -> Tensor: 15 | """ Encodes the observed states using a language encoder. 16 | Args: 17 | observed_states (dict or str): The observed states to encode, can be the dictionary or the directory path string. 18 | save_dir (str): The directory where the encoded states will be saved. If None, defaults to './encoded-prerender-data'. 19 | encoder (str): The name of the encoder to use. Defaults to 'MiniLM_L6v2', options include: 20 | - 'MiniLM_L6v2': A lightweight language model suitable for encoding text. 21 | - ~~Other encoders can be added in the future.~~ 22 | Returns: 23 | Tensor: The encoded representation of the observed states. 
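        Example (illustrative sketch; assumes a JSON-formatted observed-states file saved as a .txt on disk):
            encoded_states = encode_prerender_data(
                observed_states='./observed_states.txt',
                save_dir='./encoded-prerender-data',
                encoder='MiniLM_L6v2',
            )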
24 | """ 25 | # ------------------------------------------------------------------ 26 | # Define the available encoders 27 | # Currently only MiniLM_L6v2 is available, but can be extended in the future. 28 | ENCODERS = {'MiniLM_L6v2': MiniLM_L6v2} 29 | encoder = ENCODERS[encoder]() 30 | # ------------------------------------------------------------------ 31 | if observed_states is None: 32 | print("\n ----------------------------------------------------") 33 | print(" No observed states provided. Please select a file to encode.") 34 | print(" ----------------------------------------------------\n") 35 | file_names = [file for file in os.listdir('./') if file.endswith('.txt')] 36 | for n, file in enumerate(file_names): 37 | print(f"- {n}: {file}") 38 | selection = input("\n Select the file to encode (by number): ") 39 | observed_states_filename = file_names[int(selection)] 40 | observed_states_path = os.path.join('./', observed_states_filename) 41 | with open(observed_states_path, 'r') as f: 42 | observed_states = json.loads(f.read()) 43 | save_dir = './' 44 | else: 45 | if isinstance(observed_states, str): 46 | observed_states_filename = observed_states.split('/')[-1].split('.')[0] 47 | if not save_dir: 48 | save_dir = os.path.dirname(observed_states) 49 | with open(observed_states, 'r') as f: 50 | observed_states = json.loads(f.read()) 51 | else: 52 | observed_states_filename = 'observed_states' 53 | if not save_dir: 54 | save_dir = './' 55 | 56 | # Encode the observed states 57 | print(f"\n Encoding observed state file {observed_states_filename} using {encoder.name}...") 58 | str_states = [str_state for str_state in observed_states.values()] 59 | observed_states_encoded = encoder.encode(str_states) 60 | 61 | if not os.path.exists(save_dir): 62 | os.makedirs(save_dir) 63 | file_path = os.path.join(save_dir, 'encoded_' + observed_states_filename.split('.')[0] + '.txt') 64 | np.savetxt(file_path, observed_states_encoded.numpy()) 65 | print(f"Encoded states saved to {file_path}") 66 | print(f"Number of States: {len(observed_states_encoded)}") 67 | 68 | return observed_states_encoded -------------------------------------------------------------------------------- /elsciRL/examples/adapters/elsciRL_sailing_default.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import pandas as pd 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | # StateAdapter includes static methods for adapters 7 | from elsciRL.encoders.poss_state_encoded import StateEncoder 8 | from gymnasium.spaces import Text, Discrete 9 | 10 | class DefaultAdapter: 11 | 12 | # ------ Static Methods --------------------------------------- 13 | # - Defined by simulator source https://github.com/PPierzc/ai-learns-to-sail/blob/master/tasks/channel.py 14 | @staticmethod 15 | def angle_to_state(angle): 16 | return int(30 * ((angle + np.pi) / (2 * np.pi) % 1)) # Discretization of the angle space 17 | 18 | @staticmethod 19 | def x_to_state(x): 20 | return int(40 * ((x + -10) / 20)) # Discretization of the x space 21 | 22 | @staticmethod 23 | def state_discretizer(state): 24 | x = float(state.split('_')[0]) 25 | x_state = DefaultAdapter.x_to_state(x) 26 | 27 | angle = float(state.split('_')[1]) 28 | angle_state = DefaultAdapter.angle_to_state(angle) 29 | 30 | state_out = str(x_state)+'_'+str(angle_state) 31 | return state_out 32 | # ------------------------------------------------------------- 33 | 34 | _cached_state_idx: Dict[str, int] = dict() 35 
| def __init__(self, setup_info:dict={}) -> None: 36 | # ------ State Encoder --------------------------------------- 37 | # Initialise encoder based on all possible env states 38 | all_possible_x = [i*-1 for i in range(40)] 39 | all_possible_angle = [i for i in range(30)] 40 | # Need an index that preserves the identity of both the x and angle values 41 | all_possible_states = [] 42 | for x_ind in all_possible_x: 43 | for angle_ind in all_possible_angle: 44 | index = str(x_ind)+'_'+str(angle_ind) 45 | all_possible_states.append(index) 46 | # Input to pre-built possible state encoder 47 | #self.encoder = StateEncoder(all_possible_states) 48 | self.encoder = {} 49 | # Observation is string: "x_angle" 50 | # -> Then discretized and returned as string: "x_state_angle_state" 51 | # -> Before being converted to a unique numeric id (x:-10-10*2dp * angle:0-2pi*1dp) 52 | self.observation_space = Discrete(2000*30) 53 | 54 | 55 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor: 56 | """ Discretize the raw "x_angle" observation and encode it as a unique state id for the agent """ 57 | 58 | state = DefaultAdapter.state_discretizer(state) 59 | 60 | # Encode to Tensor for agents 61 | if encode: 62 | #state_encoded = self.encoder.encode(state=state) 63 | # elsciRL state encoder is large and not needed for tabular agents 64 | # - Won't work for neural agents 65 | if (state not in self.encoder): 66 | state_encoded = torch.tensor(len(self.encoder)) 67 | self.encoder[state] = state_encoded 68 | else: 69 | state_encoded = self.encoder[state] 70 | else: 71 | state_encoded = state 72 | 73 | if (indexed): 74 | state_indexed = list() 75 | for sent in state: 76 | if (sent not in DefaultAdapter._cached_state_idx): 77 | DefaultAdapter._cached_state_idx[sent] = len(DefaultAdapter._cached_state_idx) 78 | state_indexed.append(DefaultAdapter._cached_state_idx[sent]) 79 | 80 | state_encoded = torch.tensor(state_indexed) 81 | 82 | return state_encoded -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode/ 163 | 164 | # Dev testing problem 165 | benchmark/output/* 166 | elsciRL/benchmark/output/* 167 | elsciRL-App-output/* -------------------------------------------------------------------------------- /elsciRL/experiments/training_procedures/policy_gradient.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from elsciRL.environment_setup.results_table import ResultsTable 4 | from elsciRL.experiments.experiment_utils.config_utils import ensure_dir 5 | from elsciRL.interaction_loops.policy_gradient import PolicyGradientInteractionLoop 6 | 7 | 8 | def run_policy_gradient_training_loop( 9 | env_manager, 10 | policy_agent_factory, 11 | result_manager, 12 | training_render, 13 | training_render_save_dir, 14 | save_dir, 15 | engine_name, 16 | engine, 17 | agent_type, 18 | adapter, 19 | train_setup_info, 20 | trained_agents, 21 | num_training_seeds, 22 | test_agent_type, 23 | show_figures, 24 | number_training_repeats, 25 | wrappers=None, 26 | ): 27 | """Specialized training loop for policy-gradient agents.""" 28 | 29 | key = f"{engine_name}_{agent_type}_{adapter}" 30 | if key not in trained_agents: 31 | trained_agents[key] = {} 32 | 33 | seed_recall = {} 34 | seed_results_connection = {} 35 | observed_states_stored = {} 36 | training_results_stored = None 37 | 38 | for seed_num in range(num_training_seeds): 39 | if num_training_seeds > 1: 40 | print("------\n- Seed Num: ", seed_num) 41 | 42 | setup_num = 0 43 | temp_agent_store = {} 44 | 45 | for training_repeat in range(1, number_training_repeats + 1): 46 | setup_num += 1 47 | env = env_manager.create_gym_env(engine, adapter, train_setup_info, wrappers=wrappers) 48 | agent_parameters = train_setup_info['agent_parameters'][agent_type] 49 | agent = policy_agent_factory.create(agent_type, agent_parameters, env) 50 | 51 | total_steps = train_setup_info.get('training_action_cap', 100) * train_setup_info.get('number_training_episodes', 1) 52 | agent.learn(total_steps=total_steps) 53 | 54 | agent_name = train_setup_info.get('agent_name', f"{agent_type}_{adapter}") 55 | results_table = ResultsTable(train_setup_info) 56 | table_results = PolicyGradientInteractionLoop.policy_rollout( 57 | agent, 58 | env, 59 | agent_name, 60 | train_setup_info.get('number_training_episodes', 1), 61 | results_table, 62 | render=False, 63 | action_limit=train_setup_info.get('training_action_cap'), 64 | ) 65 | 66 | table_results['episode'] = table_results.index 67 | table_results.insert(loc=0, column='Repeat', value=setup_num) 68 | 69 | agent_save_dir = os.path.join( 70 | save_dir, 71 | f"{engine_name}_{agent_type}_{adapter}__training_results_{setup_num}" 72 | ) 73 | ensure_dir(agent_save_dir) 74 | Return = result_manager.train_report(table_results, agent_save_dir, show_figures) 75 | train_setup_info['train_save_dir'] = agent_save_dir 76 | 77 | if key not in temp_agent_store: 78 | temp_agent_store[key] = {} 79 | temp_agent_store[key][setup_num] = {'Return': Return, 'agent': agent, 'train_setup': train_setup_info.copy()} 80 | 81 | seed_recall[agent_name] = setup_num 82 | training_results_stored = table_results 83 | 84 | seed_results_connection[key] = training_results_stored 85 | 86 | def _select_training_setups(): 87 | if test_agent_type.lower() == 'best': 88 | best_repeat = max(temp_agent_store[key], key=lambda r: temp_agent_store[key][r]['Return']) 89 | return [temp_agent_store[key][best_repeat]] 90 | if test_agent_type.lower() == 'all': 91 | return list(temp_agent_store[key].values()) 
92 | return [temp_agent_store[key][setup_num]] 93 | 94 | selected_setups = _select_training_setups() 95 | trained_agents[key][agent_name] = [entry['agent'] for entry in selected_setups] if len(selected_setups) > 1 else selected_setups[0]['agent'] 96 | 97 | training_setups_for_key = {} 98 | for idx, entry in enumerate(selected_setups, start=1): 99 | training_setup = entry['train_setup'] 100 | repeat_label = entry.get('train_setup', {}).get('Repeat', idx) 101 | training_setups_for_key[f"Training_Setup_{engine_name}_{agent_type}_{adapter}_{repeat_label}"] = training_setup 102 | 103 | return trained_agents, seed_results_connection, temp_agent_store, training_results_stored, observed_states_stored 104 | -------------------------------------------------------------------------------- /tests/test_policy_gradient_classroom.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the following to test: 3 | pytest tests/test_policy_gradient_classroom.py -k Classroom 4 | """ 5 | 6 | import pytest 7 | 8 | from elsciRL.application_suite.import_tool import PullApplications 9 | from elsciRL.experiments.policy_gradient import PolicyGradientExperiment 10 | 11 | 12 | @pytest.mark.integration 13 | @pytest.mark.slow 14 | def test_policy_gradient_runs_on_classroom(tmp_path): 15 | pytest.importorskip("stable_baselines3") 16 | 17 | puller = PullApplications() 18 | try: 19 | application_data = puller.pull(['Classroom']) 20 | except Exception as exc: 21 | pytest.skip(f"Classroom application unavailable: {exc}") 22 | 23 | classroom_data = application_data.get('Classroom') 24 | if not classroom_data: 25 | pytest.skip("Classroom application data missing") 26 | 27 | default_adapter = 'default' 28 | if default_adapter not in classroom_data['adapters']: 29 | default_adapter = list(classroom_data['adapters'].keys())[0] 30 | 31 | # Sanity-check the Classroom engine's API matches Gym expectations. 
32 | engine_cls = classroom_data['engine'] 33 | engine_instance = engine_cls(classroom_data['local_configs']['classroom_A']) 34 | initial_obs = engine_instance.reset() 35 | step_output = engine_instance.step(state=initial_obs, action=0) 36 | assert isinstance(step_output, tuple), "Engine.step should return a tuple" 37 | assert len(step_output) == 4, f"Engine.step must return 4 values, got {len(step_output)}" 38 | 39 | base_experiment_data = { 40 | "number_training_episodes": 100, 41 | "number_training_repeats": 1, 42 | "number_training_seeds": 1, 43 | "number_test_episodes": 100, 44 | "number_test_repeats": 1, 45 | "training_action_cap": 16, 46 | "testing_action_cap": 16, 47 | "test_agent_type": "best", 48 | "reward_signal": [1, 0, 0], 49 | "train": True, 50 | "live_env": True, 51 | } 52 | 53 | agent_configs = { 54 | "PPO": { 55 | "agent_parameters": { 56 | "learning_rate": 3e-4, 57 | "batch_size": 64, 58 | "minibatch_size": 16, 59 | "update_epochs": 2, 60 | "hidden_size": 64, 61 | } 62 | }, 63 | } 64 | 65 | for agent_type, config in agent_configs.items(): 66 | experiment_data = base_experiment_data.copy() 67 | experiment_data["agent_select"] = [agent_type] 68 | experiment_data["adapter_select"] = [default_adapter] 69 | experiment_data["adapter_input_dict"] = {agent_type: [default_adapter]} 70 | experiment_data["agent_parameters"] = {agent_type: config["agent_parameters"]} 71 | 72 | experiment_config = {"data": experiment_data} 73 | local_config = {"data": classroom_data['local_configs']['classroom_A']} 74 | 75 | agent_tmp_dir = tmp_path / agent_type 76 | agent_tmp_dir.mkdir(parents=True, exist_ok=True) 77 | 78 | experiment = PolicyGradientExperiment( 79 | Config=experiment_config, 80 | ProblemConfig=local_config, 81 | Engine=classroom_data['engine'], 82 | Adapters=classroom_data['adapters'], 83 | save_dir=str(agent_tmp_dir), 84 | show_figures='No', 85 | window_size=0.1, 86 | ) 87 | 88 | print(f"Training {agent_type} agent...") 89 | training_setups = experiment.train() 90 | assert training_setups, f"Policy gradient training should generate setups for {agent_type}" 91 | 92 | print(f"Testing {agent_type} agent...") 93 | evaluation = experiment.test() 94 | assert evaluation is not None 95 | 96 | print(f"Rendering {agent_type} agent...") 97 | render_dir = agent_tmp_dir / "renders" 98 | render_outputs = experiment.render_results(render_save_dir=str(render_dir)) 99 | assert render_outputs is not None 100 | print('Test complete') 101 | 102 | 103 | if __name__ == "__main__": 104 | import argparse 105 | import tempfile 106 | from pathlib import Path 107 | 108 | parser = argparse.ArgumentParser(description="Run policy-gradient classroom test") 109 | parser.add_argument( 110 | "--output-dir", 111 | type=Path, 112 | default=None, 113 | help="Directory where test artifacts should be saved. 
Uses a temp dir if omitted.", 114 | ) 115 | args = parser.parse_args() 116 | 117 | if args.output_dir is not None: 118 | args.output_dir.mkdir(parents=True, exist_ok=True) 119 | test_policy_gradient_runs_on_classroom(args.output_dir) 120 | else: 121 | with tempfile.TemporaryDirectory() as tmp_dir: 122 | test_policy_gradient_runs_on_classroom(Path(tmp_dir)) 123 | -------------------------------------------------------------------------------- /elsciRL/analysis/combined_tabular_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | import json 5 | 6 | def combined_tabular_analysis_results(results_dir:str='', analysis_type='training'): 7 | if results_dir == '': 8 | raise ValueError("Save directory not specified.") 9 | analysis_type = analysis_type.lower() # lowercase analysis type input 10 | # Get sub-dir for each problem-experiment type 11 | instruction_folders = [os.path.join(results_dir, instr) for instr in os.listdir(results_dir) if os.path.isdir(os.path.join(results_dir, instr))] 12 | variance_results = {} 13 | for instr_folder_dir in instruction_folders: 14 | instr_id = instr_folder_dir.split('/')[-1].split('//')[-1].split('\\')[-1].split('\\\\')[-1] 15 | if instr_id not in variance_results.keys(): 16 | variance_results[instr_id] = {} 17 | print(f"Processing {instr_id} for {analysis_type} analysis.") 18 | problem_folders = [name for name in os.listdir(instr_folder_dir) if os.path.isdir(os.path.join(instr_folder_dir, name))] 19 | # Find experiment folders 20 | # - Capture case where there is only one experiment type 21 | # and so wont have sub-directory for experiments to search 22 | for experiment_dir in problem_folders: 23 | if analysis_type == 'training': 24 | experiment_name = experiment_dir+'_training' 25 | file_names = [name for name in os.listdir(instr_folder_dir+'/'+experiment_dir) if name[0:25] == 'training_variance_results'] 26 | elif analysis_type == 'testing': 27 | experiment_name = experiment_dir+'_testing' 28 | file_names = [name for name in os.listdir(instr_folder_dir+'/'+experiment_dir) if name[0:24] == 'testing_variance_results'] 29 | else: 30 | raise ValueError("Analysis type must be either 'training' or 'testing'.") 31 | 32 | if experiment_name not in variance_results[instr_id].keys(): 33 | variance_results[instr_id][experiment_name] = {} 34 | 35 | for file in file_names: 36 | results = pd.read_csv(instr_folder_dir+'/'+experiment_dir+'/'+file) 37 | agent = results['agent'].iloc[0].split('__')[0] 38 | if agent not in variance_results[instr_id][experiment_name].keys(): 39 | variance_results[instr_id][experiment_name][agent] = {} 40 | 41 | # Calculate Mean and Std Dev 42 | variance_results[instr_id][experiment_name][agent]['num_repeats'] = results['num_repeats'].iloc[0] 43 | variance_results[instr_id][experiment_name][agent]['number_episodes'] = results.index.max() + 1 44 | # - rolling avg R per episode 45 | variance_results[instr_id][experiment_name][agent]['mean'] = results['avg_R_mean'].mean() 46 | variance_results[instr_id][experiment_name][agent]['median'] = results['avg_R_mean'].median() 47 | variance_results[instr_id][experiment_name][agent]['std_error'] = results['avg_R_mean'].sem() 48 | variance_results[instr_id][experiment_name][agent]['std_dev'] = results['avg_R_mean'].std() 49 | variance_results[instr_id][experiment_name][agent]['variance'] = results['avg_R_mean'].var() 50 | # - cumulative R per episode 51 | 
variance_results[instr_id][experiment_name][agent]['cum_R_mean'] = results['cum_R_mean'].mean() 52 | variance_results[instr_id][experiment_name][agent]['cum_R_median'] = results['cum_R_mean'].median() 53 | variance_results[instr_id][experiment_name][agent]['cum_R_std_error'] = results['cum_R_mean'].sem() 54 | variance_results[instr_id][experiment_name][agent]['cum_R_std_dev'] = results['cum_R_mean'].std() 55 | variance_results[instr_id][experiment_name][agent]['cum_R_variance'] = results['cum_R_mean'].var() 56 | # - time avg per episode 57 | variance_results[instr_id][experiment_name][agent]['time_avg'] = results['time_mean'].mean() 58 | 59 | variance_results_df = pd.DataFrame.from_dict( 60 | {f"{instr}/{experiment}/{agent}": data for instr, experiments in variance_results.items() 61 | for experiment, agents in experiments.items() 62 | for agent, data in agents.items()}, 63 | orient='index' 64 | ).reset_index() 65 | variance_results_df.columns = ['Instruction/Experiment/Agent', 'Num Repeats', 'Number Episodes', 66 | 'Avg R Mean', 'Avg R Median', 'Avg R Std Error', 'Avg R Std Dev', 'Avg R Variance', 67 | 'Cumulative R Mean', 'Cumulative R Median', 'Cumulative R Std Error', 68 | 'Cumulative R Std Dev', 'Cumulative R Variance', 'Time Avg'] 69 | # Save the combined results to a CSV file 70 | combined_results_filename = f"{analysis_type}_combined_results.csv" 71 | combined_results_path = os.path.join(results_dir, combined_results_filename) 72 | variance_results_df.to_csv(combined_results_path, index=False) 73 | -------------------------------------------------------------------------------- /elsciRL/examples/adapters/elsciRL_sailing_language.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | from torch import Tensor 6 | # StateAdapter includes static methods for adapters 7 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 8 | from gymnasium.spaces import Text, Box 9 | 10 | 11 | class LanguageAdapter: 12 | _cached_state_idx: Dict[str, int] = dict() 13 | 14 | def __init__(self, setup_info:dict={}) -> None: 15 | # Language encoder doesn't require any preset knowledge of env to use 16 | self.encoder = LanguageEncoder() 17 | # Observation is string: "x_angle" 18 | # -> encoder output is 1x384 tensor from miniLM 19 | self.observation_space = Box(low=-1, high=1, shape=(1,384), dtype=np.float32) 20 | 21 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor: 22 | """ Generate a natural-language description of the current sailing state and encode it for the agent """ 23 | 24 | # state = 'x_angle' 25 | # legal_moves = [0,1] 26 | # episode_action_history = [action, action, action] where action = [0,1] 27 | 28 | # Angle is relative to the goal of moving forward (i.e.
bearing) 29 | # - angle=0 is directly forward 30 | # - angle<0 is slightly left 31 | # - angle>0 is slightly right 32 | 33 | x = float(state.split('_')[0]) 34 | angle = float(state.split('_')[1]) 35 | 36 | # Horizontal position 37 | if (x>-1)&(x<1): 38 | L_x = 'in the middle' 39 | elif (x>-3)&(x<3): 40 | L_x = 'near to the center' 41 | elif (x>-5)&(x<5): 42 | L_x = 'in between the edge and the center' 43 | elif (x>-7)&(x<7): 44 | L_x = 'near to the edge' 45 | elif (x>=-10)&(x<=10): 46 | L_x = 'very close to the edge' 47 | else: 48 | L_x = 'out of bounds' 49 | 50 | # Side of river 51 | if x<0: 52 | L_x_side = 'on the harbor side of the river' 53 | elif x>0: 54 | L_x_side = 'on the beach side of the river' 55 | else: 56 | L_x_side = '' 57 | 58 | # Angle 59 | # - Defined in radians where 90deg = 1.57 60 | # - Peak velocity at 45deg = pi/4 = 0.7853... 61 | if angle==0: 62 | L_angle = 'facing directly into the wind' 63 | elif (angle>-0.1)&(angle<0.1): 64 | L_angle = 'facing into the wind' 65 | elif (angle>-0.5)&(angle<0.5): 66 | L_angle = 'close hauled with wind' 67 | elif (angle>-1)&(angle<1): 68 | L_angle = 'cutting the wind' 69 | else: 70 | L_angle = 'moving across the wind' 71 | # Wind side 72 | if angle<0: 73 | L_wind_side = 'on the starboard side' 74 | elif angle>0: 75 | L_wind_side = 'on the port side' 76 | else: 77 | L_wind_side = '' 78 | 79 | L_state = 'The boat is ' + L_x_side + ' ' + L_x + ', ' + L_angle + ' ' + L_wind_side + ', ' 80 | L_state = L_state.replace(' ', ' ').replace(' .','.').replace(' ,',',').replace(' and,','') # Remove double spaces 81 | 82 | # Last action taken and final language state output 83 | if len(episode_action_history)>0: 84 | last_action = episode_action_history[-1] 85 | # if last_action==0: 86 | # L_action = 'the last action was to turn to the left slightly.' 87 | # elif last_action==1: 88 | # L_action = 'the last action was to turn to the right slightly.' 89 | 90 | if (x<=0)&(last_action==0): 91 | L_action = 'the last action was to turn towards the harbor.' 92 | elif (x<0)&(last_action==1): 93 | L_action = 'the last action was to turn towards the center of the river.' 94 | elif (x>=0)&(last_action==1): 95 | L_action = 'the last action was to turn towards the beach.' 96 | elif (x>0)&(last_action==0): 97 | L_action = 'the last action was to turn towards the center of the river.' 
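            # Illustrative example (approximate): for state '-2.5_0.3' after last action 1, the description
            # assembled below reads roughly: 'The boat is on the harbor side of the river near to the center,
            # close hauled with wind on the port side, the last action was to turn towards the center of the river.'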
98 | 99 | state = L_state + ' ' + L_action 100 | else: 101 | state = L_state 102 | 103 | #print(state) 104 | 105 | # Encode to Tensor for agents 106 | if encode: 107 | state_encoded = self.encoder.encode(state=state) 108 | else: 109 | state_encoded = state 110 | 111 | if (indexed): 112 | state_indexed = list() 113 | for sent in state: 114 | if (sent not in LanguageAdapter._cached_state_idx): 115 | LanguageAdapter._cached_state_idx[sent] = len(LanguageAdapter._cached_state_idx) 116 | state_indexed.append(LanguageAdapter._cached_state_idx[sent]) 117 | 118 | state_encoded = torch.tensor(state_indexed) 119 | 120 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/environment_setup/results_table.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | class ResultsTable: 5 | def __init__(self, local_setup_info:dict = None) -> None: 6 | if type(local_setup_info['training_results']) != type(pd.DataFrame()): 7 | self.agent:list = [] 8 | self.opponent:list =[] 9 | self.episode:list = [] 10 | self.num_actions:list = [] 11 | self.episode_reward:list = [] 12 | self.cumulative_reward:list = [] 13 | self.time_per_episode:list = [] 14 | self.action_history:list = [] 15 | self.q_total:list = [] 16 | self.q_mean:list = [] 17 | # new 18 | self.cum_r = 0 19 | else: 20 | self.agent:list = local_setup_info['training_results'].agent.tolist() 21 | self.opponent:list =local_setup_info['training_results'].opponent.tolist() 22 | self.episode:list = local_setup_info['training_results'].episode.tolist() 23 | self.num_actions:list = local_setup_info['training_results'].num_actions.tolist() 24 | self.episode_reward:list = local_setup_info['training_results'].episode_reward.tolist() 25 | self.cumulative_reward:list = local_setup_info['training_results'].cumulative_reward.tolist() 26 | self.cum_r = self.cumulative_reward[-1] 27 | self.time_per_episode:list = local_setup_info['training_results'].time_per_episode.tolist() 28 | self.action_history:list = local_setup_info['training_results'].action_history.tolist() 29 | self.q_total:list = local_setup_info['training_results'].q_total.tolist() 30 | self.q_mean:list = local_setup_info['training_results'].q_mean.tolist() 31 | 32 | def results_per_episode(self,agent_name:str='missing', opponent_name:str='None', episode_num:int=0, action_num:int=0, episode_reward:float=0, time:float=0, episode_action_history:list=[], q_total:float=0, q_mean:float=0): 33 | self.agent.append(agent_name) 34 | self.opponent.append(opponent_name) 35 | self.episode.append(episode_num) 36 | self.num_actions.append(action_num) 37 | self.episode_reward.append(episode_reward) 38 | self.cum_r +=episode_reward 39 | self.cumulative_reward.append(self.cum_r) 40 | self.time_per_episode.append(time) 41 | self.action_history.append(episode_action_history) 42 | self.q_total.append(q_total) 43 | self.q_mean.append(q_mean) 44 | 45 | 46 | def results_table_format(self): 47 | results= pd.DataFrame({ 48 | 'agent': self.agent, 49 | 'opponent': self.opponent, 50 | 'episode': self.episode, 51 | 'num_actions': self.num_actions, 52 | 'episode_reward': self.episode_reward, 53 | "cumulative_reward": self.cumulative_reward, 54 | "time_per_episode":self.time_per_episode, 55 | "action_history": self.action_history, 56 | "q_total":self.q_total, 57 | "q_mean":self.q_mean}) 58 | return results 59 | 60 | def reset(self): 61 | self.agent:list = [] 62 | self.opponent:list =[] 63 | self.episode:list = [] 64 | 
self.num_actions:list = [] 65 | self.episode_reward:list = [] 66 | self.cum_r = 0 67 | self.cumulative_reward:list = [] 68 | self.time_per_episode:list = [] 69 | self.action_history:list = [] 70 | self.q_total:list = [] 71 | self.q_mean:list = [] 72 | 73 | def copy(self): 74 | results_copy= pd.DataFrame({ 75 | 'agent': self.agent.copy(), 76 | 'opponent': self.opponent.copy(), 77 | 'episode': self.episode.copy(), 78 | 'num_actions': self.num_actions.copy(), 79 | 'episode_reward': self.episode_reward.copy(), 80 | "cumulative_reward": self.cumulative_reward.copy(), 81 | "time_per_episode":self.time_per_episode.copy(), 82 | "action_history":self.action_history.copy(), 83 | "q_total":self.q_total.copy(), 84 | "q_mean":self.q_mean.copy()}) 85 | return results_copy 86 | 87 | def load(self, results_copy): 88 | self.agent:list = results_copy.agent.tolist() 89 | self.opponent:list = results_copy.opponent.tolist() 90 | self.episode:list = results_copy.episode.tolist() 91 | self.num_actions:list = results_copy.num_actions.tolist() 92 | self.episode_reward:list = results_copy.episode_reward.tolist() 93 | self.cumulative_reward:list = results_copy.cumulative_reward.tolist() 94 | self.cum_r = self.cumulative_reward[-1] 95 | self.time_per_episode:list = results_copy.time_per_episode.tolist() 96 | self.action_history:list = results_copy.action_history.tolist() 97 | self.q_total:list = results_copy.q_total.tolist() 98 | self.q_mean:list = results_copy.q_mean.tolist() 99 | 100 | 101 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_state_generators/text_gpt-4.1.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Any 3 | import os 4 | 5 | from openai import OpenAI 6 | 7 | try: 8 | from torch import Tensor 9 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 10 | except ImportError: 11 | print("Warning: torch or LanguageEncoder not found. Please ensure elsciRL is properly installed.") 12 | Tensor = None 13 | LanguageEncoder = None 14 | 15 | class LLMAdapter(ABC): 16 | """Convert a general prompt and raw text state into a description of the state.""" 17 | def __init__(self, base_prompt: str): 18 | super().__init__() 19 | # Define the fields that describe the state features: 20 | self.base_prompt = base_prompt 21 | 22 | @abstractmethod 23 | def _read(raw_state) -> list: 24 | # Read the data. 25 | # fill in the feature fields 26 | raise NotImplementedError 27 | 28 | 29 | class GPTAdapter(LLMAdapter): 30 | """Adapter for OpenAI GPT models.""" 31 | 32 | def __init__(self, base_prompt: str, model_name: str = "gpt-4"): 33 | super().__init__(base_prompt) 34 | self.model_name = model_name 35 | 36 | # Initialize the language encoder for encoding functionality 37 | if LanguageEncoder is not None: 38 | self.encoder = LanguageEncoder() 39 | else: 40 | print("Warning: LanguageEncoder not available. 
Encoding will not work.") 41 | self.encoder = None 42 | 43 | def _read(self, raw_state) -> list: 44 | """Read the data and fill in the feature fields.""" 45 | # This method should be implemented based on specific requirements 46 | # For now, returning the raw state as a list 47 | return [raw_state] if isinstance(raw_state, str) else raw_state 48 | 49 | def call_gpt_api(self, prompt: str): 50 | """Call the OpenAI GPT API with the given prompt.""" 51 | try: 52 | api_key = os.environ.get("OPENAI_API_KEY") 53 | if not api_key: 54 | raise ValueError("OPENAI_API_KEY environment variable not set") 55 | 56 | client = OpenAI(api_key=api_key) 57 | response = client.chat.completions.create( 58 | model=self.model_name, 59 | messages=[ 60 | {"role": "system", "content": self.base_prompt}, 61 | {"role": "user", "content": prompt} 62 | ], 63 | max_tokens=5000 64 | ) 65 | return response.to_dict() if hasattr(response, 'to_dict') else response 66 | except Exception as e: 67 | print(f"Error calling OpenAI API: {e}") 68 | return None 69 | 70 | def process_gpt_response(self, response): 71 | """Process the response from OpenAI API.""" 72 | if response and 'choices' in response: 73 | return response['choices'][0]['message']['content'] 74 | return None 75 | 76 | def adapter(self, state: any, legal_moves: list = None, episode_action_history: list = None, encode: bool = True, indexed: bool = False) -> Tensor: 77 | """Returns the adapted form, may require input flag for encoded or non-encoded output.""" 78 | # Build the full context prompt including legal moves and action history 79 | context_parts = [] 80 | 81 | # Add state information 82 | if state is not None: 83 | context_parts.append(f"Current state: {state}") 84 | 85 | # Add legal moves if provided 86 | if legal_moves is not None and len(legal_moves) > 0: 87 | context_parts.append(f"Legal moves: {legal_moves}") 88 | 89 | # Add action history if provided 90 | if episode_action_history is not None and len(episode_action_history) > 0: 91 | recent_actions = episode_action_history[-5:] # Last 5 actions 92 | context_parts.append(f"Recent actions: {recent_actions}") 93 | 94 | # Combine all context into a single prompt 95 | full_prompt = " | ".join(context_parts) 96 | 97 | # Get GPT response 98 | adapted_state = self.call_gpt_api(full_prompt) 99 | processed_response = self.process_gpt_response(adapted_state) 100 | 101 | if processed_response is None: 102 | processed_response = str(state) if state is not None else "No state available" 103 | 104 | # Handle encoding 105 | if encode: 106 | if self.encoder is not None: 107 | # Use the LanguageEncoder to encode the response 108 | state_encoded = self.encoder.encode( 109 | state=processed_response, 110 | legal_actions=legal_moves, 111 | episode_action_history=episode_action_history, 112 | indexed=indexed 113 | ) 114 | return state_encoded 115 | else: 116 | print("Warning: Encoder not available, returning processed response as string") 117 | return processed_response 118 | else: 119 | return processed_response 120 | 121 | def sample(self, state: any): 122 | """Returns a sample of an adapted state form (typically initial position of the environment).""" 123 | if not state: 124 | state = 'The current state is empty.' 
125 | return self.adapter(state, encode=True) 126 | 127 | 128 | -------------------------------------------------------------------------------- /elsciRL/experiments/training_procedures/default_exp_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | from elsciRL.experiments.experiment_utils.config_utils import ensure_dir 3 | from elsciRL.experiments.experiment_utils.render_current_results import render_current_result 4 | 5 | 6 | def run_training_loop( 7 | env_manager, 8 | agent_factory, 9 | result_manager, 10 | training_render, 11 | training_render_save_dir, 12 | save_dir, 13 | engine_name, 14 | engine, 15 | agent_type, 16 | adapter, 17 | all_adapters, 18 | train_setup_info, 19 | trained_agents, 20 | num_training_seeds, 21 | test_agent_type, 22 | show_figures, 23 | number_training_repeats, 24 | gym_env:bool=False 25 | ): 26 | if f"{engine_name}_{agent_type}_{adapter}" not in trained_agents: 27 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"] = {} 28 | seed_recall = {} 29 | seed_results_connection = {} 30 | for seed_num in range(num_training_seeds): 31 | if num_training_seeds > 1: 32 | print("------\n- Seed Num: ", seed_num) 33 | if seed_num == 0: 34 | train_setup_info['training_results'] = False 35 | train_setup_info['observed_states'] = False 36 | else: 37 | train_setup_info['training_results'] = False 38 | train_setup_info['observed_states'] = observed_states_stored.copy() 39 | setup_num = 0 40 | temp_agent_store = {} 41 | for training_repeat in range(1, number_training_repeats + 1): 42 | if number_training_repeats > 1: 43 | print("------\n- Repeat Num: ", training_repeat) 44 | setup_num += 1 45 | agent = agent_factory.create(agent_type, train_setup_info['agent_parameters'][agent_type], engine, adapter) 46 | train_setup_info['agent'] = agent 47 | # Create the environment, use gym_env if specified 48 | if gym_env: 49 | live_env = env_manager.create_gym_env(engine, adapter, train_setup_info) 50 | else: 51 | live_env = env_manager.create_env(engine, all_adapters, train_setup_info) 52 | # --- 53 | if training_repeat > 1: 54 | live_env.start_obs = env_start 55 | env_start = live_env.start_obs 56 | goal = str(env_start).split(".")[0] + "---GOAL" 57 | print("Flat agent Goal: ", goal) 58 | if goal in seed_recall: 59 | setup_num = seed_recall[goal] 60 | else: 61 | seed_recall[goal] = 1 62 | agent_save_dir = os.path.join(save_dir, 63 | f"{engine_name}_{agent_type}_{adapter}__training_results_{goal}_{setup_num}" 64 | ) if num_training_seeds > 1 else os.path.join(save_dir, 65 | f"{engine_name}_{agent_type}_{adapter}__training_results_{setup_num}" 66 | ) 67 | ensure_dir(agent_save_dir) 68 | if goal in trained_agents[f"{engine_name}_{agent_type}_{adapter}"]: 69 | live_env.agent = trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal].clone() 70 | live_env.agent.exploration_parameter_reset() 71 | if goal in seed_results_connection: 72 | live_env.results.load(seed_results_connection[goal]) 73 | training_results = live_env.episode_loop() 74 | training_results['episode'] = training_results.index 75 | training_results.insert(loc=0, column='Repeat', value=setup_num) 76 | Return = result_manager.train_report(training_results, agent_save_dir, show_figures) 77 | if goal not in temp_agent_store: 78 | temp_agent_store[goal] = {} 79 | temp_agent_store[goal][setup_num] = {'Return': Return, 'agent': live_env.agent.clone()} 80 | if training_repeat == 1: 81 | max_Return = Return 82 | best_agent = live_env.agent 83 | training_results_stored = 
live_env.results.copy() 84 | observed_states_stored = live_env.elsciRL.observed_states 85 | if Return > max_Return: 86 | max_Return = Return 87 | best_agent = live_env.agent 88 | training_results_stored = live_env.results.copy() 89 | observed_states_stored = live_env.elsciRL.observed_states 90 | seed_recall[goal] = seed_recall[goal] + 1 91 | train_setup_info['train_save_dir'] = agent_save_dir 92 | if training_render: 93 | current_render_save_dir = training_render_save_dir or agent_save_dir 94 | render_current_result( 95 | training_setup=train_setup_info, 96 | current_environment=live_env, 97 | current_agent=live_env.agent, 98 | local_save_dir=current_render_save_dir 99 | ) 100 | seed_results_connection[goal] = training_results_stored 101 | # Save trained agent(s) 102 | if test_agent_type.lower() == 'best': 103 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal] = best_agent.clone() 104 | elif test_agent_type.lower() == 'all': 105 | start_repeat_num = list(temp_agent_store[goal].keys())[0] 106 | end_repeat_num = list(temp_agent_store[goal].keys())[-1] 107 | all_agents = [temp_agent_store[goal][repeat]['agent'] for repeat in range(start_repeat_num, end_repeat_num + 1)] 108 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal] = all_agents 109 | 110 | return trained_agents, seed_results_connection, temp_agent_store, training_results_stored, observed_states_stored 111 | -------------------------------------------------------------------------------- /elsciRL/GUI/LLM_tools/LLM_utils.py: -------------------------------------------------------------------------------- 1 | # IMPORTS LLM API TOOLS 2 | # EDITED OUT FOR NOW UNTIL FULL IMPLEMENTATION READY 3 | 4 | # import os 5 | # import json 6 | 7 | # from openai import OpenAI 8 | 9 | # def call_gpt_api(prompt): 10 | # import os 11 | # api_key = os.environ.get("OPENAI_API_KEY") 12 | # client = OpenAI(api_key=api_key) 13 | # response = client.chat.completions.create( 14 | # model="gpt-4.1", 15 | # messages=[{"role": "system", "content": prompt}], 16 | # max_tokens=5000 17 | # ) 18 | # return response.to_dict() if hasattr(response, 'to_dict') else response 19 | 20 | # def process_gpt_response(response): 21 | # if response and 'choices' in response: 22 | # return response['choices'][0]['message']['content'] 23 | # return None 24 | 25 | # def generate_application(self, user_input:str=''): 26 | # # TODO: Use this in a new tab with user input to update application list 27 | # # Load the app_setup.md content as part of the system prompt 28 | 29 | # # Add requirement to system prompt for code chunk separation 30 | # system_prompt_requirement = ( 31 | # "If your response contains any code chunks, you must output them in a separate section clearly marked as 'Code Output', " 32 | # "so that the application can extract and save them to a file. Do not mix code with explanations in the same section." 33 | # ) 34 | # # Combine the app_setup.md info with the system prompt and the new requirement 35 | # system_prompt = ( 36 | # "You are a helpful assistant. " 37 | # "Below is important application setup information for elsciRL:\n" 38 | # f"{self.app_setup_info}\n" 39 | # f"{system_prompt_requirement}\n" 40 | # "Please use this information to answer user queries." 
41 | # ) 42 | 43 | # if not user_input: 44 | # return {"error": "No input provided"} 45 | 46 | # # Use the utils function to call the GPT API 47 | # response = call_gpt_api(system_prompt + "\nUser: " + user_input) 48 | # reply = process_gpt_response(response) 49 | # print(reply) 50 | # if not reply: 51 | # return {"error": "Failed to get response from GPT API"} 52 | 53 | # # Save the complete output to a .txt file 54 | # output_dir = os.path.join(os.path.dirname(__file__), 'output') 55 | # os.makedirs(output_dir, exist_ok=True) 56 | # output_path = os.path.join(output_dir, 'last_gpt_response.txt') 57 | # with open(output_path, 'w', encoding='utf-8') as f: 58 | # f.write(str(response)) 59 | 60 | # # Follow-up: Ask the AI model to extract all Python code and JSON config blocks and return a list of (filename, code) pairs 61 | # followup_prompt = ( 62 | # "Extract all Python code blocks and JSON config blocks from the following text. " 63 | # "For each code or config block, output a JSON array where each item has 'filename' and 'code' fields. " 64 | # "Choose a descriptive filename for each code block (e.g., based on class/function names or comments, use .py for Python and .json for configs). " 65 | # "Do not include any explanation, only the JSON array.\n\n" + reply 66 | # ) 67 | # code_response = call_gpt_api(followup_prompt) 68 | # code_reply = process_gpt_response(code_response) 69 | # try: 70 | # code_blocks = json.loads(code_reply) 71 | # generated_data = {} 72 | # for block in code_blocks: 73 | # fname = block.get('filename', 'extracted_code.py') 74 | # code = block.get('code', '') 75 | # generated_data[fname] = code 76 | # code_file_path = os.path.join(output_dir, fname) 77 | # with open(code_file_path, 'w', encoding='utf-8') as code_file: 78 | # code_file.write(code) 79 | # except Exception as e: 80 | # # fallback: save the raw reply if not valid JSON 81 | # code_file_path = os.path.join(output_dir, 'extracted_code.py') 82 | # with open(code_file_path, 'w', encoding='utf-8') as code_file: 83 | # code_file.write(code_reply.strip()) 84 | 85 | # for name,code in generated_data.items(): 86 | # if 'engine' in name.lower(): 87 | # generated_data['engine'] = code 88 | # elif 'analysis' in name.lower(): 89 | # generated_data['analysis'] = code 90 | # elif ('experiment' in name.lower()) | ('agent' in name.lower()): 91 | # generated_data['agent_config'] = code 92 | # elif ('local' in name.lower()) | ('env' in name.lower()): 93 | # generated_data['local_config'] = code 94 | # elif 'adapter_language' in name.lower(): 95 | # generated_data['adapter_language'] = code 96 | # elif ('numeric' in name.lower()) | ('default' in name.lower()): 97 | # generated_data['adapter_numeric'] = code 98 | 99 | # # Create the application setup dictionary 100 | # application_setup = { 101 | # 'engine':generated_data['engine'], 102 | # 'experiment_configs':{'quick_test':generated_data['agent_config']}, 103 | # 'local_configs':{'env_config':generated_data['local_config']}, 104 | # 'adapters':{'numeric_adapter':generated_data['adapter_numeric'], 105 | # 'language_adapter':generated_data['adapter_language']}, 106 | # 'local_analysis':{'blackjack_graphs':generated_data['analysis']}, 107 | # 'prerender_data':{}, 108 | # 'prerender_images':{}, 109 | # } 110 | 111 | # # Add the new application to the application data 112 | # self.pull_app_data = self.application_data.add_applicaiton( 113 | # problem=generated_data['agent_config']['name'], 114 | # application_data=application_setup 115 | # ) 116 | 117 | # return reply 
-------------------------------------------------------------------------------- /elsciRL/interaction_loops/state_search.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from tqdm import tqdm 4 | 5 | # ------ Imports ----------------------------------------- 6 | # Agent Setup 7 | from elsciRL.environment_setup.imports import ImportHelper 8 | 9 | # Evaluation standards 10 | from elsciRL.environment_setup.results_table import ResultsTable 11 | from elsciRL.environment_setup.elsciRL_info import elsciRLInfo 12 | 13 | 14 | def episode_loop(Engine, Adapters: dict, local_setup_info: dict, number_episodes: int = 1000, 15 | batch_number: int = 0, observed_states: dict = {}) -> dict: 16 | # --- INIT state space from engine 17 | agent_adapter_name = local_setup_info['agent_type'] + "_" + local_setup_info['adapter_select'] 18 | engine = Engine(local_setup_info) 19 | start_obs = engine.reset() 20 | # --- PRESET elsciRL INFO 21 | # Agent 22 | Imports = ImportHelper(local_setup_info) 23 | agent, agent_type, agent_name, agent_state_adapter = ( 24 | Imports.agent_info(Adapters) 25 | ) 26 | ( 27 | num_train_episodes, 28 | num_test_episodes, 29 | training_action_cap, 30 | testing_action_cap, 31 | reward_signal, 32 | ) = Imports.parameter_info() 33 | 34 | # Training or testing phase flag 35 | train = Imports.training_flag() 36 | 37 | # Mode selection (already initialized) 38 | # --- elsciRL 39 | live_env, observed_states_flag = ( 40 | Imports.live_env_flag() 41 | ) 42 | # Results formatting 43 | results = ResultsTable(local_setup_info) 44 | # elsciRL input function 45 | # - We only want to init trackers on first batch otherwise it resets knowledge 46 | elsciRL = elsciRLInfo(observed_states) 47 | # RENDER AND SUB-GOALS REMOVED COMPLETELY SO SAVE RUN-TIME 48 | 49 | for episode in tqdm(range(0, number_episodes)): 50 | action_history = [] 51 | # --- 52 | # Start observation is used instead of .reset() fn so that this can be overridden for repeat analysis from the same start pos 53 | obs = engine.reset(start_obs=start_obs) 54 | legal_moves = engine.legal_move_generator(obs) 55 | 56 | # LLM agents need to pass the state as a string 57 | if agent_type.split("_")[0] == "LLM": 58 | state = agent_state_adapter.adapter( 59 | state=obs, 60 | legal_moves=legal_moves, 61 | episode_action_history=action_history, 62 | encode=False, 63 | ) 64 | else: 65 | state = agent_state_adapter.adapter( 66 | state=obs, 67 | legal_moves=legal_moves, 68 | episode_action_history=action_history, 69 | encode=True, 70 | ) 71 | # --- 72 | start_time = time.time() 73 | episode_reward: int = 0 74 | # --- 75 | for action in range(0, training_action_cap): 76 | if live_env: 77 | # Agent takes action 78 | legal_moves = engine.legal_move_generator(obs) 79 | agent_action = agent.policy(state, legal_moves) 80 | 81 | if isinstance(agent_action, np.int64): 82 | action_history.append(agent_action.item()) 83 | else: 84 | action_history.append(agent_action) 85 | 86 | next_obs, reward, terminated, _ = engine.step( 87 | state=obs, action=agent_action 88 | ) 89 | 90 | # Can override reward per action with small negative punishment 91 | if reward == 0: 92 | reward = reward_signal[1] 93 | 94 | # Only update observed states if not already observed 95 | if next_obs not in observed_states: 96 | legal_moves = engine.legal_move_generator(next_obs) 97 | # LLM agents need to pass the state as a string 98 | if agent_type.split("_")[0] == "LLM": 99 | next_state = agent_state_adapter.adapter( 
100 | state=next_obs, 101 | legal_moves=legal_moves, 102 | episode_action_history=action_history, 103 | encode=False, 104 | ) 105 | else: 106 | next_state = agent_state_adapter.adapter( 107 | state=next_obs, 108 | legal_moves=legal_moves, 109 | episode_action_history=action_history, 110 | encode=True, 111 | ) 112 | # elsciRL trackers 113 | # TODO: Consider adding prior action history to the tracker so that we can 114 | # transform observed data across adapters without loss of information 115 | observed_states = elsciRL.observed_state_tracker( 116 | engine_observation=next_obs, 117 | language_state=agent_state_adapter.adapter( 118 | state=next_obs, 119 | legal_moves=legal_moves, 120 | episode_action_history=action_history, 121 | encode=False, 122 | ), 123 | ) 124 | 125 | episode_reward += reward 126 | if terminated: 127 | break 128 | else: 129 | state = next_state 130 | if live_env: 131 | obs = next_obs 132 | 133 | # If action limit reached 134 | if not terminated: 135 | reward = reward_signal[2] 136 | 137 | end_time = time.time() 138 | try: 139 | agent_results = agent.q_result() 140 | except: 141 | agent_results = [0, 0] 142 | 143 | if live_env: 144 | results.results_per_episode( 145 | agent_name, 146 | None, 147 | episode, 148 | action, 149 | episode_reward, 150 | (end_time - start_time), 151 | action_history, 152 | agent_results[0], 153 | agent_results[1], 154 | ) 155 | # Output GIF image of all episode frames 156 | return observed_states 157 | -------------------------------------------------------------------------------- /elsciRL/analysis/convergence_measure.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from typing import List 3 | 4 | # Define convergence evaluation function 5 | class Convergence_Measure: 6 | def __init__(self, total_num_episodes): 7 | # --- PARAMETERS --- 8 | self.conv_threshold_perc = 5 9 | self.num_prior_epi = int(total_num_episodes/10) 10 | self.num_prior_epi_points = 5 11 | self.plot_convergence_figures = False 12 | # ------------------ 13 | # Ploy display time 14 | self.display_plot_time = 10 15 | 16 | def convergence_check(self, value_list: List[float], player_side: str, visual_save_dir: str): 17 | """ CONVERGENCE CHECK METHODOLOGY 18 | - Goes through each Q value by episode and calculates the percentage change from the previous result 19 | - Because a single point can not provide accurate results, we introduce a system in which N prior output points are checked 20 | - We set the prior check points by setting a range of episodes and evenly space N points between the current episode and the fist check point defined by the range 21 | - We accept that the output has converged if ALL the prior N outputs percentage change is less than our threshold 22 | - The episode for which the output has converged in then the first check point of this providing a systematic numeric convergence evaluation 23 | """ 24 | perc_change_tracker = [] 25 | prior_change_long_term_tracker = [] 26 | conv_met_check = [] 27 | conv_met = False 28 | for n in range(0,len(value_list)): 29 | value = value_list[n] 30 | # First row fixed value 31 | if n == 0: 32 | perc_change = 100 33 | else: 34 | perc_change = abs((value - prior_row_value)/prior_row_value)*100 35 | perc_change_tracker.append(perc_change) 36 | 37 | prior_epi_points_tracker = [] 38 | if n 0 else 1 109 | agent.learn(total_steps=total_steps) 110 | reward, actions, _, render_stack = agent.test(env, render=render) 111 | end_time = time.time() 112 | 113 | if 
actions: 114 | if isinstance(actions[0], np.int64): 115 | actions = [action.item() for action in actions] 116 | elif isinstance(actions[0], np.ndarray): 117 | actions = [action.item() for action in actions] 118 | 119 | results_table.results_per_episode( 120 | agent_name, 121 | None, 122 | episode, 123 | len(actions), 124 | reward, 125 | (end_time - start_time), 126 | actions, 127 | 0, 128 | 0, 129 | ) 130 | if render and render_stack: 131 | episode_render.extend(_normalize_render_stack(render_stack)) 132 | 133 | table_results = results_table.results_table_format() 134 | if render and episode_render: 135 | ensure_dir(render_save_dir or "renders") 136 | episode_render[0].save( 137 | f"{render_save_dir or 'renders'}/{agent_name}_policy.gif", 138 | save_all=True, 139 | append_images=episode_render[1:], 140 | optimize=False, 141 | duration=200, 142 | loop=1, 143 | ) 144 | return table_results 145 | 146 | @classmethod 147 | def policy_rollout( 148 | cls, 149 | agent, 150 | env, 151 | agent_name: str, 152 | num_episodes: int, 153 | results_table, 154 | render: bool = False, 155 | render_save_dir: Optional[str] = None, 156 | action_limit: Optional[int] = None, 157 | ): 158 | return cls._run_rollout( 159 | agent=agent, 160 | env=env, 161 | agent_name=agent_name, 162 | num_episodes=num_episodes, 163 | results_table=results_table, 164 | train=False, 165 | training_action_cap=None, 166 | testing_action_cap=action_limit, 167 | render=render, 168 | render_save_dir=render_save_dir, 169 | ) 170 | -------------------------------------------------------------------------------- /elsciRL/GUI/static/app_setup.md: -------------------------------------------------------------------------------- 1 | # New elsciRL Application Setup 2 | 3 | Each application can be added to the elsciRL library to be used within the GUI interface app. 4 | 5 | ## Add Application to elsciRL 6 | 7 | Once the following functions are specified, you can add the application to the elsciRL library by first publishing it to GitHub and then referencing this in the application suite. 8 | 9 | > elsciRL > application_suite > import_data.py 10 | 11 | ![import_data](<./_images/import_data_small.png>) 12 | 13 | ## Core Requirements 14 | 15 | ### Environment 16 | Each application is defined by a unique engine that generates the data. Variations of the same problem specified by different data engines are considered different applications. 17 | 18 | Define the MDP data engine with the following functions. 
19 | 
20 | ```python
21 | class Engine:
22 |     def __init__(self, local_setup_info:dict):
23 |         # Store optional setup info
24 |         # Initialize ledger of required & optional data
25 |         # Prepare any internal environment data structures
26 |         # Initialize histories (e.g., action_history, obs_history)
27 | 
28 |     def reset(self, start_obs = None):
29 |         # Reset environment state
30 |         # Optionally accept a specified start_obs
31 |         return start_state
32 | 
33 |     def step(self, state, action):
34 |         # Apply the chosen action
35 |         # Update state and compute reward
36 |         # Determine if episode is terminated
37 |         return next_state, reward, terminated, info
38 | 
39 |     def legal_move_generator(self, state = None):
40 |         # Return a list of valid actions given the current observation
41 |         return legal_moves
42 | 
43 |     def render(self, state = None):
44 |         # Generate and optionally display a visual representation of the environment
45 |         # Return a figure object
46 |         return fig
47 | 
48 |     def close(self):
49 |         # Close active processes/handles related to the environment
50 | ```
51 | 
52 | 
53 | ### Configs
54 | There are two types of configuration files. Note that only agent parameters can be adjusted in the GUI app interface.
55 | 
56 | Environment configurations can be varied and saved as separate inputs to change in the interface.
57 | 
58 | - *agent config* is used to specify agent parameters
59 |   - fixed by the agent methodology and hierarchy architecture
60 | - *env config* is used to specify problem-specific parameters
61 |   - Any parameters that are used by the environment
62 |   - Specify the action limit
63 |   - Specify manual sub_goal positions (exact state matching)
64 |   - Specify the reward signal
65 |     - [*sub_goal_reached, per_action, incomplete*]
66 |     - *sub_goal_reached* is used for instruction completion
67 |     - *per_action* and *incomplete* are optional if not specified by the environment already
68 | 
69 | ```json
70 | // agent config (agent_config.json)
71 | { "name": "Gym-FrozenLake",
72 |   "problem_type": "Gymnasium-ToyText",
73 |   "number_training_episodes": 1000,
74 |   "number_training_repeats": 20,
75 |   "agent_select": ["Qlearntab"],
76 |   "agent_parameters": {
77 |     "Qlearntab": {
78 |       "alpha": 0.1,
79 |       "gamma": 0.9,
80 |       "epsilon": 0.2,
81 |       "epsilon_step": 0
82 |     }
83 |   } }
84 | ```
85 | 
86 | ```json
87 | // environment config (env_config.json)
88 | { "environment_size": "4x4",
89 |   "adapter_select": ["default", "language"],
90 |   "action_limit": 100,
91 |   "reward_signal": [1, 0, -0.05],
92 |   "sub_goal": "None" }
93 | ```
94 | ### Adapters
95 | Adapters unify problems into a standard form so any agent in the elsciRL library can be used.
96 | 
97 | In short, an adapter transforms the state into a new form, optionally adding more context, and then outputs a tensor.
98 | - *inputs*: state, legal moves, action history for episode
99 | - *outputs*: tensor for the encoded form of the adapted state
100 | 
101 | 
102 | ```python
103 | # numeric adapter (numeric.py)
104 | class DefaultAdapter:
105 |     def __init__(self, setup_info):
106 |         # Determine discrete environment size: e.g. "4x4" => 16 positions
107 |         # Initialize a StateEncoder for these positions
108 |         # Optionally define an observation space (e.g., Discrete) needed for Gym agents
109 | 
110 |     def adapter(self, state, legal_moves=[], episode_action_history=[], encode=True, indexed=False):
111 |         # If encode=True, convert the numeric state to a tensor (StateEncoder)
112 |         # If indexed=True, map states to integer IDs
113 | 
114 |         return tensor(state_encoded)
115 | ```
116 | 
117 | ```python
118 | # language adapter (language.py)
119 | class LanguageAdapter:
120 |     def __init__(self, setup_info):
121 |         # Build obs_mapping dictionary describing each state as text
122 |         # Initialize LanguageEncoder
123 | 
124 |     def adapter(self, state, legal_moves=[], episode_action_history=[], encode=True, indexed=False):
125 |         # Convert numeric state ID to a text description (obs_mapping)
126 |         # Optionally encode the text into a tensor (LanguageEncoder)
127 |         # Optionally map each unique description to an indexed ID
128 | 
129 |         return tensor(state_encoded)
130 | ```
131 | 
132 | ## Analysis Scripts
133 | 
134 | You can add a script file that produces problem-specific analysis for the results tab.
135 | 
136 | The form of this is the following; note the class must be called *Analysis* and each plot function must return a dict of matplotlib figures.
137 | 
138 | ```python
139 | class Analysis:
140 |     def __init__(self, save_dir):
141 |         self.save_dir = save_dir
142 | 
143 |     def plot_1(self):
144 |         """
145 |         Extract the results data from the save_dir and create problem
146 |         specific evaluation.
147 |         Return a dict of the form:
148 |             {
149 |             'plot_name_1':matplotlib.figure,
150 |             'plot_name_2':matplotlib.figure
151 |             }
152 |         """
153 |         plot_dict = {}
154 |         n = 1
155 |         for data in self.save_dir:
156 |             ...
157 |             figure = plt.figure()
158 |             ax = figure.add_subplot(1, 1, 1)
159 |             ax.scatter(data['x'],data['y'])
160 |             ...
161 | 
162 |             plot_dict['plot'+str(n)] = figure
163 |             n+=1
164 |         return plot_dict
165 | 
166 |     def plot_2(self):
167 |         """Any number of plot functions will be used."""
168 |         ...
169 |         return plot_dict
170 | ```
171 | 
172 | 
173 | 
174 | ## Prerender Data
175 | Prerender data can be used to add an image to describe the problem.
176 | 
177 | Observed states are required to complete the unsupervised instruction following method.
178 | 
179 | Once you have added the application to the *import_data.py* library you can use the elsciRL *get_prerender_data* tool to extract the observed states data.
180 | 
181 | Run the following code and it will guide you through a prompt to select your application, the language adapter to use in instruction following, and the number of exploration episodes.
182 | 
183 | ```python
184 | from elsciRL import get_prerender_data
185 | get = get_prerender_data()
186 | get.run()
187 | ```
188 | 
189 | A fully random search agent is used to find as many states as possible. *observed_states.txt* will be saved in the directory where you run the code, and you can then add this to the prerender data.
--------------------------------------------------------------------------------
/elsciRL/application_suite/CACHE_README.md:
--------------------------------------------------------------------------------
1 | # Import Tool Caching Functionality
2 | 
3 | This document describes the caching functionality added to the `PullApplications` class in `import_tool.py`.
4 | 
5 | ## Overview
6 | 
7 | The import tool now automatically caches imported data to improve performance and reduce network requests. When you import applications, the tool:
8 | 
9 | 1. 
**Checks cache first**: Before downloading from GitHub, it checks if the data is already cached 10 | 2. **Saves to cache**: After successful imports, data is saved to a local cache file 11 | 3. **Tracks imports**: A log file records all import activities with commit IDs and timestamps 12 | 4. **Validates cache**: Cache is validated using commit IDs and source data hashes 13 | 14 | ## Cache Directory Structure 15 | 16 | The caching system creates a directory structure in `.cache`: 17 | 18 | ``` 19 | .cache/ 20 | ├── import_log.json # Import activity log 21 | ├── problem1/ # Problem-specific cache 22 | │ ├── cache_metadata.json # Cache metadata 23 | │ ├── engine/ # Engine Python files 24 | │ │ └── sailing.py 25 | │ ├── adapters/ # Adapter Python files 26 | │ │ ├── adapter1.py 27 | │ │ └── adapter2.py 28 | │ ├── experiment_configs/ # Experiment configuration files 29 | │ │ ├── config1.json 30 | │ │ └── config2.json 31 | │ ├── local_configs/ # Local configuration files 32 | │ │ ├── local_config1.json 33 | │ │ └── local_config2.json 34 | │ ├── prerender_data/ # Prerender data files 35 | │ │ ├── data1.json 36 | │ │ └── data2.json 37 | │ ├── prerender_data_encoded/ # Encoded prerender data (numpy arrays) 38 | │ │ ├── data1.npy 39 | │ │ └── data2.npy 40 | │ ├── prerender_images/ # Image files 41 | │ │ ├── image1.png 42 | │ │ └── image2.jpg 43 | │ └── instructions/ # Instruction files 44 | │ ├── instruction1.json 45 | │ └── instruction2.json 46 | └── problem2/ # Another problem's cache 47 | └── ... 48 | ``` 49 | 50 | ## Key Features 51 | 52 | ### Automatic Caching 53 | ```python 54 | from elsciRL.application_suite.import_tool import PullApplications 55 | 56 | puller = PullApplications() 57 | result = puller.pull(['sailing']) # Automatically uses cache if available 58 | ``` 59 | 60 | ### Cache Information 61 | ```python 62 | # Get information about cached data 63 | cache_info = puller.get_cache_info() 64 | print(cache_info) 65 | ``` 66 | 67 | ### Import History 68 | ```python 69 | # Get latest import information for a problem 70 | latest_info = puller.get_latest_import_info('sailing') 71 | print(latest_info) 72 | ``` 73 | 74 | ### Force Refresh 75 | ```python 76 | # Force refresh (ignores cache) 77 | result = puller.force_refresh(['sailing']) 78 | ``` 79 | 80 | ### Cache Management 81 | ```python 82 | # Clear cache for specific problem 83 | puller.clear_cache('sailing') 84 | 85 | # Clear all cache 86 | puller.clear_cache() 87 | ``` 88 | 89 | ### Main Branch Status Check 90 | ```python 91 | # Check if main branch has been updated 92 | status = puller.check_main_branch_status('sailing') 93 | if status: 94 | print(f"Needs update: {status['needs_update']}") 95 | print(f"Current main date: {status['current_main_date']}") 96 | print(f"Cached main date: {status['cached_main_date']}") 97 | ``` 98 | 99 | ### Automatic Main Branch Updates 100 | When importing with `commit_id='main'`, the system automatically: 101 | 1. Checks if the main branch has been updated since last cache 102 | 2. If updated, pulls fresh data and caches it 103 | 3. If unchanged, uses cached data 104 | 4. Logs all activities with timestamps and commit IDs 105 | 106 | ```python 107 | # This will automatically check for updates and pull fresh data if needed 108 | result = puller.pull(['sailing']) # commit_id='main' in config 109 | ``` 110 | 111 | ## Cache Validation 112 | 113 | The cache is validated using: 114 | 1. **Commit ID**: Ensures the cached data matches the requested commit 115 | 2. 
**Source Hash**: Detects changes in source configuration files 116 | 3. **Timestamp**: Records when the data was cached 117 | 4. **Main Branch Date Check**: For 'main' branch, checks if the main branch has been updated since last cache 118 | 119 | ## Log File Structure 120 | 121 | The import log (`import_log.json`) contains entries like: 122 | ```json 123 | { 124 | "sailing": [ 125 | { 126 | "timestamp": "2024-01-15T10:30:00.123456", 127 | "commit_id": "main", 128 | "source_hash": "abc123...", 129 | "cache_hit": false, 130 | "source_data": { 131 | "engine_folder": "environments", 132 | "engine_filename": "sailing.py", 133 | ... 134 | } 135 | } 136 | ] 137 | } 138 | ``` 139 | 140 | ## Cache Metadata 141 | 142 | Each cached problem includes metadata: 143 | ```python 144 | { 145 | "cache_metadata": { 146 | "commit_id": "main", 147 | "source_hash": "abc123...", 148 | "timestamp": "2024-01-15T10:30:00.123456", 149 | "main_branch_date": "2024-01-15T10:30:00Z", # Only for 'main' branch 150 | "main_branch_sha": "abc123def456..." # Only for 'main' branch 151 | }, 152 | "engine": , 153 | "adapters": {...}, 154 | "experiment_configs": {...}, 155 | ... 156 | } 157 | ``` 158 | 159 | ## Performance Benefits 160 | 161 | - **Faster imports**: Cached data loads instantly 162 | - **Reduced network usage**: Avoids re-downloading unchanged data 163 | - **Offline capability**: Can work with previously cached data 164 | - **Version tracking**: Know exactly which version of data you're using 165 | - **Smart main branch updates**: Only re-downloads when main branch has actually changed 166 | 167 | ## Engine and Adapter File Handling 168 | 169 | - **Python files**: Engine and adapter .py files are downloaded and cached as actual Python files 170 | - **Dynamic loading**: When loading from cache, Python files are dynamically imported 171 | - **Path management**: Cache directories are temporarily added to Python path for import 172 | - **Error handling**: Graceful fallback if cached Python files can't be loaded 173 | - **Version consistency**: Ensures cached Python files match the commit version 174 | 175 | ## Error Handling 176 | 177 | The caching system includes robust error handling: 178 | - Graceful fallback if cache files are corrupted 179 | - Automatic cache directory creation 180 | - Detailed logging of cache operations 181 | - Safe cache validation 182 | 183 | ## Example Usage 184 | 185 | See `cache_example.py` for a complete demonstration of the caching functionality. 186 | 187 | ## File Locations 188 | 189 | - Cache directory: `./.cache/` 190 | - Log file: `./.cache/import_log.json` 191 | - Problem cache: `./.cache/problem_name/` 192 | - Engine files: `./.cache/problem_name/engine/` 193 | - Adapter files: `./.cache/problem_name/adapters/` 194 | - Metadata file: `./.cache/problem_name/cache_metadata.json` 195 | 196 | The cache directory structure is automatically created when the `PullApplications` class is initialized. 
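
## Validation Sketch (Illustrative)

A minimal sketch of how the commit-ID plus source-hash validation described above could be implemented (illustrative only; the function and field names below are assumptions, not the actual `PullApplications` internals):

```python
import hashlib
import json


def source_hash(source_data: dict) -> str:
    """Hash the source configuration so any change invalidates the cache."""
    canonical = json.dumps(source_data, sort_keys=True).encode("utf-8")
    return hashlib.sha256(canonical).hexdigest()


def cache_is_valid(metadata: dict, requested_commit: str, source_data: dict) -> bool:
    """Cache hit only if both the commit ID and the source hash still match."""
    return (
        metadata.get("commit_id") == requested_commit
        and metadata.get("source_hash") == source_hash(source_data)
    )
```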
--------------------------------------------------------------------------------
/elsciRL/adapters/LLM_logic_generators/ollama_adapter_generator.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import ollama
3 | from elsciRL.adapters.LLM_logic_generators.adapter_prompt import adapter_prompt
4 | 
5 | # Configure logging
6 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
7 | 
8 | class OllamaAdapterGenerator:
9 |     def __init__(self, pseudocode_model: str, save_pseudocode: bool = False, pseudocode_file_path: str = None):
10 |         """
11 |         Initializes the OllamaAdapterGenerator.
12 | 
13 |         Args:
14 |             pseudocode_model: Name of the Ollama model used to generate the
15 |                               adapter pseudocode (e.g. 'llama3.2').
16 |             save_pseudocode: If True, the generated pseudocode is written to
17 |                              pseudocode_file_path.
18 |             pseudocode_file_path: Path of the file the pseudocode is saved to
19 |                                   when save_pseudocode is True.
20 |         """
21 |         logging.info("OllamaAdapterGenerator initialized.")
22 |         self.pseudocode_model = pseudocode_model
23 |         if save_pseudocode:
24 |             self.pseudocode_file_path = pseudocode_file_path
25 |         else:
26 |             self.pseudocode_file_path = None
27 | 
28 |     def _generate_pseudocode_via_ollama(self, prompt: str) -> str:
29 |         """
30 |         Calls the configured Ollama model to generate pseudocode.
31 |         """
32 |         logging.info("Generating pseudocode via Ollama LLM...")
33 | 
34 |         response = ollama.chat(
35 |             model=self.pseudocode_model, # Or another model suitable for code generation
36 |             messages=[
37 |                 {
38 |                     'role': 'user',
39 |                     'content': prompt,
40 |                 }
41 |             ],
42 |             stream=False
43 |         )
44 | 
45 |         logging.info(f"Generated pseudocode:\n{response['message']['content']}")
46 |         return response['message']['content']
47 | 
48 |     def generate_adapter_pseudocode(self, environment_states: dict, transformed_states: str) -> str:
49 |         """
50 |         Logs the environment states and the LLM-transformed states, then uses an
51 |         Ollama model to generate Python pseudocode for an adapter function that
52 |         replicates the transformation.
53 | 
54 |         Args:
55 |             environment_states: A dictionary representing states from the environment.
56 |             transformed_states: The LLM-generated states.
57 | 
58 |         Returns:
59 |             A string containing Python-like pseudocode for the adapter function.
60 |         """
61 |         logging.info(f"Generating adapter pseudocode for input text: '{transformed_states}'")
62 |         logging.info(f"Environment states: {environment_states}")
63 | 
64 | 
65 |         # 1. Prepare data for the pseudocode-generating LLM
66 |         prompt_for_pseudocode_llm = f"""
67 |         Given the following information:
68 |         1. Environment States: {environment_states}
69 |         2. Transformed Output Text: "{transformed_states}"
70 | 
71 |         Generate Python-like pseudocode for an 'adapter_function' in the form defined by {adapter_prompt}.
72 | 
73 |         The pseudocode should outline the logic rules necessary to transform the original input text
74 |         (or a similar input) into the transformed output text, considering the environment states.
75 |         The function should aim to replicate the transformation performed by the primary LLM.
76 | 
77 |         These logic rules can be defined directly by a set of functions such as:
78 |         def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False):
79 |             if state[0] == 'some_value':
80 |                 return "{transformed_states[:0]}..."
# (Adjust based on logic) 81 | elif state[1] == 'some_other_value': 82 | return "{transformed_states[:0]}..." # (Adjust based on logic) 83 | else: 84 | return "some_other_transformation..." 85 | 86 | Or a lookup dictionary or table such as: 87 | obs_mapping = {{ 88 | 'some_value': 'some_other_value', 89 | 'some_other_value': 'some_other_other_value', 90 | 'some_other_other_value': 'some_other_other_other_value', 91 | }} 92 | def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False): 93 | return obs_mapping[state] 94 | 95 | The logic rules can use the current state, legal moves, and action history to determine the output. 96 | 97 | Please provide only the Python pseudocode for adapter_function. 98 | """ 99 | logging.info("Constructed prompt for pseudocode generation LLM.") 100 | 101 | # 2. Pass data to another LLM to create Python pseudocode 102 | pseudocode = self._generate_pseudocode_via_ollama(prompt_for_pseudocode_llm) 103 | 104 | logging.info("Successfully generated adapter pseudocode.") 105 | if self.pseudocode_file_path: 106 | with open(self.pseudocode_file_path, 'w') as f: 107 | f.write(pseudocode) 108 | logging.info(f"Pseudocode saved to {self.pseudocode_file_path}") 109 | return pseudocode 110 | 111 | if __name__ == '__main__': 112 | # Example Usage 113 | # Initialize the generator 114 | adapter_gen = OllamaAdapterGenerator(pseudocode_model='llama3.2', save_pseudocode=True, pseudocode_file_path='./pseudocode_sample.py') 115 | 116 | # Example data 117 | sample_env_states = {'Location': 'London', 118 | 'Day': 'Monday', 119 | 'Time': 'Morning', 120 | 'Weather':{ 121 | "cloud_cover": "low", 122 | "temperature": "70 degrees", 123 | "humidity": "20%", 124 | "wind_speed": "10 mph", 125 | "wind_direction": "N" 126 | }, 127 | 'Location': 'London', 128 | 'Day': 'Monday', 129 | 'Time': 'Afternoon', 130 | 'Weather':{ 131 | "cloud_cover": "moderate", 132 | "temperature": "85 degrees", 133 | "humidity": "40%", 134 | "wind_speed": "15 mph", 135 | "wind_direction": "SW" 136 | }, 137 | 138 | } 139 | sample_output = ["The weather on Monday morning in London is sunny and dry, the temperature is 70 degrees and low humidity and a light breeze.", 140 | "The weather on Monday afternoon in London is cloudy, the temperature is 85 degrees and moderate humidity and a moderate breeze from the south-west."] 141 | 142 | # Generate pseudocode 143 | generated_code = adapter_gen.generate_adapter_pseudocode(sample_env_states, sample_output) 144 | 145 | -------------------------------------------------------------------------------- /elsciRL/environment_setup/elsciRL_info.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | import numpy.typing as npt 4 | import random 5 | 6 | 7 | class elsciRLInfo: 8 | def __init__( 9 | self, 10 | observed_states: dict | None = None, 11 | experience_sampling: dict | None = None, 12 | # tensor_index: dict | None = None, 13 | ) -> None: 14 | if not experience_sampling: 15 | self.experience_sampling = {} 16 | else: 17 | self.experience_sampling = experience_sampling 18 | 19 | if not observed_states: 20 | self.observed_states = {} 21 | else: 22 | self.observed_states = observed_states 23 | 24 | def observed_state_tracker( 25 | self, 26 | engine_observation: Tensor | npt.ArrayLike | list | None = None, 27 | language_state: Tensor | npt.ArrayLike | list | None = None, 28 | ): 29 | """Tracks adapted form of state from engine observation for unsupervised approaches.""" 30 | if 
engine_observation not in self.observed_states: 31 | self.observed_states[engine_observation] = language_state 32 | return self.observed_states 33 | 34 | def experience_sampling_add( 35 | self, 36 | engine_observation: Tensor | npt.ArrayLike | list | None = None, 37 | action: int | str | bool | None = None, 38 | next_observation: Tensor | npt.ArrayLike | list | None = None, 39 | reward: float = 0, 40 | terminated: bool = False, 41 | ): 42 | """Adds experience from interaction with the Live environment to sample from.""" 43 | # -------------------------------------------------------------------------- 44 | # Required if input observation is tensor as this cant be used for dict keys 45 | # - create tuple store transitions 46 | if type(engine_observation) is Tensor: 47 | engine_observation = tuple(engine_observation.cpu().numpy().flatten()) 48 | if type(next_observation) is Tensor: 49 | next_observation = tuple(next_observation.cpu().numpy().flatten()) 50 | # -------------------------------------------------------------------------- 51 | # Get occurrence of current observation+action 52 | if engine_observation not in self.experience_sampling: 53 | self.experience_sampling[engine_observation] = {} 54 | if action not in self.experience_sampling[engine_observation]: 55 | self.experience_sampling[engine_observation][action] = {} 56 | self.experience_sampling[engine_observation][action]["obs_a_count"] = 1 57 | obs_a_count = ( 58 | self.experience_sampling[engine_observation][action]["obs_a_count"] + 1 59 | ) 60 | self.experience_sampling[engine_observation][action][ 61 | "obs_a_count" 62 | ] = obs_a_count 63 | 64 | # Get occurrence of next obs given obs+action 65 | # - Compute prob, reward is static and set on first occurrence 66 | if next_observation in self.experience_sampling[engine_observation][action]: 67 | next_obs_count = ( 68 | self.experience_sampling[engine_observation][action][next_observation][ 69 | "next_obs_count" 70 | ] 71 | + 1 72 | ) 73 | prob = next_obs_count / obs_a_count 74 | self.experience_sampling[engine_observation][action][next_observation][ 75 | "next_obs_count" 76 | ] = next_obs_count 77 | self.experience_sampling[engine_observation][action][next_observation][ 78 | "prob" 79 | ] = prob 80 | else: 81 | self.experience_sampling[engine_observation][action][next_observation] = {} 82 | self.experience_sampling[engine_observation][action][next_observation][ 83 | "next_obs_count" 84 | ] = 1 85 | self.experience_sampling[engine_observation][action][next_observation][ 86 | "prob" 87 | ] = (1 / obs_a_count) 88 | self.experience_sampling[engine_observation][action][next_observation][ 89 | "reward" 90 | ] = reward 91 | self.experience_sampling[engine_observation][action][next_observation][ 92 | "terminated" 93 | ] = terminated 94 | 95 | def experience_sampling_legal_actions( 96 | self, engine_observation: Tensor | npt.ArrayLike | list | None = None 97 | ): 98 | """Returns a list of known actions from the experience.""" 99 | if type(engine_observation) is Tensor: 100 | engine_observation = tuple(engine_observation.cpu().numpy().flatten()) 101 | # state_tuple = tuple(engine_observation) 102 | # engine_observation = self.tensor_index.index(state_tuple) 103 | if engine_observation in self.experience_sampling: 104 | legal_actions = list(self.experience_sampling[engine_observation].keys()) 105 | else: 106 | legal_actions = None 107 | return legal_actions 108 | 109 | def experience_sampling_step( 110 | self, 111 | engine_observation: Tensor | npt.ArrayLike | list | None = None, 112 | 
action: int | str | bool | None = None, 113 | ): 114 | """Outcome of action given current observation from sampled experience.""" 115 | # If state-action has not been seen from live system 116 | engine_observation_shape = None 117 | next_obs = None 118 | if type(engine_observation) is Tensor: 119 | engine_observation_shape = engine_observation.shape 120 | engine_observation = tuple(engine_observation.cpu().numpy().flatten()) 121 | 122 | if action not in self.experience_sampling[engine_observation]: 123 | next_obs = engine_observation 124 | reward = 0 125 | terminated = False 126 | # Select action from distribution of probabilities 127 | else: 128 | cumulative = 0 129 | rng = random.random() 130 | for next_obs in self.experience_sampling[engine_observation][action]: 131 | # first key is just the count of obs+action so skip over this 132 | if next_obs != "obs_a_count": 133 | if ( 134 | self.experience_sampling[engine_observation][action][next_obs][ 135 | "prob" 136 | ] 137 | <= rng 138 | ): 139 | break 140 | else: 141 | cumulative += self.experience_sampling[engine_observation][ 142 | action 143 | ][next_obs]["prob"] 144 | 145 | reward = self.experience_sampling[engine_observation][action][next_obs][ 146 | "reward" 147 | ] 148 | terminated = self.experience_sampling[engine_observation][action][next_obs][ 149 | "terminated" 150 | ] 151 | 152 | # -------------------------------------------------------------------------- 153 | # Converts stored obs back from int to tensor to match env 154 | if (type(next_obs) is tuple) and ( 155 | engine_observation_shape is not None 156 | ): # Phil: had to fix AND if statements by separating fully 157 | next_obs = torch.tensor(next_obs).reshape(engine_observation_shape) 158 | # -------------------------------------------------------------------------- 159 | return next_obs, reward, terminated 160 | -------------------------------------------------------------------------------- /elsciRL/agents/DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | from collections import deque 6 | import random 7 | import pickle 8 | from typing import List, Tuple, Dict, Any, Hashable, Iterable 9 | 10 | from elsciRL.agents.agent_abstract import QLearningAgent 11 | 12 | class DQN(nn.Module): 13 | def __init__(self, input_size: int, output_size: int, hidden_size: int = 128): 14 | super(DQN, self).__init__() 15 | self.network = nn.Sequential( 16 | nn.Linear(input_size, hidden_size), 17 | nn.ReLU(), 18 | nn.Linear(hidden_size, hidden_size), 19 | nn.ReLU(), 20 | nn.Linear(hidden_size, output_size) 21 | ) 22 | 23 | def forward(self, x: torch.Tensor) -> torch.Tensor: 24 | return self.network(x) 25 | 26 | class DQNAgent(QLearningAgent): 27 | def __init__(self, 28 | input_size: int, 29 | output_size: int, 30 | hidden_size: int = None, 31 | learning_rate: float = 0.001, 32 | gamma: float = 0.99, 33 | epsilon: float = 1.0, 34 | epsilon_min: float = 0.01, 35 | epsilon_decay: float = 0.995, 36 | memory_size: int = 10000, 37 | batch_size: int = 64, 38 | target_update: int = 10, 39 | device: str = None, 40 | ): 41 | 42 | self.input_size = input_size 43 | self.output_size = output_size 44 | 45 | 46 | self.memory = deque(maxlen=memory_size) 47 | self.batch_size = batch_size 48 | self.gamma = gamma 49 | self.epsilon_reset = epsilon 50 | self.epsilon = epsilon 51 | self.epsilon_min = epsilon_min 52 | self.epsilon_decay = epsilon_decay 53 | self.target_update = 
target_update 54 | self.update_counter = 0 55 | 56 | # Create main and target networks with optional device specification 57 | if device is None: 58 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 59 | else: 60 | self.device = torch.device(device) 61 | 62 | self.policy_net = DQN(input_size, output_size, hidden_size).to(self.device) 63 | print(f"DQN Agent initialized on device: {self.device}") 64 | print(self.policy_net) 65 | self.target_net = DQN(input_size, output_size, hidden_size).to(self.device) 66 | self.target_net.load_state_dict(self.policy_net.state_dict()) 67 | 68 | self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate) 69 | self.criterion = nn.MSELoss() 70 | 71 | def save(self) -> List[Dict]: 72 | """Save the agent's state""" 73 | return [ 74 | self.policy_net.state_dict(), 75 | self.target_net.state_dict(), 76 | self.optimizer.state_dict(), 77 | { 78 | 'epsilon': self.epsilon, 79 | 'update_counter': self.update_counter, 80 | 'memory': list(self.memory) # Convert deque to list for serialization 81 | } 82 | ] 83 | 84 | def load(self, saved_agent: List[Dict]): 85 | """Load the agent's state""" 86 | if len(saved_agent) >= 4: 87 | self.policy_net.load_state_dict(saved_agent[0]) 88 | self.target_net.load_state_dict(saved_agent[1]) 89 | self.optimizer.load_state_dict(saved_agent[2]) 90 | state_dict = saved_agent[3] 91 | self.epsilon = state_dict['epsilon'] 92 | self.update_counter = state_dict['update_counter'] 93 | if 'memory' in state_dict: 94 | self.memory = deque(state_dict['memory'], maxlen=self.memory.maxlen) 95 | 96 | def exploration_parameter_reset(self): 97 | """Reset the exploration parameter to its initial value""" 98 | self.epsilon = self.epsilon_reset 99 | 100 | def clone(self): 101 | """Create a deep copy of the agent""" 102 | clone = pickle.loads(pickle.dumps(self)) 103 | clone.epsilon = self.epsilon_reset 104 | return clone 105 | 106 | def policy(self, state: torch.Tensor, legal_actions: list, **kwargs) -> Hashable: 107 | """Select action using epsilon-greedy policy""" 108 | if random.random() < self.epsilon: 109 | # Decay epsilon 110 | if self.epsilon > self.epsilon_min: 111 | self.epsilon *= self.epsilon_decay 112 | return random.choice(legal_actions) 113 | 114 | with torch.no_grad(): 115 | state = state.to(self.device) 116 | # Ensure state has correct shape [batch_size, input_size] 117 | if len(state.shape) == 1: 118 | state = state.unsqueeze(0) 119 | q_values = self.policy_net(state) 120 | 121 | # Mask illegal actions with large negative values 122 | mask = torch.ones_like(q_values) * float('-inf') 123 | for action in legal_actions: 124 | mask[0][action] = 0 125 | q_values = q_values + mask 126 | 127 | return q_values.argmax().item() 128 | 129 | def learn(self, state: torch.Tensor, next_state: torch.Tensor, 130 | immediate_reward: float, action: Hashable, **kwargs) -> None: 131 | """Store experience and train the network""" 132 | # Detach tensors and move to CPU to save GPU memory 133 | # This prevents keeping computational graphs in replay buffer 134 | if isinstance(state, torch.Tensor): 135 | state = state.detach().cpu() 136 | if len(state.shape) == 1: 137 | state = state.unsqueeze(0) 138 | if isinstance(next_state, torch.Tensor): 139 | next_state = next_state.detach().cpu() 140 | if len(next_state.shape) == 1: 141 | next_state = next_state.unsqueeze(0) 142 | 143 | # Store experience in replay memory (on CPU to save GPU memory) 144 | self.memory.append((state, action, next_state, immediate_reward)) 145 | 146 | # 
Train if enough samples 147 | if len(self.memory) >= self.batch_size: 148 | self._train() 149 | 150 | # Update target network periodically 151 | self.update_counter += 1 152 | if self.update_counter % self.target_update == 0: 153 | self.target_net.load_state_dict(self.policy_net.state_dict()) 154 | 155 | 156 | def _train(self): 157 | """Train the network using experience replay""" 158 | batch = random.sample(self.memory, self.batch_size) 159 | states, actions, next_states, rewards = zip(*batch) 160 | 161 | # Convert to tensors and move to GPU only for training 162 | states = torch.cat(states).to(self.device) # [batch_size, input_size] 163 | next_states = torch.cat(next_states).to(self.device) # [batch_size, input_size] 164 | actions = torch.tensor(actions, device=self.device).long() # [batch_size] 165 | rewards = torch.tensor(rewards, device=self.device).float() # [batch_size] 166 | 167 | # Clear optimizer gradients 168 | self.optimizer.zero_grad() 169 | 170 | # Get current Q values 171 | current_q_values = self.policy_net(states) # [batch_size, output_size] 172 | current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1) # [batch_size] 173 | 174 | # Get next Q values from target network 175 | with torch.no_grad(): 176 | next_q_values = self.target_net(next_states).max(1)[0][:self.batch_size] # [batch_size] 177 | 178 | # Compute target Q values (detach to prevent gradient flow) 179 | target_q_values = (rewards + (self.gamma * next_q_values)).detach() 180 | 181 | # Compute loss and update 182 | loss = self.criterion(current_q_values, target_q_values) 183 | loss.backward() 184 | self.optimizer.step() 185 | 186 | # Clear intermediate tensors from GPU memory 187 | del states, next_states, actions, rewards, current_q_values, next_q_values, target_q_values, loss 188 | torch.cuda.empty_cache() if torch.cuda.is_available() else None -------------------------------------------------------------------------------- /elsciRL/environment_setup/gym_translator.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Any 2 | import gymnasium as gym 3 | from gymnasium.envs.registration import register 4 | from gymnasium import spaces 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from matplotlib.backends.backend_agg import FigureCanvasAgg 8 | 9 | 10 | def _figure_to_rgb_array(fig): 11 | """Convert a Matplotlib figure into an RGB numpy array.""" 12 | canvas = fig.canvas 13 | if not isinstance(canvas, FigureCanvasAgg): 14 | canvas = FigureCanvasAgg(fig) 15 | canvas.draw() 16 | width, height = canvas.get_width_height() 17 | # Prefer buffer RGBA for compatibility, then drop alpha channel. 
18 | if hasattr(canvas, "buffer_rgba"): 19 | buffer = np.asarray(canvas.buffer_rgba(), dtype=np.uint8) 20 | array = np.array(buffer).reshape((height, width, 4))[..., :3] 21 | else: 22 | buffer = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8) 23 | array = buffer.reshape((height, width, 3)) 24 | plt.close(fig) 25 | return array 26 | 27 | class EngineToGym(gym.Env): 28 | def __init__(self): 29 | print("elsciRL Env transformed to Gym Env.") 30 | 31 | def load(self, Engine, engine_name:str=None, Adapter:Callable[[Any], Any]=None, setup_info:dict={}): 32 | self.engine = Engine(setup_info) 33 | self.Adapter = Adapter(setup_info=setup_info) 34 | self.reward_signal = None 35 | self.reward_signal_tracker = [] 36 | # Use name if given directly, otherwise check engine ledger 37 | if engine_name is not None: 38 | self.name = engine_name 39 | elif (self.engine.ledger['id'] != 'Unique Problem ID')&(self.engine.ledger['id'] != ''): 40 | self.name = self.engine.ledger['id'] 41 | else: 42 | print("\n WARNING: Engine name not set, using default name --> set inside ledger [id] field.") 43 | self.name = "elsciRLGymEnv-v0" 44 | 45 | # -------------------------- 46 | # Define observation and action spaces 47 | # - Observations are dictionaries with the agent's and the target's location. 48 | # - Each location is encoded as an element of {0, ..., `size`}^2, i.e. MultiDiscrete([size, size]). 49 | try: 50 | # First check if observation space is defined by the adapter 51 | self.observation_space = self.Adapter.observation_space 52 | except: 53 | # Then check if observation space is defined by the engine 54 | try: 55 | self.observation_space = self.engine.observation_space 56 | except AttributeError: 57 | # Otherwise, use default observation space 58 | print("WARNING: Observation space not defined in either adapter of engine.") 59 | 60 | # - A single dimension of N number of discrete actions 61 | self.action_space = spaces.Discrete(self.engine.ledger['action_space_size']) 62 | # -------------------------- 63 | self.render_mode = self.engine.ledger['render_data']['render_mode'] 64 | 65 | def reset(self, seed=None, options=None): 66 | observation = self.engine.reset() 67 | self.last_obs = observation 68 | self.last_info = {} 69 | obs_enc = self.Adapter.adapter( 70 | observation, 71 | self.engine.legal_move_generator(), 72 | self.engine.action_history, 73 | encode=True, 74 | ) 75 | obs_enc = self._format_observation(obs_enc) 76 | self.reward_signal_tracker = [] # Only give agent reward for first time it sees a sub-goal 77 | self.action_history = [] # Reset action history 78 | self.episode_reward = 0 79 | #self.obs_history = [] 80 | return obs_enc, {} 81 | 82 | def step(self, state=[], action=0): 83 | # Gym step function combines elsciRL Engine step and Adapter 84 | base_state = getattr(self, "last_obs", None) 85 | step_result = self.engine.step(state=base_state, action=action) 86 | if not isinstance(step_result, tuple) or len(step_result) != 4: 87 | print( 88 | "[EngineToGym] Invalid engine step output:", 89 | { 90 | "engine": type(self.engine).__name__, 91 | "adapter": type(self.Adapter).__name__, 92 | "result": step_result, 93 | }, 94 | ) 95 | raise ValueError( 96 | "Engine.step must return a tuple of (observation, reward, terminated, info). 
" 97 | f"Received: {step_result!r}" 98 | ) 99 | observation, reward, terminated, info = step_result 100 | if isinstance(action, np.int64): 101 | self.action_history.append(action.item()) 102 | else: 103 | self.action_history.append(action) 104 | # if observation not in self.obs_history: 105 | # reward += 0.05 # Give small reward to encourage exploration 106 | # self.obs_history.append(observation) 107 | if info: 108 | info['obs'] = observation 109 | else: 110 | info = {'obs': observation} 111 | 112 | # Apply custom reward signal if defined 113 | # - Defined as dict:= {obs:reward, obs:reward, ...} 114 | engine_reward_signal = getattr(self.engine, "reward_signal", None) 115 | if engine_reward_signal: 116 | if observation in engine_reward_signal: 117 | if observation not in self.reward_signal_tracker: 118 | # Only override if new reward is higher 119 | if engine_reward_signal[observation] > reward: 120 | reward = engine_reward_signal[observation] 121 | self.reward_signal_tracker.append(observation) 122 | 123 | 124 | # If a language problem then we also want processed observation 125 | # TODO: Need better method for checking if language problem 126 | if 'lang' in self.engine.ledger['type'].lower(): 127 | obs_adapted = self.Adapter.adapter(observation, self.engine.legal_move_generator(), 128 | self.engine.action_history, encode = False) 129 | info['obs_adapted'] = obs_adapted 130 | obs_enc = self.Adapter.adapter( 131 | observation, 132 | self.engine.legal_move_generator(), 133 | self.engine.action_history, 134 | encode=True, 135 | ) 136 | obs_enc = self._format_observation(obs_enc) 137 | truncated = False 138 | self.episode_reward += reward 139 | self.last_obs = observation 140 | self.last_info = info 141 | return obs_enc, reward, terminated, truncated, info 142 | 143 | def _format_observation(self, obs_enc): 144 | """Ensure adapter outputs match the declared Gym observation space.""" 145 | 146 | # Handle PyTorch tensors - move to CPU before converting to numpy 147 | if hasattr(obs_enc, "detach"): 148 | obs_enc = obs_enc.detach() 149 | if hasattr(obs_enc, "cpu"): 150 | obs_enc = obs_enc.cpu() 151 | obs_array = np.asarray(obs_enc, dtype=np.float32) 152 | 153 | if isinstance(self.observation_space, spaces.Discrete): 154 | # Convert one-hot or vector encodings to scalar indices 155 | if obs_array.ndim == 0: 156 | return np.int64(obs_array.item()) 157 | if obs_array.ndim == 1 and obs_array.size > 1: 158 | return np.int64(np.argmax(obs_array)) 159 | return np.int64(obs_array.flatten()[0]) 160 | 161 | # Default: ensure numpy array on CPU with correct dtype 162 | return obs_array 163 | 164 | def render(self): 165 | render_output = self.engine.render() 166 | if isinstance(render_output, np.ndarray): 167 | return render_output 168 | if hasattr(render_output, "canvas"): 169 | return _figure_to_rgb_array(render_output) 170 | return np.asarray(render_output) 171 | 172 | def close(self): 173 | self.engine.close() 174 | 175 | def __call__(self, *args: Any, **kwds: Any) -> Any: 176 | return self 177 | 178 | 179 | @staticmethod 180 | def GymRegistration(Engine, Adapter, setup_info:dict={}): 181 | """This provides a function for converting elsciRL engines into OpenAI Gym environments. \n 182 | elsciRL engines include a conditional action space which is not inherently supported by OpenAI Gym. \n 183 | Outputs Engine in the OpenAI Gym format with a wrapper for the elsciRL adapter. 
184 | """ 185 | # Translate Engine to OpenAI Gym class structure 186 | environment = EngineToGym() 187 | environment.load(Engine, 'Test-1', Adapter, setup_info) 188 | # Register and make the environment 189 | register(id=environment.name, entry_point=environment) 190 | gym_env = gym.make(environment.name) 191 | 192 | return gym_env 193 | -------------------------------------------------------------------------------- /elsciRL/examples/DemoExperiment.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | # ====== elsciRL IMPORTS =============================================== 4 | # ------ EXPERIMENT ---------------------------------------------------- 5 | from elsciRL.experiments.standard import Experiment as STANDARD_RL 6 | # ------ Visual Analysis ----------------------------------------------- 7 | from elsciRL.analysis.combined_variance_visual import combined_variance_analysis_graph as COMBINED_VARIANCE_ANALYSIS_GRAPH 8 | # ---------------------------------------------------------------------- 9 | 10 | class DemoExperiment: 11 | def __init__(self): 12 | # Create output directory if it doesn't exist 13 | self.cwd = os.getcwd()+'/elsciRL-EXAMPLE-output' 14 | if not os.path.exists(self.cwd): 15 | os.mkdir(self.cwd) 16 | 17 | def help(self): 18 | help_output = """ 19 | This is a demo experiment script for elsciRL. 20 | It allows you to run a standard RL experiment on a selected problem from the elsciRL application suite. 21 | The script will guide you through the process of selecting a problem, configuring the experiment, and running it. 22 | You can also evaluate the results of the experiment after it has been run. 23 | Usage: 24 | 1. Run the script. 25 | 2. Follow the prompts to select a problem and configure the experiment. 26 | 3. The experiment will be run and the results will be saved in a directory. 27 | 4. You can evaluate the results by calling the evaluate() method. 28 | Example: 29 | >>> demo = DemoExperiment() 30 | >>> demo.run() 31 | >>> demo.evaluate() 32 | """ 33 | print(help_output) 34 | 35 | def input(self): 36 | # ----- User Input ----- 37 | # 1. Number training episodes 38 | print("Please enter the number of ... (skip to use default) ") 39 | num_train_epi = input('\t - Training episodes: ') 40 | if num_train_epi == '': 41 | num_train_epi = 1000 42 | else: 43 | num_train_epi = int(num_train_epi) 44 | 45 | # Update experiment config 46 | self.num_train_epi = num_train_epi 47 | # ---------------------- 48 | 49 | def results_save_dir(self): 50 | # Specify save dir 51 | # - Needs to be performed here in case user changes parameters and re-runs 52 | time = datetime.now().strftime("%d-%m-%Y_%H-%M") 53 | self.save_dir = self.cwd+'/test_'+time 54 | if not os.path.exists(self.save_dir): 55 | os.mkdir(self.save_dir) 56 | # --- 57 | 58 | def experiment(self, problem:str, exp_save_dir:str, num_train_epi:int=0): 59 | 60 | # --- Select local config and experiment config --- 61 | print("--- LOCAL CONFIGURATION SELECTION ---") 62 | for i, local_config in enumerate(self.pull_app_data[problem]['local_configs'].keys()): 63 | print(f"{i+1}. 
{local_config}") 64 | local_config_id = input("Please select the local config number (default 1): ") 65 | if local_config_id.isdigit() and 0 < int(local_config_id) <= len(self.pull_app_data[problem]['local_configs']): 66 | local_config_id = int(local_config_id) - 1 67 | else: 68 | local_config_id = 0 69 | LocalConfig = self.pull_app_data[problem]['local_configs'][list(self.pull_app_data[problem]['local_configs'].keys())[local_config_id]] 70 | 71 | print("\n --- EXPERIMENT CONFIGURATION SELECTION ---") 72 | for i, experiment_config in enumerate(self.pull_app_data[problem]['experiment_configs'].keys()): 73 | print(f"{i+1}. {experiment_config}") 74 | experiment_config_id = input("Please select the experiment config number (default 1): ") 75 | if experiment_config_id.isdigit() and 0 < int(experiment_config_id) <= len(self.pull_app_data[problem]['experiment_configs']): 76 | experiment_config_id = int(experiment_config_id) - 1 77 | else: 78 | experiment_config_id = 0 79 | ExperimentConfig = self.pull_app_data[problem]['experiment_configs'][list(self.pull_app_data[problem]['experiment_configs'].keys())[experiment_config_id]] 80 | 81 | if num_train_epi != 0: 82 | ExperimentConfig['number_training_episodes'] = num_train_epi 83 | if int(num_train_epi/10) > 10: 84 | ExperimentConfig['number_test_episodes'] = int(num_train_epi/10) 85 | else: 86 | ExperimentConfig['number_test_episodes'] = 10 87 | 88 | # ------------------------------------------------------ 89 | # Adapter Selection 90 | print("\n --- ADAPTER SELECTION ---") 91 | for i, adapter in enumerate(self.pull_app_data[problem]['adapters'].keys()): 92 | if not adapter.startswith('LLM'): 93 | print(f"{i+1}. {adapter}") 94 | adapter_id = input("Please select the adapter number (default 1): ") 95 | if adapter_id.isdigit() and 0 < int(adapter_id) <= len(self.pull_app_data[problem]['adapters']): 96 | adapter_id = int(adapter_id) - 1 97 | else: 98 | adapter_id = 0 99 | 100 | # -------------------------------------------------------------------- 101 | # Set the selected agent 102 | ExperimentConfig['agent_select'] = ['Qlearntab'] 103 | ExperimentConfig['adapter_select'] = [list(self.pull_app_data[problem]['adapters'].keys())[adapter_id]] 104 | ExperimentConfig['adapter_input_dict'] = {'Qlearntab': [list(self.pull_app_data[problem]['adapters'].keys())[adapter_id]]} 105 | if ExperimentConfig['number_training_repeats'] > 1: 106 | ExperimentConfig['number_training_repeats'] = 5 107 | if ExperimentConfig['number_training_seeds'] > 1: 108 | ExperimentConfig['number_training_seeds'] = 5 109 | # Flat Baselines 110 | exp = STANDARD_RL(Config=ExperimentConfig, ProblemConfig=LocalConfig, 111 | Engine=self.pull_app_data[problem]['engine'], Adapters=self.pull_app_data[problem]['adapters'], 112 | save_dir=exp_save_dir, show_figures = 'No', window_size=0.1) 113 | # -------------------------------------------------------------------- 114 | return exp 115 | 116 | def run(self): 117 | # IMPORT HERE SO ITS NOT LOADED ON STARTUP 118 | from elsciRL.application_suite.import_tool import PullApplications 119 | self.application_data = PullApplications() 120 | self.application_list:list=['Classroom', 'Gym-FrozenLake', 'Sailing'] 121 | self.pull_app_data = self.application_data.pull(problem_selection=self.application_list) 122 | print("\n --- PULLING APPLICATION DATA ---") 123 | for app in self.application_list: 124 | print("--------------------------------------------------") 125 | print(f"Application: {app}") 126 | print("Engine:", self.pull_app_data[app]['engine']) 127 | 
print("Adapters:", self.pull_app_data[app]['adapters']) 128 | print("Experiment Configs:", self.pull_app_data[app]['experiment_configs']) 129 | print("Local Configs:", self.pull_app_data[app]['local_configs']) 130 | print("-------------------------------------------------- \n ") 131 | # USER INPUTS FOR BASIC SELECTION OPTIONS 132 | # --- Problem selection --- 133 | print("\n --- PROBLEM SELECTION ---") 134 | for i, prob in enumerate(self.application_list): 135 | print(f"{i+1}. {prob}") 136 | problem_id = input("Please enter the problem number to run (default 1): ") 137 | if problem_id.isdigit() and 0 < int(problem_id) <= len(self.application_list): 138 | problem = self.application_list[int(problem_id) - 1] 139 | else: 140 | problem = self.application_list[0] 141 | 142 | # --- TRAINING EPISODES INPUT --- 143 | num_train_episodes = input("Please enter the number of training episodes (default 1000): ") 144 | if num_train_episodes == '': 145 | num_train_episodes = 1000 146 | try: 147 | num_train_episodes = int(num_train_episodes) 148 | except ValueError: 149 | print("Invalid input for number of training episodes. Using default value of 1000.") 150 | num_train_episodes = 1000 151 | # ------------------------------- 152 | 153 | self.results_save_dir() 154 | problem_save_dir = self.save_dir + '/' + problem 155 | if not os.path.exists(problem_save_dir): 156 | os.mkdir(problem_save_dir) 157 | print("\n --------------------------------------------------") 158 | print('Training and Testing on {p} environment'.format(p=problem)) 159 | print("-------------------------------------------------- \n ") 160 | exp = self.experiment(problem, problem_save_dir, num_train_epi=int(num_train_episodes)) 161 | exp.train() 162 | exp.test() 163 | # exp.render_results() 164 | 165 | def evaluate(self): 166 | COMBINED_VARIANCE_ANALYSIS_GRAPH(self.save_dir, 'TRAINING', show_figures='Yes') 167 | COMBINED_VARIANCE_ANALYSIS_GRAPH(self.save_dir, 'TESTING', show_figures='Yes') -------------------------------------------------------------------------------- /elsciRL/instruction_following/LLM_instr_planner/LLM_instr_validator.py: -------------------------------------------------------------------------------- 1 | import ollama 2 | from typing import Optional, Dict, Any 3 | import json 4 | import logging 5 | 6 | class LLMInstructionValidator: 7 | """ 8 | A class for validating if a given text matches or completes an instruction 9 | using Large Language Model reasoning via Ollama. 10 | """ 11 | 12 | def __init__(self, 13 | model: str = "llama3.2"): 14 | """ 15 | Initialize the LLM Instruction Validator. 16 | 17 | Args: 18 | model: Ollama model to use for validation (e.g., "llama3.2", "mistral", "codellama"). 19 | temperature: Temperature for LLM responses (lower = more deterministic). 20 | host: Ollama host URL. If None, uses default localhost. 21 | """ 22 | self.model = model 23 | self.logger = logging.getLogger(__name__) 24 | 25 | def validate_instruction_completion(self, 26 | instruction_description: str, 27 | best_match: str) -> Dict[str, Any]: 28 | """ 29 | Compare instruction description with best match to determine if the 30 | best match completes or fulfills the instruction. 
31 | 32 | Args: 33 | instruction_description: The original instruction or task description 34 | best_match: The text/response that potentially completes the instruction 35 | 36 | Returns: 37 | Dict containing: 38 | - 'is_complete': Boolean indicating if instruction is completed 39 | - 'confidence': Float between 0-1 indicating confidence level 40 | - 'reasoning': String explaining the LLM's reasoning 41 | - 'partial_completion': Boolean if partially completed 42 | """ 43 | 44 | # Construct the prompt for the LLM 45 | prompt = self._construct_validation_prompt(instruction_description, best_match) 46 | 47 | try: 48 | response = ollama.chat( 49 | model=self.model, 50 | messages=[ 51 | { 52 | "role": "system", 53 | "content": "You are an expert at evaluating whether responses complete given instructions. " 54 | "You must respond with valid JSON format." 55 | }, 56 | { 57 | "role": "user", 58 | "content": prompt 59 | } 60 | ], 61 | options={ 62 | "num_predict": 500 63 | } 64 | ) 65 | 66 | # Parse the LLM response 67 | result = self._parse_llm_response(response['message']['content']) 68 | 69 | self.logger.info(f"Validation completed. Is complete: {result['is_complete']}") 70 | return result 71 | 72 | except Exception as e: 73 | self.logger.error(f"Error during LLM validation: {str(e)}") 74 | return { 75 | 'is_complete': False, 76 | 'confidence': 0.0, 77 | 'reasoning': f"Error occurred during validation: {str(e)}", 78 | 'partial_completion': False 79 | } 80 | 81 | def _construct_validation_prompt(self, instruction: str, match: str) -> str: 82 | """ 83 | Construct the prompt for the LLM to evaluate instruction completion. 84 | 85 | Args: 86 | instruction: The instruction description 87 | match: The best match text 88 | 89 | Returns: 90 | Formatted prompt string 91 | """ 92 | prompt = f""" 93 | Please evaluate whether the "Best Match" text completes or fulfills the given "Instruction". 94 | You only need to confirm that the language match well and do not need to check if the best match would update the environment. 95 | The language structure from the environment is fixed and does not change, so do not expect a 'Best Match' that is better structured than what is given. 96 | You need to determine if the current 'Best Match' is likely to be the best match for the instruction given the language structure of the environment, do not expect more detail than what is given. 97 | 98 | INSTRUCTION: 99 | {instruction} 100 | 101 | BEST MATCH: 102 | {match} 103 | 104 | Analyze if the Best Match adequately completes, addresses, or fulfills the Instruction. Consider: 105 | 1. Does it directly address what was asked? 106 | 2. Is the response complete and comprehensive? 107 | 3. Does it meet the intent of the instruction? 108 | 109 | Respond ONLY with valid JSON in this exact format: 110 | {{ 111 | "is_complete": true/false, 112 | "confidence": 0.0-1.0, 113 | "reasoning": "Brief explanation of your evaluation", 114 | "partial_completion": true/false 115 | }} 116 | 117 | Your confidence should be: 118 | - 0.9-1.0: Very confident the instruction is completed 119 | - 0.7-0.8: Mostly confident but some minor gaps 120 | - 0.5-0.6: Partially completed with significant gaps 121 | - 0.0-0.4: Does not complete the instruction 122 | """ 123 | return prompt 124 | 125 | def _parse_llm_response(self, response_text: str) -> Dict[str, Any]: 126 | """ 127 | Parse the LLM response and extract validation results. 
128 | 129 | Args: 130 | response_text: Raw text response from LLM 131 | 132 | Returns: 133 | Parsed validation results 134 | """ 135 | try: 136 | # Clean and parse JSON response 137 | cleaned_response = response_text.strip() 138 | 139 | # Handle potential markdown code blocks 140 | if "```json" in cleaned_response: 141 | start = cleaned_response.find("```json") + 7 142 | end = cleaned_response.find("```", start) 143 | cleaned_response = cleaned_response[start:end].strip() 144 | elif "```" in cleaned_response: 145 | start = cleaned_response.find("```") + 3 146 | end = cleaned_response.find("```", start) 147 | cleaned_response = cleaned_response[start:end].strip() 148 | 149 | result = json.loads(cleaned_response) 150 | 151 | # Validate required fields 152 | required_fields = ['is_complete', 'confidence', 'reasoning', 'partial_completion'] 153 | for field in required_fields: 154 | if field not in result: 155 | raise ValueError(f"Missing required field: {field}") 156 | 157 | # Ensure confidence is between 0 and 1 158 | result['confidence'] = max(0.0, min(1.0, float(result['confidence']))) 159 | 160 | return result 161 | 162 | except (json.JSONDecodeError, ValueError, KeyError) as e: 163 | self.logger.error(f"Failed to parse LLM response: {str(e)}") 164 | return { 165 | 'is_complete': False, 166 | 'confidence': 0.0, 167 | 'reasoning': f"Failed to parse LLM response: {response_text[:100]}...", 168 | 'partial_completion': False 169 | } 170 | 171 | def batch_validate(self, instruction_match_pairs: list) -> list: 172 | """ 173 | Validate multiple instruction-match pairs in batch. 174 | 175 | Args: 176 | instruction_match_pairs: List of tuples (instruction, best_match) 177 | 178 | Returns: 179 | List of validation results 180 | """ 181 | results = [] 182 | for instruction, match in instruction_match_pairs: 183 | result = self.validate_instruction_completion(instruction, match) 184 | results.append(result) 185 | 186 | return results 187 | 188 | def list_available_models(self) -> list: 189 | """ 190 | List available Ollama models. 191 | 192 | Returns: 193 | List of available model names 194 | """ 195 | try: 196 | models = ollama.list() 197 | return [model['model'].split(':')[0] for model in models['models']] 198 | except Exception as e: 199 | self.logger.error(f"Error listing models: {str(e)}") 200 | return [] 201 | 202 | 203 | # Example usage and convenience function 204 | def validate_instruction_match(instruction_description: str, 205 | best_match: str, 206 | model: str = "llama3.2") -> Dict[str, Any]: 207 | """ 208 | Convenience function to quickly validate if a best match completes an instruction. 
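        Example (illustrative only; assumes Ollama is running locally and the
        requested model has been pulled):

            result = validate_instruction_match(
                "Reach the opposite bank of the river.",
                "The boat is at the opposite bank.",
                model="llama3.2",
            )
            # result is the dict produced by
            # LLMInstructionValidator.validate_instruction_completion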
209 | 210 | Args: 211 | instruction_description: The instruction to validate against 212 | best_match: The text that potentially completes the instruction 213 | model: Ollama model to use (default: "llama2") 214 | host: Ollama host URL (optional) 215 | 216 | Returns: 217 | Validation result dictionary 218 | """ 219 | validator = LLMInstructionValidator(model=model) 220 | return validator.validate_instruction_completion(instruction_description, best_match) 221 | -------------------------------------------------------------------------------- /elsciRL/examples/environments/elsciRL_sailing.py: -------------------------------------------------------------------------------- 1 | # Sailing Simulator 2 | # - https://github.com/topics/sailing-simulator 3 | # - Simple sailing simulator from https://github.com/PPierzc/ai-learns-to-sail 4 | # - https://github.com/PPierzc/ai-learns-to-sail/blob/master/tasks/channel.py 5 | import io 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from elsciRL.examples.environments.sailing_image import SailingImageData 9 | 10 | class Engine: 11 | """Defines the environment function from the generator engine. 12 | Expects the following: 13 | - reset() to reset the env a start position(s) 14 | - step() to make an action and update the game state 15 | - legal_moves_generator() to generate the list of legal moves 16 | """ 17 | def __init__(self, local_setup_info:dict={}) -> None: 18 | """Initialize Engine""" 19 | #self.Environment = "Engine Initialization" 20 | self.x_limit = 10 21 | self.y_limit = local_setup_info["y_limit"] 22 | self.angle_limit = np.pi / 2 23 | self.supervised_rewards = local_setup_info["supervised_rewards"] 24 | # Precision parameter 25 | self.obs_precision = local_setup_info["obs_precision"] 26 | 27 | # Ledger of the environment with meta information for the problem 28 | ledger_required = { 29 | 'id': 'Unique Problem ID', 30 | 'type': 'Language/Numeric', 31 | 'description': 'Problem Description', 32 | 'goal': 'Goal Description' 33 | } 34 | 35 | ledger_optional = { 36 | 'reward': 'Reward Description', 37 | 'punishment': 'Punishment Description (if any)', 38 | 'state': 'State Description', 39 | 'constraints': 'Constraints Description', 40 | 'action': 'Action Description', 41 | 'author': 'Author', 42 | 'year': 'Year', 43 | 'render_data':{'render_mode':'rgb_array', 44 | 'render_fps':4} 45 | } 46 | ledger_gym_compatibility = { 47 | # Limited to discrete actions for now, set to arbitrary large number if uncertain 48 | 'action_space_size':2, 49 | } 50 | self.ledger = ledger_required | ledger_optional | ledger_gym_compatibility 51 | # Initialize history 52 | self.action_history = [] 53 | self.obs_history = [] 54 | 55 | 56 | 57 | # -------------------------- 58 | # Defined functions used by engine source 59 | @staticmethod 60 | def vel(theta, theta_0=0, theta_dead=np.pi / 12): 61 | return 1 - np.exp(-(theta - theta_0) ** 2 / theta_dead) 62 | 63 | @staticmethod 64 | def rew(theta, theta_0=0, theta_dead=np.pi / 12): 65 | return Engine.vel(theta, theta_0, theta_dead) * np.cos(theta) 66 | # -------------------------- 67 | 68 | def reset(self, start_obs:str=None, render_dir:str=None): 69 | """Fully reset the environment.""" 70 | # Allow reset to be at fixed start position or random 71 | if start_obs: 72 | self.x = np.round(float(start_obs.split('_')[0]),self.obs_precision) 73 | self.angle = np.round(float(start_obs.split('_')[1]),1) 74 | else: 75 | self.x = 0 #np.round(np.random.randint(-9.9, 9.9),4) # Changed to rand_int to reduce num of start states 
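        # Note (illustrative): the observation returned below is the string
        # "<x>_<angle>", with x rounded to `obs_precision` decimal places and
        # the angle to 1 decimal place. For example, with obs_precision=2 the
        # default start state (x=0, angle=0) is encoded as "0.00_0.0".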
76 | self.angle = 0 # always start with angle 0 77 | self.y = 0 78 | obs = "{n:.{d}f}".format(n=self.x, d=self.obs_precision)+'_'+"{:0.1f}".format(self.angle) 79 | 80 | if render_dir: 81 | 82 | # SHOW PRETTY IMAGE OF PROBLEM 83 | raw_image = SailingImageData['data'].split(",") 84 | 85 | width = 240 86 | height = 300 87 | 88 | full_array = [] 89 | column_counter = 0 90 | row = [] 91 | pixel_counter = 0 92 | pixel_list = [] 93 | for input_item in raw_image: 94 | pixel_item = int(input_item.replace(" ","")) 95 | if pixel_counter == 3: 96 | # new pixel and reset pixel counter 97 | pixel_counter = 0 98 | pixel_list = [] 99 | # Add 3-d pixel to row 100 | if column_counter == width: 101 | # Add row to full array 102 | full_array.append(row) 103 | # new row and reset column counter 104 | column_counter = 0 105 | row = [] 106 | 107 | row.append(pixel_list) 108 | column_counter+=1 109 | 110 | pixel_list.append(pixel_item) 111 | pixel_counter+=1 112 | 113 | render = np.array(full_array) 114 | plt.imshow(render, interpolation='nearest') 115 | plt.axis('off') 116 | plt.title("Sailing Simulation \n Simple River with Fixed Wind Direction") 117 | plt.show() 118 | plt.pause(5) 119 | plt.savefig(render_dir,bbox_inches='tight') 120 | plt.close() 121 | 122 | return obs 123 | 124 | 125 | def step(self, state:any=None, action:any=None): 126 | """Enact an action.""" 127 | self.action_history.append(action) 128 | a = [-0.1, 0.1][action] 129 | # Observation space 130 | self.x += np.round((Engine.vel(self.angle + a) * np.sin(self.angle + a)),self.obs_precision) # Round x to Ndp 131 | self.y += np.round((Engine.vel(self.angle + a) * np.cos(self.angle + a)),4) # Round y to 4dp 132 | self.angle = np.round(self.angle+a,1) 133 | #obs = str(self.x)+'_'+str(self.angle) 134 | obs = "{n:.{d}f}".format(n=self.x, d=self.obs_precision)+'_'+"{:0.1f}".format(self.angle) # fix - https://docs.python.org/3.4/library/string.html#format-specification-mini-language 135 | self.obs_history.append(obs) 136 | # Reward signal 137 | # - Added flag for whether we give agent immediate positive reward 138 | # - Update: Added scale factor if using supervised rewards to not override goal rewards 139 | if self.supervised_rewards=="True": 140 | reward = Engine.rew(self.angle)/10 141 | else: 142 | reward = 0 143 | 144 | # Termination signal 145 | # - Source: Terminal only on hitting piers/walls, otherwise continues to action limit 146 | # - Update: Add terminal state if y > 25 (or another arbitrary value) 147 | # - Update: Limit angle to [-90,90] degrees (i.e. 
no backwards sailing) 148 | if np.abs(self.x)>self.x_limit: 149 | reward = -1 150 | terminated = True 151 | elif np.abs(self.y)>self.y_limit: 152 | reward = 1 153 | terminated = True 154 | elif np.abs(self.y)<0: 155 | reward = -1 156 | terminated = True 157 | elif np.abs(self.angle)>self.angle_limit: 158 | #print("\n \t - Angle limit reached") 159 | reward = -1 160 | terminated = True 161 | else: 162 | terminated = False 163 | 164 | return obs, reward, terminated, {} 165 | 166 | def legal_move_generator(self, obs:any=None): 167 | """Define legal moves at each position""" 168 | # Action space: [0,1] for turn slightly left or right 169 | # - Kept as binary but might be better as continuous [-0.1, 0.1] 170 | legal_moves = [0, 1] 171 | return legal_moves 172 | 173 | def render(self, state:any=None): 174 | """Render the environment.""" 175 | #render = print("Current State: ", state, " | Action History: ", self.action_history) 176 | # state = x_angle 177 | x = self.x 178 | y = self.y 179 | angle = self.angle 180 | # Angle is bearing into wind -pi/2 < angle < pi/2 181 | if angle < np.pi/2: 182 | U = np.sin(angle) 183 | V = np.cos(angle) 184 | elif angle == np.pi/2: 185 | U = 1 186 | V = 0 187 | elif angle == -np.pi/2: 188 | U = -1 189 | V = 0 190 | else: 191 | U = np.sin(angle) 192 | V = -np.cos(angle) 193 | 194 | DPI = 128 195 | fig, ax = plt.subplots(figsize=(5,5), dpi = DPI) 196 | ax.scatter(x,y,c='b',marker='x',alpha=1) 197 | ax.quiver(x,y,U,V,angles='uv',scale_units='xy') 198 | if y > 1: 199 | ax.text(x+0.5,y-1,'Sailboat',color='b') 200 | 201 | # Draw wind direction 202 | ax.quiver(0,25,0,-1,angles='uv',scale_units='xy',color='r') 203 | ax.text(0,25.25,'Wind',color='r') 204 | 205 | 206 | ax.plot([10,10],[0,25],'r') 207 | ax.plot([-10,-10],[0,25],'r') 208 | ax.set_title("Sailboat Position with Direction against Wind") 209 | ax.set_xlabel("Horizontal Position (x)") 210 | ax.set_ylabel("Vertical Position (y)") 211 | # Save as rgba array 212 | # https://stackoverflow.com/questions/7821518/save-plot-to-numpy-array 213 | 214 | 215 | fig.canvas.draw() 216 | # data = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8) 217 | # render = data.reshape(fig.canvas.get_width_height()[::-1] + (4,)) 218 | 219 | buf = fig.canvas.buffer_rgba() 220 | data = np.asarray(buf) 221 | render = data.reshape(fig.canvas.get_width_height()[::-1] + (4,)) 222 | return render 223 | 224 | def close(self): 225 | """Close/Exit the environment.""" 226 | print("Environment Closed") -------------------------------------------------------------------------------- /elsciRL/interaction_loops/standard_gym.py: -------------------------------------------------------------------------------- 1 | # TODO: Simplify and remove sub-goals/elsciRL tracking/live_env/exp sampling 2 | import time 3 | import numpy as np 4 | from PIL import Image 5 | from tqdm import tqdm 6 | from gymnasium.wrappers import TimeLimit 7 | # ------ Imports ----------------------------------------- 8 | # Agent Setup 9 | from elsciRL.environment_setup.imports import ImportHelper 10 | # Evaluation standards 11 | from elsciRL.environment_setup.results_table import ResultsTable 12 | from elsciRL.environment_setup.elsciRL_info import elsciRLInfo 13 | # Non-gym interaction loop setup 14 | from elsciRL.interaction_loops.standard import StandardInteractionLoop 15 | from elsciRL.experiments.experiment_utils.config_utils import ensure_dir 16 | 17 | 18 | def _apply_action_limit(env, max_steps: int | None): 19 | """Wrap env with a TimeLimit so runaway episodes truncate after 
max_steps.""" 20 | 21 | if not max_steps or max_steps <= 0: 22 | return env 23 | if isinstance(env, TimeLimit): 24 | env._max_episode_steps = min(env._max_episode_steps, max_steps) 25 | return env 26 | try: 27 | return TimeLimit(env, max_episode_steps=max_steps) 28 | except Exception: 29 | # Fall back to manual attribute hints if wrapper fails (non-gym envs) 30 | setattr(env, "_elsci_max_episode_steps", max_steps) 31 | return env 32 | 33 | def _normalize_render_stack(render_stack): 34 | """Convert renderer outputs to PIL Images so GIF saving works consistently.""" 35 | 36 | normalized = [] 37 | for frame in render_stack or []: 38 | if frame is None: 39 | continue 40 | if hasattr(frame, "save"): 41 | normalized.append(frame) 42 | elif isinstance(frame, np.ndarray): 43 | normalized.append(Image.fromarray(frame.astype(np.uint8))) 44 | return normalized 45 | 46 | 47 | class GymInteractionLoop: 48 | """Interaction Loop for standard environments. 49 | REQUIRES: 50 | - Engine: Environment engine defined with elsciRLAI format 51 | - Adapters: Dictionary of local adapters with unique names: {"name_1": Adapter_1, "name_2": Adapter_2,...} 52 | - local_setup_info: Dictionary of local setup info (i.e. local config file) 53 | """ 54 | def __init__(self, Engine, Adapters:dict, local_setup_info: dict): 55 | # Define agent type for interaction process, call alternative if not gym agent 56 | if local_setup_info['agent_type'].split('_')[0] == "SB3": 57 | self.gym_agent = True 58 | Imports = ImportHelper(local_setup_info) 59 | self.agent, self.agent_type, self.agent_name, self.agent_state_adapter = Imports.agent_info(Adapters) 60 | self.num_train_episodes, self.num_test_episodes, self.training_action_cap, self.testing_action_cap, self.reward_signal = Imports.parameter_info() 61 | self.train = Imports.training_flag() 62 | # --- INIT env from engine 63 | self.env = Engine(local_setup_info) 64 | max_steps = self.training_action_cap if self.train else self.testing_action_cap 65 | self.env = _apply_action_limit(self.env, max_steps) 66 | self.start_obs = self.env.reset() 67 | # --- 68 | # --- PRESET elsciRL INFO 69 | # Agent 70 | # Training or testing phase flag 71 | # --- elsciRL 72 | self.live_env, self.observed_states, self.experience_sampling = Imports.live_env_flag() 73 | # Results formatting 74 | self.results = ResultsTable(local_setup_info) 75 | # elsciRL input function 76 | # - We only want to init trackers on first batch otherwise it resets knowledge 77 | self.elsciRL = elsciRLInfo(self.observed_states, self.experience_sampling) 78 | else: 79 | # --- Used for initialisation default interaction loop as alternative 80 | self.gym_agent = False 81 | self.interaction = StandardInteractionLoop(Engine, Adapters, local_setup_info) 82 | self.start_obs = self.interaction.start_obs 83 | self.results = ResultsTable(local_setup_info) 84 | 85 | def episode_loop(self, render:bool=False, render_save_dir:str=None): 86 | if self.gym_agent: 87 | # Mode selection (already initialized) 88 | if self.train: 89 | number_episodes = self.num_train_episodes 90 | else: 91 | number_episodes = self.num_test_episodes 92 | 93 | episode_render = [] 94 | print("\n Episode Interaction Loop: ") 95 | if self.train: 96 | for episode in tqdm(range(0, number_episodes)): 97 | start_time = time.time() 98 | # Can force the agent to train on a single episode 99 | # Very time consuming to do this 100 | self.agent.learn(total_steps=self.training_action_cap) 101 | end_time = time.time() 102 | reward, actions, states, render_stack = 
self.agent.test(self.env, render=render) 103 | episode_render.append(render_stack) 104 | # Need to get values from actions 105 | # TODO: Ensure all agents output int directly to solve this 106 | if isinstance(actions[0], np.int64): 107 | actions = [action.item() for action in actions] 108 | elif isinstance(actions[0], np.ndarray): 109 | actions = [action.item() for action in actions] 110 | 111 | 112 | 113 | self.results.results_per_episode(self.agent_name, None, episode, len(actions), 114 | reward, (end_time-start_time), actions, 0, 0) 115 | else: 116 | for episode in tqdm(range(0, number_episodes)): 117 | start_time = time.time() 118 | # Evaluate fixed policy on single episode 119 | reward, actions, states, render_stack = self.agent.test(self.env, render=render) 120 | # Need to get values from actions 121 | # TODO: Ensure all agents output int directly to solve this 122 | if isinstance(actions[0], np.int64): 123 | actions = [action.item() for action in actions] 124 | elif isinstance(actions[0], np.ndarray): 125 | actions = [action.item() for action in actions] 126 | 127 | episode_render.append(render_stack) 128 | end_time = time.time() 129 | self.results.results_per_episode(self.agent_name, None, episode, len(actions), 130 | reward, (end_time-start_time), actions, 0, 0) 131 | table_results = self.results.results_table_format() 132 | # Output GIF image of all episode frames 133 | if render and render_stack: 134 | frames = _normalize_render_stack(render_stack) 135 | if frames: 136 | frames[0].save( 137 | render_save_dir + '/render.gif', 138 | save_all=True, 139 | append_images=frames[1:], 140 | optimize=False, 141 | duration=200, 142 | loop=1, 143 | ) 144 | else: 145 | table_results = self.interaction.episode_loop() 146 | self.agent = self.interaction.agent 147 | self.results = self.interaction.results 148 | self.elsciRL = self.interaction.elsciRL 149 | 150 | return table_results 151 | 152 | @staticmethod 153 | def policy_rollout( 154 | agent, 155 | env, 156 | agent_name: str, 157 | num_episodes: int, 158 | results_table, 159 | render: bool = False, 160 | render_save_dir: str | None = None, 161 | action_limit: int | None = None, 162 | ): 163 | """Execute a pre-configured policy-gradient agent on a Gym env and log results.""" 164 | env = _apply_action_limit(env, action_limit) 165 | episode_render = [] 166 | for episode in range(num_episodes): 167 | start_time = time.time() 168 | reward, actions, states, render_stack = agent.test(env, render=render) 169 | end_time = time.time() 170 | if actions: 171 | if isinstance(actions[0], np.int64): 172 | actions = [action.item() for action in actions] 173 | elif isinstance(actions[0], np.ndarray): 174 | actions = [action.item() for action in actions] 175 | results_table.results_per_episode( 176 | agent_name, 177 | None, 178 | episode, 179 | len(actions), 180 | reward, 181 | (end_time - start_time), 182 | actions, 183 | 0, 184 | 0, 185 | ) 186 | if render and render_stack: 187 | episode_render.extend(_normalize_render_stack(render_stack)) 188 | table_results = results_table.results_table_format() 189 | if render and episode_render: 190 | ensure_dir(render_save_dir or "renders") 191 | episode_render[0].save( 192 | f"{render_save_dir or 'renders'}/{agent_name}_policy.gif", 193 | save_all=True, 194 | append_images=episode_render[1:], 195 | optimize=False, 196 | duration=200, 197 | loop=1, 198 | ) 199 | return table_results --------------------------------------------------------------------------------
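As a quick orientation for the `policy_rollout` helper above, the sketch below runs a throwaway random-action agent on a Gymnasium task. It is illustrative only: `DummyRolloutAgent`, the empty `local_setup_info` dict, and the `CartPole-v1` choice are placeholders, not part of elsciRL; the real agents live under `elsciRL/agents`, and the keys expected by `ResultsTable` come from a local config such as those in `elsciRL/examples/local_configs`.

```python
import gymnasium as gym
from elsciRL.environment_setup.results_table import ResultsTable
from elsciRL.interaction_loops.standard_gym import GymInteractionLoop


class DummyRolloutAgent:
    """Placeholder agent exposing the test(env, render=...) interface policy_rollout expects."""

    def test(self, env, render: bool = False):
        obs, _ = env.reset()
        total_reward, actions, states, frames = 0.0, [], [], []
        terminated = truncated = False
        while not (terminated or truncated):
            action = env.action_space.sample()        # random policy stand-in
            obs, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            actions.append(action)
            states.append(obs)
        return total_reward, actions, states, frames  # frames left empty since render is unused here


local_setup_info = {}  # placeholder: populate with the keys your local config / ResultsTable requires
results = ResultsTable(local_setup_info)
table = GymInteractionLoop.policy_rollout(
    agent=DummyRolloutAgent(),
    env=gym.make("CartPole-v1"),
    agent_name="dummy_random",
    num_episodes=5,
    results_table=results,
    render=False,
    action_limit=200,  # wrapped via _apply_action_limit -> TimeLimit
)
print(table)
```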