├── elsciRL ├── agents │ ├── __init__.py │ ├── clean_rl │ │ └── __init__.py │ ├── LLM_agents │ │ └── agent_modelfiles │ │ │ └── llama3_2.modelfile │ ├── random_agent.py │ ├── agent_abstract.py │ ├── stable_baselines │ │ ├── SB3_DQN.py │ │ ├── SB3_PPO.py │ │ └── SB3_A2C.py │ └── DQN.py ├── examples │ ├── placeholder.png │ ├── sailing_setup.png │ ├── local_configs │ │ ├── gym_frozenlake_config_local.py │ │ └── sailing_config_local.py │ ├── Readme.md │ ├── experiment_config.py │ ├── adapters │ │ ├── gym_frozenlake_default.py │ │ ├── gym_frozenlake_language.py │ │ ├── elsciRL_sailing_default.py │ │ └── elsciRL_sailing_language.py │ ├── environments │ │ ├── gym_frozenlake.py │ │ └── elsciRL_sailing.py │ └── DemoExperiment.py ├── analysis │ ├── tabular_output.py │ ├── combined_tabular_results.py │ └── convergence_measure.py ├── experiments │ ├── experiment_utils │ │ ├── config_utils.py │ │ ├── render_current_results.py │ │ ├── policy_agent_factory.py │ │ ├── result_manager.py │ │ ├── env_manager.py │ │ └── agent_factory.py │ └── training_procedures │ │ ├── policy_gradient.py │ │ └── default_exp_training.py ├── adapters │ ├── __init__.py │ ├── LLM_state_generators │ │ ├── base_prompt.py │ │ └── text_gpt-4.1.py │ └── LLM_logic_generators │ │ ├── adapter_prompt.py │ │ └── ollama_adapter_generator.py ├── application_suite │ ├── search_agent.py │ ├── experiment_agent.py │ └── CACHE_README.md ├── encoders │ ├── encoder_abstract.py │ ├── __init__.py │ ├── poss_actions_encoded.py │ ├── observable_objects_encoded.py │ ├── prior_actions_encoded.py │ ├── poss_state_encoded.py │ └── language_transformers │ │ └── MiniLM_L6v2.py ├── environment_setup │ ├── instruction_reward_wrapper.py │ ├── imports.py │ ├── results_table.py │ ├── elsciRL_info.py │ └── gym_translator.py ├── GUI │ ├── templates │ │ └── _generic_agent_param_form.html │ ├── prerender_encoder.py │ ├── LLM_tools │ │ └── LLM_utils.py │ └── static │ │ └── app_setup.md ├── instruction_following │ ├── instr_utils │ │ └── elsciRL_instr_input.py │ └── LLM_instr_planner │ │ └── LLM_instr_validator.py ├── __init__.py ├── config_local.py ├── config.py └── interaction_loops │ ├── state_search.py │ ├── policy_gradient.py │ └── standard_gym.py ├── .github └── FUNDING.yml ├── requirements.txt ├── pyelsciRL.toml ├── setup.py ├── .gitignore └── tests └── test_policy_gradient_classroom.py /elsciRL/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: pdfosborne 4 | -------------------------------------------------------------------------------- /elsciRL/agents/clean_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppo import CleanRLPPO 2 | 3 | __all__ = ["CleanRLPPO"] 4 | -------------------------------------------------------------------------------- /elsciRL/examples/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdfosborne/elsciRL/HEAD/elsciRL/examples/placeholder.png -------------------------------------------------------------------------------- /elsciRL/examples/sailing_setup.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pdfosborne/elsciRL/HEAD/elsciRL/examples/sailing_setup.png -------------------------------------------------------------------------------- /elsciRL/agents/LLM_agents/agent_modelfiles/llama3_2.modelfile: -------------------------------------------------------------------------------- 1 | FROM llama3.2 2 | 3 | # Set temperature to 0 for deterministic responses 4 | PARAMETER temperature 0 5 | 6 | # Set context length to 4000 tokens 7 | PARAMETER num_ctx 4000 8 | 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | numpy 3 | pandas 4 | matplotlib 5 | seaborn 6 | scipy>=1.10.1 7 | torch 8 | tqdm 9 | httpimport 10 | sentence-transformers 11 | gymnasium 12 | stable-baselines3 13 | ollama 14 | openai 15 | markdown 16 | pyboy -------------------------------------------------------------------------------- /elsciRL/examples/local_configs/gym_frozenlake_config_local.py: -------------------------------------------------------------------------------- 1 | LocalConfigData ={ 2 |     "adapter_select": ["Default", "Language"], 3 |     "training_action_cap": 100, 4 |     "testing_action_cap":100, 5 |     "reward_signal": [1,-0.01,-0.1], 6 |     "sub_goal": "None" 7 | } -------------------------------------------------------------------------------- /elsciRL/examples/Readme.md: -------------------------------------------------------------------------------- 1 | # elsciRL Examples 2 | 3 | These examples are designed to be run quickly to test the installation. 4 | 5 | After installing elsciRL, simply use the following Python commands: 6 | 7 | ```python 8 | from elsciRL.examples.DemoExperiment import DemoExperiment 9 | 10 | exp = DemoExperiment() 11 | 12 | exp.run() 13 | exp.evaluate() 14 | ``` -------------------------------------------------------------------------------- /elsciRL/examples/local_configs/sailing_config_local.py: -------------------------------------------------------------------------------- 1 | LocalConfigData = { 2 |     "env_select":"simple_river", 3 |     "adapter_select": ["Default", "Language"], 4 |     "training_action_cap": 100, 5 |     "testing_action_cap":100, 6 |     "reward_signal": [0.5,0,-0.1], 7 |     "sub_goal": "None", 8 |     "supervised_rewards":"False", 9 |     "y_limit":25, 10 |     "obs_precision":2 11 | } -------------------------------------------------------------------------------- /elsciRL/analysis/tabular_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | class TabularOutput: 5 |     def __init__(self, results_data, save_dir): 6 |         self.results_data = results_data 7 |         self.save_dir = save_dir 8 |         self.num_episodes = np.max(results_data['episode']) 9 | 10 |     def save_results(self): 11 |         pd.DataFrame(self.results_data).to_csv(self.save_dir+'/results.csv') -------------------------------------------------------------------------------- /pyelsciRL.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "elscirl" 3 | version = "1.0.0" 4 | authors = [ 5 |     { name="Philip Osborne", email="pdfosborne@gmail.com" }, 6 | ] 7 | description = "Applying the elsciRL architecture to Reinforcement Learning problems."
8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | classifiers = [ 11 |     "Programming Language :: Python :: 3", 12 |     "License :: OSI Approved :: Apache Software License", 13 |     "Operating System :: OS Independent", 14 | ] 15 | 16 | [project.urls] 17 | "Homepage" = "https://github.com/pdfosborne/elscirl" 18 | "Bug Tracker" = "https://github.com/pdfosborne/elscirl/issues" -------------------------------------------------------------------------------- /elsciRL/examples/experiment_config.py: -------------------------------------------------------------------------------- 1 | ExperimentConfigData = { 2 |     "name": "Example Experiment", 3 |     "problem_type": "Examples", 4 | 5 |     "number_training_episodes": 100, 6 |     "number_training_repeats": 5, 7 |     "number_training_seeds": 1, 8 | 9 |     "test_agent_type":"best", 10 |     "number_test_episodes": 25, 11 |     "number_test_repeats": 5, 12 | 13 |     "agent_select": ["Qlearntab", "Qlearntab"], 14 |     "agent_parameters":{ 15 |         "Qlearntab":{ 16 |             "alpha": 0.1, 17 |             "gamma": 0.95, 18 |             "epsilon": 0.2, 19 |             "epsilon_step":0.01 20 |         } 21 |     } 22 | } -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/config_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | def ensure_dir(path): 5 |     if not os.path.exists(path): 6 |         os.makedirs(path) 7 | 8 | 9 | def load_config(config_path): 10 |     with open(config_path, 'r') as f: 11 |         if config_path.endswith('.json'): 12 |             return json.load(f) 13 |     # Add more config formats if needed 14 |     raise ValueError("Unsupported config file format.") 15 | 16 | 17 | def merge_configs(config1, config2): 18 |     # Simple dict merge, can be improved for deep merge 19 |     merged = config1.copy() 20 |     merged.update(config2) 21 |     return merged 22 | -------------------------------------------------------------------------------- /elsciRL/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | from elsciRL.agents.agent_abstract import Agent 3 | import torch 4 | from torch import Tensor 5 | 6 | class RandomAgent(Agent): 7 |     """This is simply a random decision maker, does not learn.""" 8 |     def __init__(self): 9 |         super().__init__() 10 | 11 |     def policy(self, state: Tensor, legal_actions: list) -> str: 12 |         action = random.choice(legal_actions) 13 |         return action 14 | 15 |     def learn(self, state: Tensor, next_state: Tensor, r_p: float, 16 |               action_code: str) -> float: 17 |         # Do nothing. 18 |         return None 19 | 20 |     def q_result(self): 21 |         """Random agent has no knowledge.""" 22 |         total_q = 0 23 |         mean_q = 0 24 |         return total_q, mean_q 25 | -------------------------------------------------------------------------------- /elsciRL/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Any 3 | 4 | class StateAdapter(ABC): 5 |     def __init__(self, raw_state): 6 |         super().__init__() 7 |         # Define the fields that describe the state features: 8 |         self.state: list = self._read(raw_state) 9 | 10 |     @abstractmethod 11 |     def _read(self, raw_state) -> list: 12 |         # Read the data. 13 |         # fill in the feature fields 14 |         raise NotImplementedError 15 | 16 |     def adapter(self): 17 |         "Returns the adapted form, may require input flag for encoded or non-encoded output."
18 | 19 | 20 | def sample(self): 21 | """Returns a sample of an adapted state form (typically initial position of the environment).""" 22 | 23 | 24 | -------------------------------------------------------------------------------- /elsciRL/application_suite/search_agent.py: -------------------------------------------------------------------------------- 1 | class DefaultAgentConfig: 2 | def __init__(self): 3 | self.data ={ 4 | "name": "Default", 5 | "problem_type": "Default", 6 | 7 | "number_training_episodes": 1000, 8 | "number_training_repeats": 5, 9 | "number_training_seeds": 1, 10 | 11 | "test_agent_type":"best", 12 | "number_test_episodes": 200, 13 | "number_test_repeats": 10, 14 | 15 | "agent_select": ["Qlearntab"], 16 | "adapter_select": ["default"], 17 | "agent_parameters":{ 18 | "Qlearntab":{ 19 | "alpha": 0.1, 20 | "gamma": 0.95, 21 | "epsilon": 1, 22 | "epsilon_step":0 23 | }, 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /elsciRL/agents/agent_abstract.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Iterable, Hashable, Any 3 | from torch import Tensor 4 | 5 | class Agent(ABC): 6 | @abstractmethod 7 | def policy(self, **kwargs) -> str: 8 | pass 9 | 10 | def learn(self, **kwargs) -> str: 11 | pass 12 | 13 | class QLearningAgent(Agent): 14 | def policy(self, state:Tensor, game_over:bool, 15 | legal_actions:list, **kwargs) -> Hashable: 16 | pass 17 | 18 | def learn(self, state:Tensor, action:Hashable, next_state:Iterable[Any], 19 | immediate_reward:float, **kwargs): 20 | pass 21 | 22 | 23 | class LLMAgentAbstract(Agent): 24 | def policy(self, state:str, legal_actions:list, **kwargs) -> str: 25 | pass 26 | 27 | def learn(self, state:str, action:str, next_state:str, reward:float, **kwargs) -> str: 28 | pass 29 | 30 | -------------------------------------------------------------------------------- /elsciRL/encoders/encoder_abstract.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from typing import List, Dict 4 | from abc import ABC, abstractmethod 5 | from torch import Tensor 6 | 7 | class Encoder(ABC): 8 | @abstractmethod 9 | def encode(self, *args, **kwargs) -> Tensor: 10 | pass 11 | 12 | class StateEncoder(Encoder): 13 | tensor_cache: Dict[int, Tensor] = dict() 14 | tensor_cache_index: int = 0 15 | 16 | @staticmethod 17 | def cache_insert(t: Tensor): 18 | StateEncoder.tensor_cache[StateEncoder.tensor_cache_index] = t 19 | StateEncoder.tensor_cache_index += 1 20 | 21 | @staticmethod 22 | def cache_retrieve(offset: int, index: int): 23 | return StateEncoder.tensor_cache[offset + index] 24 | 25 | 26 | def encode(self, state:list = None, legal_actions:list = None, episode_action_history:str = None) -> Tensor: 27 | pass 28 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_state_generators/base_prompt.py: -------------------------------------------------------------------------------- 1 | elsciRL_base_prompt = """ 2 | You are a helpful assistant that needs to describe the current state of a reinforcement learning environment to help an agent understand the context of the problem and how to act optimally. 3 | 4 | The state can be text but is typically a list of numbers, you will be provided with prior actions and their outcome states and should use this information to describe the current state. 
5 | 6 | If no actions are provided, you should still describe the current state as best as you can. 7 | 8 | You will be provided with a list of legal actions that the agent can take in the current state, you should describe these actions in a way that is useful for the agent to understand what it can do. 9 | 10 | You do not need to provide any details about what the agent should do, just describe the current state and the legal actions available to the agent in a single paragraph with less than 200 words. 11 | 12 | 13 | """ -------------------------------------------------------------------------------- /elsciRL/environment_setup/instruction_reward_wrapper.py: -------------------------------------------------------------------------------- 1 | """Gym wrapper utilities for instruction-following reward shaping.""" 2 | from __future__ import annotations 3 | 4 | from typing import Callable, Dict, Optional 5 | 6 | import numpy as np 7 | 8 | from elsciRL.environment_setup.gym_wrapper_abstract import RewardWrapper 9 | 10 | 11 | class InstructionRewardWrapper(RewardWrapper): 12 | """Adds adapter-derived instruction rewards to a Gym environment.""" 13 | 14 | def __init__(self, env, reward_fn: Optional[Callable[[np.ndarray | None, Dict], float]] = None): 15 | super().__init__(env) 16 | self.reward_fn = reward_fn 17 | 18 | def reward(self, reward): 19 | if self.reward_fn is None: 20 | return reward 21 | obs = getattr(self.env, "last_obs", None) 22 | info = getattr(self.env, "last_info", {}) 23 | shaped_reward = self.reward_fn(obs, info) 24 | if shaped_reward is None: 25 | return reward 26 | return reward + shaped_reward 27 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/render_current_results.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def render_current_result(training_setup, current_environment, current_agent, local_save_dir): 4 | """Apply fixed policy to render current decision making for limited number of episodes.""" 5 | # Override input training setups with previously saved 6 | 7 | test_setup_info = training_setup.copy() 8 | 9 | test_setup_info['train'] = False # Testing Phase 10 | test_setup_info['training_results'] = False 11 | test_setup_info['observed_states'] = False 12 | test_setup_info['experience_sampling'] = False 13 | print("----------") 14 | print("Rendering trained agent's policy:") 15 | 16 | env = current_environment 17 | # --- 18 | env.number_episodes = 1 # Only render 1 episode 19 | env.agent = current_agent 20 | env.agent.epsilon = 0 # Remove random actions 21 | # --- 22 | # Render results 23 | if not os.path.exists(local_save_dir): 24 | os.mkdir(local_save_dir) 25 | env.episode_loop(render=True, render_save_dir=local_save_dir) -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/policy_agent_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | 3 | from elsciRL.agents.stable_baselines.SB3_PPO import SB_PPO 4 | from elsciRL.agents.stable_baselines.SB3_A2C import SB_A2C 5 | from elsciRL.agents.stable_baselines.SB3_DQN import SB_DQN 6 | from elsciRL.agents.clean_rl.ppo import CleanRLPPO 7 | 8 | 9 | class PolicyAgentFactory: 10 | """Factory for Gym/PyTorch policy-gradient agents (SB3-backed).""" 11 | 12 | def __init__(self): 13 | self.agent_types: Dict[str, Type] = { 14 | "SB3_PPO": SB_PPO, 15 | "SB3_A2C": 
SB_A2C, 16 | "SB3_DQN": SB_DQN, 17 | "PPO": CleanRLPPO, 18 | } 19 | 20 | def register_agent(self, name: str, agent_cls: Type): 21 | self.agent_types[name] = agent_cls 22 | 23 | def create(self, agent_type: str, agent_parameters: Dict, env): 24 | if agent_type not in self.agent_types: 25 | raise ValueError(f"Unknown policy agent type: {agent_type}") 26 | agent_cls = self.agent_types[agent_type] 27 | # Most SB3 wrappers accept the env kwarg directly. 28 | return agent_cls(env=env, **agent_parameters) 29 | -------------------------------------------------------------------------------- /elsciRL/GUI/templates/_generic_agent_param_form.html: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/result_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | class ResultManager: 5 | """Handles saving, loading, and reporting of results.""" 6 | def __init__(self, analysis): 7 | self.analysis = analysis 8 | 9 | def save_results(self, results, save_dir, filename): 10 | os.makedirs(save_dir, exist_ok=True) 11 | path = os.path.join(save_dir, filename) 12 | results.to_csv(path) 13 | 14 | def load_results(self, path): 15 | # Assumes CSV for now 16 | import pandas as pd 17 | return pd.read_csv(path) 18 | 19 | def train_report(self, training_results, save_dir, show_figures): 20 | return self.analysis.train_report(training_results, save_dir, show_figures) 21 | 22 | def test_report(self, testing_results, save_dir, show_figures): 23 | return self.analysis.test_report(testing_results, save_dir, show_figures) 24 | 25 | def training_variance_report(self, save_dir, show_figures): 26 | return self.analysis.training_variance_report(save_dir, show_figures) 27 | 28 | def testing_variance_report(self, save_dir, show_figures): 29 | return self.analysis.testing_variance_report(save_dir, show_figures) 30 | -------------------------------------------------------------------------------- /elsciRL/instruction_following/instr_utils/elsciRL_instr_input.py: -------------------------------------------------------------------------------- 1 | class elsciRLInput: 2 | def __init__(self, description_lookup:dict=None): 3 | self.description_lookup = description_lookup 4 | # New: store descriptions provided so the user doesn't need to provide multiple times 5 | self.descriptions_stored:dict={} 6 | 7 | def user_input(self): 8 | instructions = [] 9 | instruction_descriptions = [] 10 | while True: 11 | instr = input("Please provide the current instruction... 
([e/exit] to end path)") 12 | if (instr == "e")|(instr=="exit"): 13 | break 14 | 15 | if not self.description_lookup: 16 | if instr not in self.descriptions_stored: 17 | description = input("Please provide a description of the instruction...") 18 | else: 19 | print("Instruction description provided previously.") 20 | description = self.descriptions_stored[instr] 21 | if description == "None": 22 | description = instr 23 | 24 | instructions.append(instr) 25 | instruction_descriptions.append(description) 26 | self.descriptions_stored[instr] = description 27 | 28 | 29 | return instructions, instruction_descriptions -------------------------------------------------------------------------------- /elsciRL/application_suite/experiment_agent.py: -------------------------------------------------------------------------------- 1 | class DefaultAgentConfig: 2 | def __init__(self): 3 | self.data ={ 4 | "name": "Default", 5 | "problem_type": "Default", 6 | 7 | "instruction_chain": True, 8 | "instruction_chain_how": "continuous", 9 | 10 | "number_training_episodes": 1000, 11 | "number_training_repeats": 5, 12 | "number_training_seeds": 1, 13 | 14 | "test_agent_type":"best", 15 | "number_test_episodes": 200, 16 | "number_test_repeats": 10, 17 | 18 | "agent_select": ["Qlearntab"], 19 | "adapter_select": ["default"], 20 | "agent_parameters":{ 21 | "Qlearntab":{ 22 | "alpha": 0.1, 23 | "gamma": 0.95, 24 | "epsilon": 1, 25 | "epsilon_step":0 26 | }, 27 | "DQN":{ 28 | "learning_rate": 0.001, 29 | "gamma": 0.99, 30 | "epsilon": 1.0, 31 | "epsilon_min": 0.01, 32 | "epsilon_decay": 0.995, 33 | "memory_size": 10000, 34 | "batch_size": 64, 35 | "target_update": 10, 36 | "hidden_size": 128 37 | }, 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/env_manager.py: -------------------------------------------------------------------------------- 1 | from elsciRL.environment_setup.gym_translator import EngineToGym 2 | 3 | class EnvManager: 4 | """Handles environment setup and management.""" 5 | def __init__(self, interaction_loop_class, adapters): 6 | self.interaction_loop_class = interaction_loop_class 7 | self.adapters = adapters 8 | 9 | def create_env(self, Engine, Adapters, local_setup_info): 10 | return self.interaction_loop_class(Engine=Engine, Adapters=Adapters, local_setup_info=local_setup_info) 11 | 12 | def create_gym_env(self, Engine, Adapter, setup_info, wrappers=None): 13 | """Create a Gym environment from an elsciRL Engine and Adapter using gym_translator. 14 | 15 | Adapter can be either the adapter class itself or the lookup key registered in 16 | ``self.adapters``. Optional wrappers can be provided to post-process the created 17 | environment (e.g., to add reward shaping). 
18 | """ 19 | adapter_cls = Adapter 20 | if not callable(Adapter): 21 | adapter_cls = self.adapters.get(Adapter) 22 | if adapter_cls is None: 23 | raise ValueError(f"Adapter '{Adapter}' not found when creating Gym environment.") 24 | 25 | gym_env = EngineToGym() 26 | gym_env.load(Engine, Adapter=adapter_cls, setup_info=setup_info) 27 | 28 | if wrappers: 29 | for wrapper in wrappers: 30 | gym_env = wrapper(gym_env) 31 | return gym_env 32 | -------------------------------------------------------------------------------- /elsciRL/__init__.py: -------------------------------------------------------------------------------- 1 | # Try to import modules, handle missing dependencies gracefully 2 | try: 3 | from .examples.DemoExperiment import DemoExperiment as Demo 4 | except ImportError as e: 5 | print(f"Warning: Could not import DemoExperiment: {e}") 6 | Demo = None 7 | 8 | try: 9 | from .GUI.app import app as App 10 | except ImportError as e: 11 | print(f"Warning: Could not import GUI app: {e}") 12 | App = None 13 | 14 | try: 15 | from .GUI.prerender import Prerender as get_prerender_data 16 | except ImportError as e: 17 | print(f"Warning: Could not import Prerender: {e}") 18 | get_prerender_data = None 19 | 20 | try: 21 | from .experiments.standard import Experiment as STANDARD_RL 22 | except ImportError as e: 23 | print(f"Warning: Could not import STANDARD_RL: {e}") 24 | STANDARD_RL = None 25 | 26 | try: 27 | from .instruction_following.elsciRL_instruction_search import elsciRLSearch as elsciRL_SEARCH 28 | except ImportError as e: 29 | print(f"Warning: Could not import elsciRL_SEARCH: {e}") 30 | elsciRL_SEARCH = None 31 | 32 | try: 33 | from .instruction_following.elsciRL_instruction_following import elsciRLOptimize as elsciRL_OPTIMIZE 34 | except ImportError as e: 35 | print(f"Warning: Could not import elsciRL_OPTIMIZE: {e}") 36 | elsciRL_OPTIMIZE = None 37 | 38 | try: 39 | from .analysis.combined_variance_visual import combined_variance_analysis_graph as COMBINED_VARIANCE_ANALYSIS_GRAPH 40 | except ImportError as e: 41 | print(f"Warning: Could not import COMBINED_VARIANCE_ANALYSIS_GRAPH: {e}") 42 | COMBINED_VARIANCE_ANALYSIS_GRAPH = None 43 | -------------------------------------------------------------------------------- /elsciRL/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import pandas as pd 4 | from typing import List, Dict, Iterable 5 | from abc import ABC, abstractmethod 6 | from elsciRL.adapters import StateAdapter 7 | from torch import Tensor 8 | 9 | 10 | class Encoder(ABC): 11 | @abstractmethod 12 | def encode(self, *args, **kwargs) -> Tensor: 13 | pass 14 | 15 | class StateEncoder(Encoder): 16 | tensor_cache: Dict[int, Tensor] = dict() 17 | tensor_cache_index: int = 0 18 | 19 | @staticmethod 20 | def cache_insert(t: Tensor): 21 | StateEncoder.tensor_cache[StateEncoder.tensor_cache_index] = t 22 | StateEncoder.tensor_cache_index += 1 23 | 24 | @staticmethod 25 | def cache_retrieve(offset: int, index: int): 26 | return StateEncoder.tensor_cache[offset + index] 27 | 28 | # index_objects are the complete list of adapter specific elements used to define the encoder's index 29 | def encode(self, index_objects:list=None, state:list = None, legal_actions:list = None, prior_action:str = None, 30 | opponent_action:str = None, indexed: bool = False) -> Tensor: 31 | pass 32 | 33 | 34 | class EncodedState(ABC): 35 | @abstractmethod 36 | def data() -> Iterable: 37 | raise NotImplementedError 38 
| 39 | 40 | class StateConverter(ABC): 41 |     def __init__(self, adapter: StateAdapter): 42 |         super().__init__() 43 |         # Calls the conversion procedure 44 |         self.data: EncodedState = self.convert(adapter.state) 45 | 46 | 47 |     def convert(self, state: list) -> EncodedState: 48 |         pass -------------------------------------------------------------------------------- /elsciRL/encoders/poss_actions_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | from torch import Tensor 4 | import numpy as np 5 | 6 | #from elsciRL.encoders.encoder_abstract import StateEncoder 7 | class PossibleActionsEncoder(): 8 |     def __init__(self, all_possible_actions): 9 |         self.all_possible_actions = all_possible_actions 10 |         device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 11 |         self.vectors: Tensor = torch.cat([torch.eye(len(self.all_possible_actions)), torch.zeros(1, len(self.all_possible_actions))]).to(device) 12 | 13 |         self.all_possible_actions_dict_init = {} 14 |         for action in self.all_possible_actions: 15 |             self.all_possible_actions_dict_init[action] = 0 16 | 17 |         self.name = "PossibleActionsEncoder" 18 |         self.input_type = "list" 19 |         self.output_type = "tensor" 20 |         self.output_dim = len(self.all_possible_actions)**2 21 | 22 |     def encode(self, state: List[str] = None, legal_actions:list = None, episode_action_history:list = None, 23 |                 indexed: bool = False) -> Tensor: 24 |         """Vector of possible actions.""" 25 |         # Binary vector over all known possible actions to denote if each is currently legal 26 |         all_possible_actions = self.all_possible_actions_dict_init.copy() 27 |         for a,action in enumerate(legal_actions): 28 |             all_possible_actions[action] = int(1) 29 | 30 |         state_encoded = torch.tensor(list(all_possible_actions.values())) 31 |         if (not indexed): 32 |             state_encoded = self.vectors[state_encoded].flatten() 33 | 34 |         return state_encoded -------------------------------------------------------------------------------- /elsciRL/examples/adapters/gym_frozenlake_default.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import pandas as pd 3 | import torch 4 | from torch import Tensor 5 | # StateAdapter includes static methods for adapters 6 | from elsciRL.encoders.poss_state_encoded import StateEncoder 7 | 8 | class DefaultAdapter: 9 |     _cached_state_idx: Dict[str, int] = dict() 10 | 11 |     def __init__(self, setup_info:dict={}): 12 |         # NOTE: Update this based on the current problem, each requires preset 13 |         # knowledge of all possible states/actions/objects 14 |         # - Possible States 15 |         # - Possible Actions 16 |         # - Prior Actions 17 |         # - Possible Objects 18 | 19 |         # Initialise encoder based on all possible env states 20 |         all_possible_states = [i for i in range(4*4)] 21 |         self.encoder = StateEncoder(all_possible_states) 22 | 23 |     def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor: 24 |         """ Encode the numeric FrozenLake position so it can be used by the agents """ 25 | 26 |         # Encode to Tensor for agents 27 |         if encode: 28 |             state_encoded = self.encoder.encode(state=state) 29 |         else: 30 |             state_encoded = state 31 | 32 |         if (indexed): 33 |             state_indexed = list() 34 |             for sent in state: 35 |                 if (sent not in DefaultAdapter._cached_state_idx): 36 |                     DefaultAdapter._cached_state_idx[sent] =
len(DefaultAdapter._cached_state_idx) 37 | state_indexed.append(DefaultAdapter._cached_state_idx[sent]) 38 | 39 | state_encoded = torch.tensor(state_indexed) 40 | 41 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/examples/environments/gym_frozenlake.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | class Engine: 4 | """Defines the environment function from the generator engine. 5 | Expects the following: 6 | - reset() to reset the env a start position(s) 7 | - step() to make an action and update the game state 8 | - legal_moves_generator() to generate the list of legal moves 9 | """ 10 | def __init__(self, local_setup_info:dict={}) -> None: 11 | """Initialize Engine""" 12 | self.Environment = gym.make('FrozenLake-v1', desc=None, map_name="4x4", 13 | is_slippery=True, 14 | render_mode='rgb_array') 15 | 16 | def reset(self, start_obs:str=None): 17 | """Fully reset the environment.""" 18 | obs, info = self.Environment.reset() 19 | return obs 20 | 21 | 22 | def step(self, state:any, action:any): 23 | """Enact an action.""" 24 | # In problems where the agent can choose to reset the env 25 | if (state=="ENV_RESET")|(action=="ENV_RESET"): 26 | self.reset() 27 | 28 | obs, reward, terminated, truncated, info = self.Environment.step(action) 29 | return obs, reward, terminated, info 30 | 31 | def legal_move_generator(self, obs:any=None): 32 | """Define legal moves at each position""" 33 | legal_moves = [0,1,2,3] 34 | return legal_moves 35 | 36 | def render(self): 37 | """Render an image or text of the environment.""" 38 | return self.Environment.render() 39 | 40 | def close(self): 41 | """Close/Exit the environment.""" 42 | self.Environment.close() 43 | print("Environment Closed") 44 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_logic_generators/adapter_prompt.py: -------------------------------------------------------------------------------- 1 | adapter_prompt = """ 2 | Your role is to generate pseudocode for an adapter function that will be used to transform the state of an environment into a form that can be used by an agent. 3 | 4 | Adapters unify problems into a standard form so any agent in the elsciRL library can be used. 5 | 6 | In short, it transforms the state to a new form, optionally adding more context and then outputting a tensor. 7 | 8 | inputs: state, legal moves, action history for episode 9 | outputs: tensor for the encoded form of the adapted state 10 | 11 | # numeric adapter (numeric.py) 12 | class DefaultAdapter(setup_info): 13 | def __init__(): 14 | # Determine discrete environment size: e.g. 
"4x4" => 16 positions 15 | # Initialize a StateEncoder for these positions 16 | # Optionally define an observation space (e.g., Discrete) needed for Gym agents 17 | 18 | def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False): 19 | # If encode=True, convert the numeric state to a tensor (StateEncoder) 20 | # If indexed=True, map states to integer IDs 21 | 22 | return tensor(state_encoded) 23 | 24 | # language adapter (language.py) 25 | class LanguageAdapter(setup_info): 26 | def __init__(): 27 | # Build obs_mapping dictionary describing each state as text 28 | # Initialize LanguageEncoder 29 | 30 | def adapter(state, legal_moves=[], episode_action_history=[], encode=True, indexed=False): 31 | # Convert numeric state ID to a text description (obs_mapping) 32 | # Optionally encode the text into a tensor (LanguageEncoder) 33 | # Optionally map each unique description to an indexed ID 34 | 35 | return tensor(state_encoded) 36 | 37 | """ -------------------------------------------------------------------------------- /elsciRL/encoders/observable_objects_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | from torch import Tensor 4 | 5 | from elsciRL.encoders.encoder_abstract import StateEncoder 6 | 7 | class ObjectEncoder(): 8 | def __init__(self, local_objects): 9 | """Encoder for default state representation produced by the environment/engine.""" 10 | self.local_objects = {obj: i for i, obj in enumerate(local_objects)} 11 | device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 12 | self.vectors: Tensor = torch.cat([torch.eye(len(self.local_objects)), torch.zeros(1, len(self.local_objects))]).to(device) # tensor needs to be defined to len(local_object) 13 | self.name = "ObjectEncoder" 14 | self.input_type = "list" 15 | self.output_type = "tensor" 16 | self.output_dim = len(self.local_objects)**2 17 | 18 | def encode(self, state:list = None, legal_actions:list = None, episode_action_history:list = None, 19 | indexed: bool = False) -> Tensor: 20 | """ NO CHANGE - Board itself is used as state as is and simply converted to a vector""" 21 | # Goes through every item in state and labels based on the known objects available in the environment 22 | # New vector encoded form, for Chess: 64x12 flattened into 768x1 int vector to denote object occurance 23 | # NOT BINARY vector, value is the occurance of each object type. 24 | # -> In chess this happens to be [1 or 0] because you cant have more than one piece in each position. 
25 | state_encoded: Tensor = torch.tensor([self.local_objects.get(state_pos, len(self.local_objects)) for state_pos in state], 26 | device=self.vectors.device) 27 | 28 | if (not indexed): 29 | state_encoded = self.vectors[state_encoded].flatten() 30 | 31 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/encoders/prior_actions_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List 3 | from torch import Tensor 4 | import numpy as np 5 | 6 | #from elsciRL.encoders.encoder_abstract import StateEncoder 7 | class PriorActionsEncoder(): 8 | def __init__(self, all_possible_actions): 9 | self.all_possible_actions = all_possible_actions 10 | device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 11 | self.vectors: Tensor = torch.cat([torch.eye(len(self.all_possible_actions)), torch.zeros(1, len(self.all_possible_actions))]).to(device) 12 | 13 | self.all_possible_actions_dict_init = {} 14 | for action in self.all_possible_actions: 15 | self.all_possible_actions_dict_init[action] = int(0) 16 | 17 | self.name = "PriorActionsEncoder" 18 | self.input_type = "list" 19 | self.output_type = "tensor" 20 | self.output_dim = len(self.all_possible_actions)**2 21 | 22 | def encode(self, state: List[str] = None, legal_actions:list = None, episode_action_history:list = None, 23 | indexed: bool = False) -> Tensor: 24 | """Vector of prio actions in game so far, similar to blindfold chess.""" 25 | # STATE ENCODER 26 | # - Updated to use all possible actions for consistency with poss action encoder and generally more suitable 27 | # - Chess has loads of possible actions which is somewhat unique to the problem 28 | # - BUT order must be preserved in the prior action encoder 29 | all_possible_actions = self.all_possible_actions_dict_init.copy() 30 | for a,action in enumerate(episode_action_history): 31 | all_possible_actions[action] = int(a) 32 | 33 | state_encoded = torch.tensor(list(all_possible_actions.values())) 34 | if (not indexed): 35 | state_encoded = self.vectors[state_encoded].flatten() 36 | 37 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/examples/adapters/gym_frozenlake_language.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import pandas as pd 3 | import torch 4 | from torch import Tensor 5 | # StateAdapter includes static methods for adapters 6 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 7 | 8 | class LanguageAdapter: 9 | _cached_state_idx: Dict[str, int] = dict() 10 | 11 | def __init__(self, setup_info:dict={}): 12 | # Language encoder doesn't require any preset knowledge of env to use 13 | self.encoder = LanguageEncoder() 14 | self.obs_mapping = {0:'You are at the start position.', 1:'You are on ice.', 2:'You are on ice.', 3:'You are on ice.', 15 | 4:'You are on ice.', 5:'You fell through a hole in the ice!', 6:'You are on ice.', 7:'You fell through a hole in the ice!', 16 | 8:'You are on ice.', 9:'You are on ice.', 10:'You are on ice.', 11:'You fell through a hole in the ice!', 17 | 12:'You fell through a hole in the ice!', 13:'You are on ice.', 14:'You are on ice.', 15:'You found the chest!'} 18 | self.key_found = False 19 | 20 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, 
indexed: bool = False) -> Tensor: 21 | """ Use Language name for every piece name for current board position """ 22 | # --- 23 | # Convert to lanugage 24 | state = self.obs_mapping[state] 25 | # --- 26 | 27 | # Encode to Tensor for agents 28 | if encode: 29 | state_encoded = self.encoder.encode(state=state) 30 | else: 31 | state_encoded = state 32 | 33 | if (indexed): 34 | state_indexed = list() 35 | for sent in state: 36 | if (sent not in LanguageAdapter._cached_state_idx): 37 | LanguageAdapter._cached_state_idx[sent] = len(LanguageAdapter._cached_state_idx) 38 | state_indexed.append(LanguageAdapter._cached_state_idx[sent]) 39 | 40 | state_encoded = torch.tensor(state_indexed) 41 | 42 | return state_encoded -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # from distutils.core import setup 2 | from setuptools import setup, find_packages 3 | 4 | setup( 5 | name='elsciRL', 6 | version='0.4.0', 7 | packages=[ 8 | 'elsciRL', 9 | 'elsciRL.adapters', 10 | 'elsciRL.adapters.LLM_state_generators', 11 | 'elsciRL.agents', 12 | 'elsciRL.agents.LLM_agents', 13 | 'elsciRL.agents.stable_baselines', 14 | 'elsciRL.analysis', 15 | 'elsciRL.application_suite', 16 | 'elsciRL.encoders', 17 | 'elsciRL.encoders.language_transformers', 18 | 'elsciRL.environment_setup', 19 | 'elsciRL.evaluation', 20 | 'elsciRL.examples', 21 | 'elsciRL.examples.adapters', 22 | 'elsciRL.examples.environments', 23 | 'elsciRL.examples.local_configs', 24 | 'elsciRL.experiments', 25 | 'elsciRL.experiments.experiment_utils', 26 | 'elsciRL.experiments.training_procedures', 27 | 'elsciRL.GUI', 28 | 'elsciRL.GUI.static', 29 | 'elsciRL.GUI.templates', 30 | 'elsciRL.experiments', 31 | 'elsciRL.instruction_following', 32 | 'elsciRL.instruction_following.LLM_instr_planner', 33 | 'elsciRL.instruction_following.instr_utils', 34 | 'elsciRL.interaction_loops', 35 | 'elsciRL.published_experiments', 36 | ], 37 | package_data={ 38 | 'elsciRL.GUI.templates': ['index.html', '_generic_agent_param_form.html'], 39 | 'elsciRL.GUI.static': ['styles.css', 'app_setup.md'], 40 | }, 41 | include_package_data=True, 42 | url='https://github.com/pdfosborne/elsciRL', 43 | license='Apache-2.0 license', 44 | author='Philip Osborne', 45 | author_email='pdfosborne@gmail.com', 46 | description='Apply language solutions to Reinforcement Learning problems.', 47 | install_requires=[ 48 | 'numpy', 49 | 'pandas', 50 | 'matplotlib', 51 | 'seaborn', 52 | 'scipy>=1.10.1', 53 | 'torch', 54 | 'tqdm', 55 | 'httpimport', 56 | 'sentence-transformers', 57 | 'gymnasium', 58 | 'stable-baselines3', 59 | 'flask', 60 | 'ollama', 61 | 'markdown', 62 | ] 63 | ) 64 | -------------------------------------------------------------------------------- /elsciRL/encoders/poss_state_encoded.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import List, Any 3 | from torch import Tensor 4 | from tqdm import tqdm 5 | from elsciRL.encoders.encoder_abstract import StateEncoder 6 | 7 | class StateEncoder(StateEncoder): 8 | def __init__(self, num_states): 9 | """Encoder for default state representation produced by the environment/engine.""" 10 | # Create dict lookup 11 | # - get binary list that indexes the state e.g. 
0_0 -> [1,0,0,0] or 0_3 -> [0,0,0,1] 12 | # UPDATED - Now uses torch.nn.functional.one_hot for one-hot encoding 13 | # Using one-hot encoder is incredibly inefficient for large state spaces 14 | # Instead, we consider using an index-based encoding where each unique state is assigned a unique index. 15 | self.device = "cuda" if torch.cuda.is_available() else "cpu" # Make this optional choice with parameter 16 | self.vectors: Tensor = torch.cat([torch.eye(num_states), torch.zeros(1,num_states)]).to(self.device) # tensor needs to be defined to len(local_object) 17 | self.name = "StateEncoder" 18 | self.input_type = "list" 19 | self.output_type = "tensor" 20 | self.output_dim = num_states 21 | 22 | self.encoder = {} 23 | self.encoder_idx = 0 24 | self.num_states = num_states 25 | 26 | def encode(self, state:Any = None, legal_actions:list = None, episode_action_history:list = None, 27 | indexed: bool = False) -> Tensor: 28 | """ Set of all possible states are simply converted to a vector""" 29 | # One hot encode the state if it is not already indexed 30 | if state not in self.encoder: 31 | state_encoded = self.encoder_idx # Use the index as the state encoded value 32 | # Store the encoded state in the encoder dictionary 33 | self.encoder[state] = state_encoded 34 | # Increment the encoder index for the next unique state 35 | self.encoder_idx += 1 36 | else: 37 | state_encoded = self.encoder[state] 38 | 39 | # If indexed, use one-hot encoding 40 | # If not indexed, use the unique index to retrieve the vector 41 | if indexed: 42 | state_encoded = torch.nn.functional.one_hot(torch.tensor(state_encoded), num_classes=self.num_states).float().to(self.device) 43 | else: 44 | state_encoded = self.vectors[int(state_encoded)].flatten() 45 | 46 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/config_local.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | # Define Agent's parameters for problem 5 | # Opponent is considered a 'local' specification as benchmarks vary between setting 6 | 7 | class LocalConfig: 8 | def __init__(self, config_file_path: str): 9 | if (config_file_path): 10 | with open(config_file_path) as config_file: 11 | self.data = json.load(config_file) 12 | self.config_file_path = config_file_path 13 | 14 | else: 15 | self.data = dict() 16 | self.config_path = "" 17 | logging.info("No arguments given, using default configuration...") 18 | 19 | def __getitem__(self, key: str): 20 | item = None 21 | 22 | if (key in self.__dict__): 23 | item = self.__dict__[key] 24 | else: 25 | item = self.data[key] 26 | 27 | return item 28 | 29 | #TODO this is not universal at all !!! 
30 | class ProblemConfig(LocalConfig): 31 | """Local Config is used to define any problem specific parameters.""" 32 | def __init__(self, config_path: str): 33 | super(ProblemConfig, self).__init__(config_path) 34 | # State form 35 | self.adapter_select = self.data.get("adapter_select", [""]) 36 | # Enabled agent to be trained against multiple opponents in order provided 37 | self.training_opponent_agent = self.data.get( 38 | "training_opponent_agent", "") 39 | self.testing_opponent_agent = self.data.get( 40 | "testing_opponent_agent", "") 41 | 42 | self.training_setup = self.data.get("training_setup",'default') 43 | self.testing_setup = self.data.get("testing_setup",'default') 44 | 45 | self.training_action_cap = self.data.get("training_action_cap",1000) # Arbitrary number to ensure games dont last forever 46 | self.testing_action_cap = self.data.get("testing_action_cap",1000) # Arbitrary number to ensure games dont last forever 47 | # Reward Signal, should be consistent between all agent being compared 48 | self.reward_signal = self.data.get("reward_signal",[1,-0.1,0,0] )# [Value of winning, Value for draw, Value for each action, Value for reaching new state] 49 | # Sub-Goal Defined 50 | self.sub_goal = self.data.get("sub_goal",None) 51 | 52 | class ConfigSetup(LocalConfig): 53 | def __init__(self, config_dir: str): 54 | super(ConfigSetup, self).__init__(config_dir) 55 | self.state_configs = ProblemConfig(os.path.join(config_dir)) -------------------------------------------------------------------------------- /elsciRL/environment_setup/imports.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from elsciRL.agents.agent_abstract import Agent, QLearningAgent, LLMAgentAbstract 3 | 4 | class ImportHelper: 5 | def __init__(self, local_setup_info:dict={}) -> None: 6 | self.setup_info = local_setup_info 7 | 8 | def agent_info(self, STATE_ADAPTER_TYPES:dict={}): 9 | agent: Agent | QLearningAgent | LLMAgentAbstract = self.setup_info['agent'] 10 | agent_type: str = self.setup_info['agent_type'] 11 | agent_name: str = self.setup_info['agent_name'] 12 | if self.setup_info['adapter_select'] in STATE_ADAPTER_TYPES: 13 | agent_state_adapter = STATE_ADAPTER_TYPES[self.setup_info['adapter_select']](setup_info=self.setup_info) 14 | else: 15 | print(f"Adapter {self.setup_info['adapter_select']} not found in STATE_ADAPTER_TYPES.") 16 | print(STATE_ADAPTER_TYPES) 17 | agent_state_adapter = '' 18 | return agent, agent_type, agent_name, agent_state_adapter 19 | 20 | def parameter_info(self): 21 | num_train_episodes: int = self.setup_info['number_training_episodes'] 22 | num_test_episodes: int = self.setup_info['number_test_episodes'] 23 | try: 24 | training_action_cap: int = self.setup_info['training_action_cap'] 25 | testing_action_cap: int = self.setup_info['testing_action_cap'] 26 | except: 27 | if 'action_limit' in self.setup_info: 28 | training_action_cap: int = self.setup_info['action_limit'] 29 | testing_action_cap: int = self.setup_info['action_limit'] 30 | elif 'action_cap' in self.setup_info: 31 | training_action_cap: int = self.setup_info['action_cap'] 32 | testing_action_cap: int = self.setup_info['action_cap'] 33 | else: 34 | print('No action cap specified, using default values') 35 | training_action_cap: int = 1000 36 | testing_action_cap: int = 1000 37 | reward_signal: List[int] = self.setup_info['reward_signal'] 38 | 39 | return num_train_episodes, num_test_episodes, training_action_cap, testing_action_cap, reward_signal 40 | 41 
| def training_flag(self): 42 | train: bool = self.setup_info['train'] 43 | return train 44 | 45 | def live_env_flag(self): 46 | live_env: bool = self.setup_info['live_env'] 47 | observed_states: bool = self.setup_info['observed_states'] 48 | #experience_sampling: bool = self.setup_info['experience_sampling'] 49 | return live_env, observed_states -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_DQN.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import DQN 7 | from stable_baselines3.common.evaluation import evaluate_policy 8 | from PIL import Image # Used to generate GIF 9 | 10 | class SB_DQN(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate:float=0.0001, buffer_size:int=1000000): 12 | self.epsilon: int = 0 # Not used currently but required for compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" 14 | self.dqn = DQN(policy, env, verbose=0, device=self.device, 15 | learning_rate=learning_rate, buffer_size=buffer_size) 16 | if torch.cuda.is_available(): 17 | print("---- Using GPU ----") 18 | print("Device:", self.dqn.device) 19 | 20 | def policy(self, state: any) -> str: 21 | return self.dqn.predict(state) 22 | 23 | def learn(self, total_steps:int=100) -> float: 24 | self.dqn.learn(total_timesteps=total_steps) 25 | 26 | def test(self, env, render:bool=False): 27 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 28 | vec_env = self.dqn.get_env() 29 | obs = vec_env.reset() 30 | 31 | actions = [] 32 | states = [] 33 | 34 | done = False 35 | render_stack = [] 36 | if render: 37 | render_stack.append( 38 | Image.fromarray(vec_env.render().astype('uint8')) 39 | ) 40 | while not done: 41 | action, _state = self.dqn.predict(obs, deterministic=True) 42 | if isinstance(action, np.int64): 43 | actions.append(action.item()) 44 | else: 45 | actions.append(action[0]) 46 | #actions.append(action[0]) 47 | 48 | obs, r, done, info = vec_env.step(action) 49 | states.append(info[0]['obs']) 50 | if render: 51 | render_stack.append( 52 | Image.fromarray(vec_env.render().astype('uint8')) 53 | ) 54 | 55 | #vec_env.render("human") 56 | episode_reward = info[0]['episode']['r'] 57 | if episode_reward > 0.5: 58 | print("----> ", episode_reward) 59 | 60 | return episode_reward, actions, states, render_stack 61 | 62 | def q_result(self): 63 | results = [0,0] 64 | total_q = results[0] 65 | mean_q = results[1] 66 | return total_q, mean_q 67 | 68 | def clone(self): 69 | clone = pickle.loads(pickle.dumps(self)) 70 | return clone -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_PPO.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import PPO 7 | from stable_baselines3.common.evaluation import evaluate_policy 8 | from PIL import Image # Used to generate GIF 9 | 10 | class SB_PPO(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate:float=0.0003, n_steps:int=2048): 12 | self.epsilon: int = 0 # Not used currently but required for 
compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" 14 | self.ppo = PPO(policy, env, verbose=0, device=self.device, 15 | learning_rate=learning_rate, n_steps=n_steps) 16 | if torch.cuda.is_available(): 17 | print("---- Using GPU ----") 18 | print("Device:", self.ppo.device) 19 | 20 | def policy(self, state: any) -> str: 21 | return self.ppo.predict(state) 22 | 23 | def learn(self, total_steps:int=100) -> float: 24 | self.ppo.learn(total_timesteps=total_steps) 25 | 26 | def test(self, env, render:bool=False): 27 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 28 | vec_env = self.ppo.get_env() 29 | obs = vec_env.reset() 30 | 31 | actions = [] 32 | states = [] 33 | 34 | done = False 35 | render_stack = [] 36 | if render: 37 | render_stack.append( 38 | Image.fromarray(vec_env.render().astype('uint8')) 39 | ) 40 | while not done: 41 | action, _state = self.ppo.predict(obs, deterministic=True) 42 | if isinstance(action, np.int64): 43 | actions.append(action.item()) 44 | else: 45 | actions.append(action[0]) 46 | #actions.append(action[0]) 47 | 48 | obs, r, done, info = vec_env.step(action) 49 | states.append(info[0]['obs']) 50 | if render: 51 | render_stack.append( 52 | Image.fromarray(vec_env.render().astype('uint8')) 53 | ) 54 | 55 | #vec_env.render("human") 56 | episode_reward = info[0]['episode']['r'] 57 | if episode_reward > 0.5: 58 | print("----> ", episode_reward) 59 | 60 | return episode_reward, actions, states, render_stack 61 | 62 | def q_result(self): 63 | results = [0,0] 64 | total_q = results[0] 65 | mean_q = results[1] 66 | return total_q, mean_q 67 | 68 | def clone(self): 69 | clone = pickle.loads(pickle.dumps(self)) 70 | return clone 71 | 72 | -------------------------------------------------------------------------------- /elsciRL/encoders/language_transformers/MiniLM_L6v2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from multiprocessing.spawn import import_main_path 4 | from typing import Dict, List, Tuple 5 | from collections import Counter 6 | from gymnasium.spaces import Box 7 | 8 | from torch import Tensor 9 | from elsciRL.encoders.encoder_abstract import StateEncoder 10 | 11 | # Language Encoder 12 | from sentence_transformers import SentenceTransformer 13 | 14 | 15 | 16 | class LanguageEncoder(StateEncoder): 17 | """Required Language Model included in requisite packages.""" 18 | _cached_enc: Dict[str, Tensor] = dict() 19 | _cached_freq: Counter = Counter() 20 | 21 | def __init__(self, device: str = None): 22 | autodev = "cuda" if torch.cuda.is_available() else "cpu" 23 | self.device = device if device else autodev 24 | self.sentence_model: SentenceTransformer = SentenceTransformer('all-MiniLM-L6-v2', device=self.device) 25 | low_array = [-1 for i in range(384)] 26 | high_array = [1 for i in range(384)] 27 | self.observation_space = Box(low=np.array(low_array), high=np.array(high_array), dtype=np.float32) 28 | self.name = "MiniLM_L6v2" 29 | self.input_type = "text" 30 | self.output_type = "tensor" 31 | self.output_dim = 384 32 | 33 | def encode(self, state: str|List[str], legal_actions:list = None, episode_action_history:list = None, 34 | indexed: bool = False, progress_bar:bool=False) -> Tensor: 35 | 36 | # I think typing is overriding the input type anyway -> need to ensure sentences are split up 37 | if type(state) == str: 38 | state = [state] 39 | # state = state.split(".") 40 | # state = [s for s in state if s.strip()] 41 | if 
(len(state) == 0): 42 | state = [""] 43 | to_encode = [sent for sent in state if sent not in LanguageEncoder._cached_enc] 44 | if (to_encode): 45 | # Show progress bar if state is a list of strings 46 | encoded = self.sentence_model.encode(to_encode, batch_size=256, convert_to_tensor=True, show_progress_bar=progress_bar) 47 | LanguageEncoder._cached_enc.update({to_encode[i]: encoded[i] for i in range(len(to_encode))}) 48 | 49 | LanguageEncoder._cached_freq.update(state) 50 | LanguageEncoder._cached_freq.subtract(LanguageEncoder._cached_freq.keys()) 51 | state_encoded = torch.stack([LanguageEncoder._cached_enc[sent] for sent in state]) 52 | 53 | if (len(LanguageEncoder._cached_freq) > 10000): 54 | for key, freq in list(reversed(LanguageEncoder._cached_freq.most_common()))[:2000]: 55 | del LanguageEncoder._cached_enc[key] 56 | del LanguageEncoder._cached_freq[key] 57 | 58 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/agents/stable_baselines/SB3_A2C.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | import numpy as np 4 | from elsciRL.agents.agent_abstract import QLearningAgent 5 | import gymnasium as gym 6 | from stable_baselines3 import A2C 7 | from PIL import Image # Used to generate GIF 8 | 9 | 10 | class SB_A2C(QLearningAgent): 11 | def __init__(self, policy:str='MlpPolicy', env:gym.Env = None, learning_rate=0.0007, n_steps=500): 12 | self.epsilon: int = 0 # Not used currently but required for compatibility 13 | self.device = "auto" if torch.cuda.is_available() else "cpu" # A2C is meant to be run primarily on the CPU, especially when you are not using a CNN. 14 | self.a2c = A2C(policy, env, verbose=0, device="cpu", 15 | learning_rate=learning_rate, n_steps=n_steps) 16 | if torch.cuda.is_available(): 17 | print("---- A2C is meant to be run primarily on the CPU ----") 18 | print("Device:", self.a2c.device) 19 | 20 | def policy(self, state: any) -> str: 21 | # TODO: make sure output is int 22 | return self.a2c.predict(state) 23 | 24 | def learn(self, total_steps:int=100) -> float: 25 | self.a2c.learn(total_timesteps=total_steps) 26 | 27 | def test(self, env, render:bool=False): 28 | #mean_reward, std_reward = evaluate_policy(self.a2c, env, n_eval_episodes=1) 29 | # Using environment from agent may limit episodes based on prior experience 30 | #vec_env = self.a2c.get_env() 31 | 32 | vec_env = env 33 | obs, info = vec_env.reset() 34 | 35 | actions = [] 36 | states = [] 37 | 38 | done = False 39 | episode_reward = 0 40 | render_stack = [] 41 | if render: 42 | render_stack.append( 43 | Image.fromarray(vec_env.render().astype('uint8')) 44 | ) 45 | while not done: 46 | action, _state = self.a2c.predict(obs, deterministic=True) 47 | if isinstance(action, np.int64): 48 | actions.append(action.item()) 49 | else: 50 | actions.append(action) 51 | # actions.append(int(action)) 52 | obs, r, done, truncated, info = vec_env.step(action) 53 | episode_reward += r 54 | if render: 55 | render_stack.append(Image.fromarray(vec_env.render().astype('uint8'))) 56 | 57 | #states.append(info[0]['obs']) 58 | states.append(info['obs']) 59 | #vec_env.render("human") 60 | 61 | #episode_reward = info[0]['episode']['r'] 62 | if episode_reward > 0.5: 63 | print("----> ", episode_reward) 64 | 65 | return episode_reward, actions, states, render_stack 66 | 67 | def q_result(self): 68 | results = [0,0] 69 | total_q = results[0] 70 | mean_q = results[1] 71 | return total_q, mean_q 72 | 73 | 
def clone(self): 74 | clone = pickle.loads(pickle.dumps(self)) 75 | return clone 76 | 77 | -------------------------------------------------------------------------------- /elsciRL/experiments/experiment_utils/agent_factory.py: -------------------------------------------------------------------------------- 1 | class AgentFactory: 2 | """Factory for creating agent instances based on type name and parameters.""" 3 | def __init__(self, adapters, setup_info): 4 | from elsciRL.agents.table_q_agent import TableQLearningAgent 5 | from elsciRL.agents.DQN import DQNAgent 6 | from elsciRL.agents.LLM_agents.ollama_agent import LLMAgent as OllamaAgent 7 | self.adapters = adapters 8 | self.agent_types = { 9 | "Qlearntab": TableQLearningAgent, 10 | "DQN": DQNAgent, 11 | "LLM_Ollama": OllamaAgent, 12 | } 13 | self.setup_info = setup_info 14 | 15 | def register_agent(self, name, agent_class): 16 | self.agent_types[name] = agent_class 17 | 18 | def create(self, agent_type, agent_parameters, engine=None, adapter=None): 19 | if agent_type == "DQN": 20 | if adapter: 21 | adapter_sample = self.adapters[adapter](setup_info=self.setup_info) 22 | # Set input_size from adapter 23 | try: 24 | input_size = adapter_sample.input_dim 25 | print(f"Using input_dim from adapter {adapter}: {input_size}") 26 | except Exception: 27 | try: 28 | input_size = adapter_sample.encoder.output_dim 29 | print(f"Using encoder output_dim from encoder {adapter_sample.encoder}: {input_size}") 30 | except Exception: 31 | try: 32 | input_size = adapter_sample.LLM_adapter.encoder.output_dim 33 | print(f"Using LLM_adapter encoder output_dim from LLM adapter {adapter_sample.LLM_adapter}: {input_size}") 34 | except Exception: 35 | print(f"Adapter {adapter} does not have input_dim specified.") 36 | raise ValueError(f"No input dim size found in adapter: {adapter}") 37 | 38 | if engine: 39 | print(engine) 40 | engine_sample = engine(local_setup_info=self.setup_info) 41 | try: 42 | output_size = engine_sample.output_size 43 | except Exception: 44 | try: 45 | output_size = engine_sample.output_dim 46 | except Exception: 47 | try: 48 | output_size = engine_sample.output_dim_size 49 | except Exception: 50 | print(f"Engine {engine} does not contain output dim size for DQN agent, using default 1,000.") 51 | output_size = 1000 52 | # Order must match DQN input 53 | temp_dict = {'input_size': input_size, 'output_size': output_size} 54 | temp_dict.update(agent_parameters) 55 | else: 56 | # For other agents, we assume the parameters are already in the correct format 57 | temp_dict = agent_parameters 58 | if agent_type not in self.agent_types: 59 | raise ValueError(f"Unknown agent type: {agent_type}") 60 | return self.agent_types[agent_type](**temp_dict) 61 | -------------------------------------------------------------------------------- /elsciRL/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | 5 | 6 | class Config: 7 | def __init__(self, config_file_path: str): 8 | if config_file_path: 9 | with open(config_file_path) as config_file: 10 | self.data = json.load(config_file) 11 | self.config_file_path = config_file_path 12 | 13 | else: 14 | self.data = dict() 15 | self.config_path = "" 16 | logging.info("No arguments given, using default configuration...") 17 | 18 | def __getitem__(self, key: str): 19 | item = None 20 | 21 | if key in self.__dict__: 22 | item = self.__dict__[key] 23 | else: 24 | item = self.data[key] 25 | 26 | return item 27 | 28 | 29 | class 
ExperimentConfig(Config): 30 | def __init__(self, config_path: str): 31 | super(ExperimentConfig, self).__init__(config_path) 32 | 33 | # Name setup 34 | self.name = self.data.get( 35 | "name", os.path.split(self.config_file_path)[-1].replace(".json", "") 36 | ) 37 | # Define Problem Type Choice 38 | self.problem_type = self.data.get("problem_type", "") 39 | # Specify local config choices to select agents of interest 40 | self.agent_select = self.data.get("agent_select", ["Qlearntab"]) 41 | 42 | # ---> We then parse these three inputs to obtain the local config setup info 43 | # ---> Ideally input is a dict input: setups = { 'Setup1':{"Adapter":"Engine", "Encoder":"Yes", "Agent":"TabQ"},... } 44 | 45 | # Training repeated 46 | self.num_training_episodes = self.data.get("num_training_episodes", 1000) 47 | self.number_training_repeats = self.data.get("number_training_repeats", 5) 48 | 49 | # Testing repeated 50 | self.number_test_episodes = self.data.get("number_test_episodes", 100) 51 | self.number_test_repeats = self.data.get("number_test_repeats", 5) 52 | self.test_agent_type = self.data.get("test_agent_type", "best") 53 | 54 | # Tab Q Agent parameters 55 | self.alpha = self.data.get("alpha", [0.05]) 56 | self.gamma = self.data.get("gamma", [0.95]) 57 | self.epsilon = self.data.get("epsilon", [0.05]) 58 | # Neural Agent Parameters 59 | self.input_type = "lm" 60 | self.input_size = self.data.get("input_size", [384]) 61 | self.sent_hidden_dim = self.data.get("sent_hidden_dim", [10]) 62 | self.hidden_dim = self.data.get("hidden_dim", [128]) 63 | self.num_hidden = self.data.get("num_hidden", [2]) 64 | self.sequence_size = self.data.get("sequence_size", [20]) 65 | self.memory_size = self.data.get("memory_size", [2000]) 66 | self.target_replace_iter = self.data.get("target_replace_iter", [100]) 67 | self.learning_rate = self.data.get("learning_rate", [0.001]) 68 | self.batch_size = self.data.get("batch_size", [1]) 69 | 70 | self.number_test_episodes = self.data.get("number_test_episodes", 250) 71 | self.number_test_repeats = self.data.get("number_test_repeats", 5) 72 | 73 | 74 | class TestingSetupConfig(Config): 75 | def __init__(self, config_dir: str): 76 | super(TestingSetupConfig, self).__init__(config_dir) 77 | self.state_configs = ExperimentConfig(os.path.join(config_dir)) 78 | -------------------------------------------------------------------------------- /elsciRL/GUI/prerender_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import Tensor 4 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder as MiniLM_L6v2 5 | 6 | # Get search method 7 | import os 8 | import json 9 | from datetime import datetime 10 | 11 | 12 | def encode_prerender_data(observed_states:dict|str=None, 13 | save_dir:str=None, 14 | encoder:str ='MiniLM_L6v2') -> Tensor: 15 | """ Encodes the observed states using a language encoder. 16 | Args: 17 | observed_states (dict or str): The observed states to encode, can be the dictionary or the directory path string. 18 | save_dir (str): The directory where the encoded states will be saved. If None, defaults to './encoded-prerender-data'. 19 | encoder (str): The name of the encoder to use. Defaults to 'MiniLM_L6v2', options include: 20 | - 'MiniLM_L6v2': A lightweight language model suitable for encoding text. 21 | - ~~Other encoders can be added in the future.~~ 22 | Returns: 23 | Tensor: The encoded representation of the observed states. 
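        Example:
            Illustrative sketch only; the states dictionary and output directory below are hypothetical:

                observed = {'0_0.0': 'The boat is in the middle, facing directly into the wind,'}
                encoded = encode_prerender_data(observed_states=observed, save_dir='./encoded-prerender-data')
                # 'encoded' holds one 384-dim MiniLM embedding per observed state and is also
                # written to '<save_dir>/encoded_observed_states.txt'.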
24 | """ 25 | # ------------------------------------------------------------------ 26 | # Define the available encoders 27 | # Currently only MiniLM_L6v2 is available, but can be extended in the future. 28 | ENCODERS = {'MiniLM_L6v2': MiniLM_L6v2} 29 | encoder = ENCODERS[encoder]() 30 | # ------------------------------------------------------------------ 31 | if observed_states is None: 32 | print("\n ----------------------------------------------------") 33 | print(" No observed states provided. Please select a file to encode.") 34 | print(" ----------------------------------------------------\n") 35 | file_names = [file for file in os.listdir('./') if file.endswith('.txt')] 36 | for n, file in enumerate(file_names): 37 | print(f"- {n}: {file}") 38 | selection = input("\n Select the file to encode (by number): ") 39 | observed_states_filename = file_names[int(selection)] 40 | observed_states_path = os.path.join('./', observed_states_filename) 41 | with open(observed_states_path, 'r') as f: 42 | observed_states = json.loads(f.read()) 43 | save_dir = './' 44 | else: 45 | if isinstance(observed_states, str): 46 | observed_states_filename = observed_states.split('/')[-1].split('.')[0] 47 | if not save_dir: 48 | save_dir = os.path.dirname(observed_states) 49 | with open(observed_states, 'r') as f: 50 | observed_states = json.loads(f.read()) 51 | else: 52 | observed_states_filename = 'observed_states' 53 | if not save_dir: 54 | save_dir = './' 55 | 56 | # Encode the observed states 57 | print(f"\n Encoding observed state file {observed_states_filename} using {encoder.name}...") 58 | str_states = [str_state for str_state in observed_states.values()] 59 | observed_states_encoded = encoder.encode(str_states) 60 | 61 | if not os.path.exists(save_dir): 62 | os.makedirs(save_dir) 63 | file_path = os.path.join(save_dir, 'encoded_' + observed_states_filename.split('.')[0] + '.txt') 64 | np.savetxt(file_path, observed_states_encoded.numpy()) 65 | print(f"Encoded states saved to {file_path}") 66 | print(f"Number of States: {len(observed_states_encoded)}") 67 | 68 | return observed_states_encoded -------------------------------------------------------------------------------- /elsciRL/examples/adapters/elsciRL_sailing_default.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import pandas as pd 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | # StateAdapter includes static methods for adapters 7 | from elsciRL.encoders.poss_state_encoded import StateEncoder 8 | from gymnasium.spaces import Text, Discrete 9 | 10 | class DefaultAdapter: 11 | 12 | # ------ Static Methods --------------------------------------- 13 | # - Defined by simulator source https://github.com/PPierzc/ai-learns-to-sail/blob/master/tasks/channel.py 14 | @staticmethod 15 | def angle_to_state(angle): 16 | return int(30 * ((angle + np.pi) / (2 * np.pi) % 1)) # Discretization of the angle space 17 | 18 | @staticmethod 19 | def x_to_state(x): 20 | return int(40 * ((x + -10) / 20)) # Discretization of the x space 21 | 22 | @staticmethod 23 | def state_discretizer(state): 24 | x = float(state.split('_')[0]) 25 | x_state = DefaultAdapter.x_to_state(x) 26 | 27 | angle = float(state.split('_')[1]) 28 | angle_state = DefaultAdapter.angle_to_state(angle) 29 | 30 | state_out = str(x_state)+'_'+str(angle_state) 31 | return state_out 32 | # ------------------------------------------------------------- 33 | 34 | _cached_state_idx: Dict[str, int] = dict() 35 
| def __init__(self, setup_info:dict={}) -> None: 36 | # ------ State Encoder --------------------------------------- 37 | # Initialise encoder based on all possible env states 38 | all_possible_x = [i*-1 for i in range(40)] 39 | all_possible_angle = [i for i in range(30)] 40 | # Need an index that preserves the identity of both the x and angle values 41 | all_possible_states = [] 42 | for x_ind in all_possible_x: 43 | for angle_ind in all_possible_angle: 44 | index = str(x_ind)+'_'+str(angle_ind) 45 | all_possible_states.append(index) 46 | # Input to pre-built possible state encoder 47 | #self.encoder = StateEncoder(all_possible_states) 48 | self.encoder = {} 49 | # Observation is string: "x_angle" 50 | # -> Then discretized and returned as string: "x_state_angle_state" 51 | # -> Before being converted to a unique numeric id (x:-10-10*2dp * angle:0-2pi*1dp) 52 | self.observation_space = Discrete(2000*30) 53 | 54 | 55 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor: 56 | """ Discretize the raw 'x_angle' observation and encode it to a unique numeric state id """ 57 | 58 | state = DefaultAdapter.state_discretizer(state) 59 | 60 | # Encode to Tensor for agents 61 | if encode: 62 | #state_encoded = self.encoder.encode(state=state) 63 | # elsciRL state encoder is large and not needed for tabular agents 64 | # - Won't work for neural agents 65 | if (state not in self.encoder): 66 | state_encoded = torch.tensor(len(self.encoder)) 67 | self.encoder[state] = state_encoded 68 | else: 69 | state_encoded = self.encoder[state] 70 | else: 71 | state_encoded = state 72 | 73 | if (indexed): 74 | state_indexed = list() 75 | for sent in state: 76 | if (sent not in DefaultAdapter._cached_state_idx): 77 | DefaultAdapter._cached_state_idx[sent] = len(DefaultAdapter._cached_state_idx) 78 | state_indexed.append(DefaultAdapter._cached_state_idx[sent]) 79 | 80 | state_encoded = torch.tensor(state_indexed) 81 | 82 | return state_encoded -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode/ 163 | 164 | # Dev testing problem 165 | benchmark/output/* 166 | elsciRL/benchmark/output/* 167 | elsciRL-App-output/* -------------------------------------------------------------------------------- /elsciRL/experiments/training_procedures/policy_gradient.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from elsciRL.environment_setup.results_table import ResultsTable 4 | from elsciRL.experiments.experiment_utils.config_utils import ensure_dir 5 | from elsciRL.interaction_loops.policy_gradient import PolicyGradientInteractionLoop 6 | 7 | 8 | def run_policy_gradient_training_loop( 9 | env_manager, 10 | policy_agent_factory, 11 | result_manager, 12 | training_render, 13 | training_render_save_dir, 14 | save_dir, 15 | engine_name, 16 | engine, 17 | agent_type, 18 | adapter, 19 | train_setup_info, 20 | trained_agents, 21 | num_training_seeds, 22 | test_agent_type, 23 | show_figures, 24 | number_training_repeats, 25 | wrappers=None, 26 | ): 27 | """Specialized training loop for policy-gradient agents.""" 28 | 29 | key = f"{engine_name}_{agent_type}_{adapter}" 30 | if key not in trained_agents: 31 | trained_agents[key] = {} 32 | 33 | seed_recall = {} 34 | seed_results_connection = {} 35 | observed_states_stored = {} 36 | training_results_stored = None 37 | 38 | for seed_num in range(num_training_seeds): 39 | if num_training_seeds > 1: 40 | print("------\n- Seed Num: ", seed_num) 41 | 42 | setup_num = 0 43 | temp_agent_store = {} 44 | 45 | for training_repeat in range(1, number_training_repeats + 1): 46 | setup_num += 1 47 | env = env_manager.create_gym_env(engine, adapter, train_setup_info, wrappers=wrappers) 48 | agent_parameters = train_setup_info['agent_parameters'][agent_type] 49 | agent = policy_agent_factory.create(agent_type, agent_parameters, env) 50 | 51 | total_steps = train_setup_info.get('training_action_cap', 100) * train_setup_info.get('number_training_episodes', 1) 52 | agent.learn(total_steps=total_steps) 53 | 54 | agent_name = train_setup_info.get('agent_name', f"{agent_type}_{adapter}") 55 | results_table = ResultsTable(train_setup_info) 56 | table_results = PolicyGradientInteractionLoop.policy_rollout( 57 | agent, 58 | env, 59 | agent_name, 60 | train_setup_info.get('number_training_episodes', 1), 61 | results_table, 62 | render=False, 63 | action_limit=train_setup_info.get('training_action_cap'), 64 | ) 65 | 66 | table_results['episode'] = table_results.index 67 | table_results.insert(loc=0, column='Repeat', value=setup_num) 68 | 69 | agent_save_dir = os.path.join( 70 | save_dir, 71 | f"{engine_name}_{agent_type}_{adapter}__training_results_{setup_num}" 72 | ) 73 | ensure_dir(agent_save_dir) 74 | Return = result_manager.train_report(table_results, agent_save_dir, show_figures) 75 | train_setup_info['train_save_dir'] = agent_save_dir 76 | 77 | if key not in temp_agent_store: 78 | temp_agent_store[key] = {} 79 | temp_agent_store[key][setup_num] = {'Return': Return, 'agent': agent, 'train_setup': train_setup_info.copy()} 80 | 81 | seed_recall[agent_name] = setup_num 82 | training_results_stored = table_results 83 | 84 | seed_results_connection[key] = training_results_stored 85 | 86 | def _select_training_setups(): 87 | if test_agent_type.lower() == 'best': 88 | best_repeat = max(temp_agent_store[key], key=lambda r: temp_agent_store[key][r]['Return']) 89 | return [temp_agent_store[key][best_repeat]] 90 | if test_agent_type.lower() == 'all': 91 | return list(temp_agent_store[key].values()) 
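            # Any other test_agent_type value falls back to the most recent training repeat only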
92 | return [temp_agent_store[key][setup_num]] 93 | 94 | selected_setups = _select_training_setups() 95 | trained_agents[key][agent_name] = [entry['agent'] for entry in selected_setups] if len(selected_setups) > 1 else selected_setups[0]['agent'] 96 | 97 | training_setups_for_key = {} 98 | for idx, entry in enumerate(selected_setups, start=1): 99 | training_setup = entry['train_setup'] 100 | repeat_label = entry.get('train_setup', {}).get('Repeat', idx) 101 | training_setups_for_key[f"Training_Setup_{engine_name}_{agent_type}_{adapter}_{repeat_label}"] = training_setup 102 | 103 | return trained_agents, seed_results_connection, temp_agent_store, training_results_stored, observed_states_stored 104 | -------------------------------------------------------------------------------- /tests/test_policy_gradient_classroom.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the following to test: 3 | pytest tests/test_policy_gradient_classroom.py -k Classroom 4 | """ 5 | 6 | import pytest 7 | 8 | from elsciRL.application_suite.import_tool import PullApplications 9 | from elsciRL.experiments.policy_gradient import PolicyGradientExperiment 10 | 11 | 12 | @pytest.mark.integration 13 | @pytest.mark.slow 14 | def test_policy_gradient_runs_on_classroom(tmp_path): 15 | pytest.importorskip("stable_baselines3") 16 | 17 | puller = PullApplications() 18 | try: 19 | application_data = puller.pull(['Classroom']) 20 | except Exception as exc: 21 | pytest.skip(f"Classroom application unavailable: {exc}") 22 | 23 | classroom_data = application_data.get('Classroom') 24 | if not classroom_data: 25 | pytest.skip("Classroom application data missing") 26 | 27 | default_adapter = 'default' 28 | if default_adapter not in classroom_data['adapters']: 29 | default_adapter = list(classroom_data['adapters'].keys())[0] 30 | 31 | # Sanity-check the Classroom engine's API matches Gym expectations. 
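    # The engine is expected to return a classic Gym-style 4-tuple from step(),
    # matching the unpacking used in elsciRL/interaction_loops/state_search.py.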
32 | engine_cls = classroom_data['engine'] 33 | engine_instance = engine_cls(classroom_data['local_configs']['classroom_A']) 34 | initial_obs = engine_instance.reset() 35 | step_output = engine_instance.step(state=initial_obs, action=0) 36 | assert isinstance(step_output, tuple), "Engine.step should return a tuple" 37 | assert len(step_output) == 4, f"Engine.step must return 4 values, got {len(step_output)}" 38 | 39 | base_experiment_data = { 40 | "number_training_episodes": 100, 41 | "number_training_repeats": 1, 42 | "number_training_seeds": 1, 43 | "number_test_episodes": 100, 44 | "number_test_repeats": 1, 45 | "training_action_cap": 16, 46 | "testing_action_cap": 16, 47 | "test_agent_type": "best", 48 | "reward_signal": [1, 0, 0], 49 | "train": True, 50 | "live_env": True, 51 | } 52 | 53 | agent_configs = { 54 | "PPO": { 55 | "agent_parameters": { 56 | "learning_rate": 3e-4, 57 | "batch_size": 64, 58 | "minibatch_size": 16, 59 | "update_epochs": 2, 60 | "hidden_size": 64, 61 | } 62 | }, 63 | } 64 | 65 | for agent_type, config in agent_configs.items(): 66 | experiment_data = base_experiment_data.copy() 67 | experiment_data["agent_select"] = [agent_type] 68 | experiment_data["adapter_select"] = [default_adapter] 69 | experiment_data["adapter_input_dict"] = {agent_type: [default_adapter]} 70 | experiment_data["agent_parameters"] = {agent_type: config["agent_parameters"]} 71 | 72 | experiment_config = {"data": experiment_data} 73 | local_config = {"data": classroom_data['local_configs']['classroom_A']} 74 | 75 | agent_tmp_dir = tmp_path / agent_type 76 | agent_tmp_dir.mkdir(parents=True, exist_ok=True) 77 | 78 | experiment = PolicyGradientExperiment( 79 | Config=experiment_config, 80 | ProblemConfig=local_config, 81 | Engine=classroom_data['engine'], 82 | Adapters=classroom_data['adapters'], 83 | save_dir=str(agent_tmp_dir), 84 | show_figures='No', 85 | window_size=0.1, 86 | ) 87 | 88 | print(f"Training {agent_type} agent...") 89 | training_setups = experiment.train() 90 | assert training_setups, f"Policy gradient training should generate setups for {agent_type}" 91 | 92 | print(f"Testing {agent_type} agent...") 93 | evaluation = experiment.test() 94 | assert evaluation is not None 95 | 96 | print(f"Rendering {agent_type} agent...") 97 | render_dir = agent_tmp_dir / "renders" 98 | render_outputs = experiment.render_results(render_save_dir=str(render_dir)) 99 | assert render_outputs is not None 100 | print('Test complete') 101 | 102 | 103 | if __name__ == "__main__": 104 | import argparse 105 | import tempfile 106 | from pathlib import Path 107 | 108 | parser = argparse.ArgumentParser(description="Run policy-gradient classroom test") 109 | parser.add_argument( 110 | "--output-dir", 111 | type=Path, 112 | default=None, 113 | help="Directory where test artifacts should be saved. 
Uses a temp dir if omitted.", 114 | ) 115 | args = parser.parse_args() 116 | 117 | if args.output_dir is not None: 118 | args.output_dir.mkdir(parents=True, exist_ok=True) 119 | test_policy_gradient_runs_on_classroom(args.output_dir) 120 | else: 121 | with tempfile.TemporaryDirectory() as tmp_dir: 122 | test_policy_gradient_runs_on_classroom(Path(tmp_dir)) 123 | -------------------------------------------------------------------------------- /elsciRL/analysis/combined_tabular_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | import json 5 | 6 | def combined_tabular_analysis_results(results_dir:str='', analysis_type='training'): 7 | if results_dir == '': 8 | raise ValueError("Save directory not specified.") 9 | analysis_type = analysis_type.lower() # lowercase analysis type input 10 | # Get sub-dir for each problem-experiment type 11 | instruction_folders = [os.path.join(results_dir, instr) for instr in os.listdir(results_dir) if os.path.isdir(os.path.join(results_dir, instr))] 12 | variance_results = {} 13 | for instr_folder_dir in instruction_folders: 14 | instr_id = instr_folder_dir.split('/')[-1].split('//')[-1].split('\\')[-1].split('\\\\')[-1] 15 | if instr_id not in variance_results.keys(): 16 | variance_results[instr_id] = {} 17 | print(f"Processing {instr_id} for {analysis_type} analysis.") 18 | problem_folders = [name for name in os.listdir(instr_folder_dir) if os.path.isdir(os.path.join(instr_folder_dir, name))] 19 | # Find experiment folders 20 | # - Capture case where there is only one experiment type 21 | # and so wont have sub-directory for experiments to search 22 | for experiment_dir in problem_folders: 23 | if analysis_type == 'training': 24 | experiment_name = experiment_dir+'_training' 25 | file_names = [name for name in os.listdir(instr_folder_dir+'/'+experiment_dir) if name[0:25] == 'training_variance_results'] 26 | elif analysis_type == 'testing': 27 | experiment_name = experiment_dir+'_testing' 28 | file_names = [name for name in os.listdir(instr_folder_dir+'/'+experiment_dir) if name[0:24] == 'testing_variance_results'] 29 | else: 30 | raise ValueError("Analysis type must be either 'training' or 'testing'.") 31 | 32 | if experiment_name not in variance_results[instr_id].keys(): 33 | variance_results[instr_id][experiment_name] = {} 34 | 35 | for file in file_names: 36 | results = pd.read_csv(instr_folder_dir+'/'+experiment_dir+'/'+file) 37 | agent = results['agent'].iloc[0].split('__')[0] 38 | if agent not in variance_results[instr_id][experiment_name].keys(): 39 | variance_results[instr_id][experiment_name][agent] = {} 40 | 41 | # Calculate Mean and Std Dev 42 | variance_results[instr_id][experiment_name][agent]['num_repeats'] = results['num_repeats'].iloc[0] 43 | variance_results[instr_id][experiment_name][agent]['number_episodes'] = results.index.max() + 1 44 | # - rolling avg R per episode 45 | variance_results[instr_id][experiment_name][agent]['mean'] = results['avg_R_mean'].mean() 46 | variance_results[instr_id][experiment_name][agent]['median'] = results['avg_R_mean'].median() 47 | variance_results[instr_id][experiment_name][agent]['std_error'] = results['avg_R_mean'].sem() 48 | variance_results[instr_id][experiment_name][agent]['std_dev'] = results['avg_R_mean'].std() 49 | variance_results[instr_id][experiment_name][agent]['variance'] = results['avg_R_mean'].var() 50 | # - cumulative R per episode 51 | 
variance_results[instr_id][experiment_name][agent]['cum_R_mean'] = results['cum_R_mean'].mean() 52 | variance_results[instr_id][experiment_name][agent]['cum_R_median'] = results['cum_R_mean'].median() 53 | variance_results[instr_id][experiment_name][agent]['cum_R_std_error'] = results['cum_R_mean'].sem() 54 | variance_results[instr_id][experiment_name][agent]['cum_R_std_dev'] = results['cum_R_mean'].std() 55 | variance_results[instr_id][experiment_name][agent]['cum_R_variance'] = results['cum_R_mean'].var() 56 | # - time avg per episode 57 | variance_results[instr_id][experiment_name][agent]['time_avg'] = results['time_mean'].mean() 58 | 59 | variance_results_df = pd.DataFrame.from_dict( 60 | {f"{instr}/{experiment}/{agent}": data for instr, experiments in variance_results.items() 61 | for experiment, agents in experiments.items() 62 | for agent, data in agents.items()}, 63 | orient='index' 64 | ).reset_index() 65 | variance_results_df.columns = ['Instruction/Experiment/Agent', 'Num Repeats', 'Number Episodes', 66 | 'Avg R Mean', 'Avg R Median', 'Avg R Std Error', 'Avg R Std Dev', 'Avg R Variance', 67 | 'Cumulative R Mean', 'Cumulative R Median', 'Cumulative R Std Error', 68 | 'Cumulative R Std Dev', 'Cumulative R Variance', 'Time Avg'] 69 | # Save the combined results to a CSV file 70 | combined_results_filename = f"{analysis_type}_combined_results.csv" 71 | combined_results_path = os.path.join(results_dir, combined_results_filename) 72 | variance_results_df.to_csv(combined_results_path, index=False) 73 | -------------------------------------------------------------------------------- /elsciRL/examples/adapters/elsciRL_sailing_language.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | from torch import Tensor 6 | # StateAdapter includes static methods for adapters 7 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 8 | from gymnasium.spaces import Text, Box 9 | 10 | 11 | class LanguageAdapter: 12 | _cached_state_idx: Dict[str, int] = dict() 13 | 14 | def __init__(self, setup_info:dict={}) -> None: 15 | # Language encoder doesn't require any preset knowledge of env to use 16 | self.encoder = LanguageEncoder() 17 | # Observation is string: "x_angle" 18 | # -> encoder output is 1x384 tensor from miniLM 19 | self.observation_space = Box(low=-1, high=1, shape=(1,384), dtype=np.float32) 20 | 21 | def adapter(self, state:any, legal_moves:list = None, episode_action_history:list = None, encode:bool = True, indexed: bool = False) -> Tensor: 22 | """ Produce a natural-language description of the current 'x_angle' observation """ 23 | 24 | # state = 'x_angle' 25 | # legal_moves = [0,1] 26 | # episode_action_history = [action, action, action] where action = [0,1] 27 | 28 | # Angle is relative to the goal of moving forward (i.e.
bearing) 29 | # - angle=0 is directly forward 30 | # - angle<0 is slightly left 31 | # - angle>0 is slightly right 32 | 33 | x = float(state.split('_')[0]) 34 | angle = float(state.split('_')[1]) 35 | 36 | # Horizontal position 37 | if (x>-1)&(x<1): 38 | L_x = 'in the middle' 39 | elif (x>-3)&(x<3): 40 | L_x = 'near to the center' 41 | elif (x>-5)&(x<5): 42 | L_x = 'in between the edge and the center' 43 | elif (x>-7)&(x<7): 44 | L_x = 'near to the edge' 45 | elif (x>=-10)&(x<=10): 46 | L_x = 'very close to the edge' 47 | else: 48 | L_x = 'out of bounds' 49 | 50 | # Side of river 51 | if x<0: 52 | L_x_side = 'on the harbor side of the river' 53 | elif x>0: 54 | L_x_side = 'on the beach side of the river' 55 | else: 56 | L_x_side = '' 57 | 58 | # Angle 59 | # - Defined in radians where 90deg = 1.57 60 | # - Peak velocity at 45deg = pi/4 = 0.7853... 61 | if angle==0: 62 | L_angle = 'facing directly into the wind' 63 | elif (angle>-0.1)&(angle<0.1): 64 | L_angle = 'facing into the wind' 65 | elif (angle>-0.5)&(angle<0.5): 66 | L_angle = 'close hauled with wind' 67 | elif (angle>-1)&(angle<1): 68 | L_angle = 'cutting the wind' 69 | else: 70 | L_angle = 'moving across the wind' 71 | # Wind side 72 | if angle<0: 73 | L_wind_side = 'on the starboard side' 74 | elif angle>0: 75 | L_wind_side = 'on the port side' 76 | else: 77 | L_wind_side = '' 78 | 79 | L_state = 'The boat is ' + L_x_side + ' ' + L_x + ', ' + L_angle + ' ' + L_wind_side + ', ' 80 | L_state = L_state.replace(' ', ' ').replace(' .','.').replace(' ,',',').replace(' and,','') # Remove double spaces 81 | 82 | # Last action taken and final language state output 83 | if len(episode_action_history)>0: 84 | last_action = episode_action_history[-1] 85 | # if last_action==0: 86 | # L_action = 'the last action was to turn to the left slightly.' 87 | # elif last_action==1: 88 | # L_action = 'the last action was to turn to the right slightly.' 89 | 90 | if (x<=0)&(last_action==0): 91 | L_action = 'the last action was to turn towards the harbor.' 92 | elif (x<0)&(last_action==1): 93 | L_action = 'the last action was to turn towards the center of the river.' 94 | elif (x>=0)&(last_action==1): 95 | L_action = 'the last action was to turn towards the beach.' 96 | elif (x>0)&(last_action==0): 97 | L_action = 'the last action was to turn towards the center of the river.' 
98 | 99 | state = L_state + ' ' + L_action 100 | else: 101 | state = L_state 102 | 103 | #print(state) 104 | 105 | # Encode to Tensor for agents 106 | if encode: 107 | state_encoded = self.encoder.encode(state=state) 108 | else: 109 | state_encoded = state 110 | 111 | if (indexed): 112 | state_indexed = list() 113 | for sent in state: 114 | if (sent not in LanguageAdapter._cached_state_idx): 115 | LanguageAdapter._cached_state_idx[sent] = len(LanguageAdapter._cached_state_idx) 116 | state_indexed.append(LanguageAdapter._cached_state_idx[sent]) 117 | 118 | state_encoded = torch.tensor(state_indexed) 119 | 120 | return state_encoded -------------------------------------------------------------------------------- /elsciRL/environment_setup/results_table.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | class ResultsTable: 5 | def __init__(self, local_setup_info:dict = None) -> None: 6 | if type(local_setup_info['training_results']) != type(pd.DataFrame()): 7 | self.agent:list = [] 8 | self.opponent:list =[] 9 | self.episode:list = [] 10 | self.num_actions:list = [] 11 | self.episode_reward:list = [] 12 | self.cumulative_reward:list = [] 13 | self.time_per_episode:list = [] 14 | self.action_history:list = [] 15 | self.q_total:list = [] 16 | self.q_mean:list = [] 17 | # new 18 | self.cum_r = 0 19 | else: 20 | self.agent:list = local_setup_info['training_results'].agent.tolist() 21 | self.opponent:list =local_setup_info['training_results'].opponent.tolist() 22 | self.episode:list = local_setup_info['training_results'].episode.tolist() 23 | self.num_actions:list = local_setup_info['training_results'].num_actions.tolist() 24 | self.episode_reward:list = local_setup_info['training_results'].episode_reward.tolist() 25 | self.cumulative_reward:list = local_setup_info['training_results'].cumulative_reward.tolist() 26 | self.cum_r = self.cumulative_reward[-1] 27 | self.time_per_episode:list = local_setup_info['training_results'].time_per_episode.tolist() 28 | self.action_history:list = local_setup_info['training_results'].action_history.tolist() 29 | self.q_total:list = local_setup_info['training_results'].q_total.tolist() 30 | self.q_mean:list = local_setup_info['training_results'].q_mean.tolist() 31 | 32 | def results_per_episode(self,agent_name:str='missing', opponent_name:str='None', episode_num:int=0, action_num:int=0, episode_reward:float=0, time:float=0, episode_action_history:list=[], q_total:float=0, q_mean:float=0): 33 | self.agent.append(agent_name) 34 | self.opponent.append(opponent_name) 35 | self.episode.append(episode_num) 36 | self.num_actions.append(action_num) 37 | self.episode_reward.append(episode_reward) 38 | self.cum_r +=episode_reward 39 | self.cumulative_reward.append(self.cum_r) 40 | self.time_per_episode.append(time) 41 | self.action_history.append(episode_action_history) 42 | self.q_total.append(q_total) 43 | self.q_mean.append(q_mean) 44 | 45 | 46 | def results_table_format(self): 47 | results= pd.DataFrame({ 48 | 'agent': self.agent, 49 | 'opponent': self.opponent, 50 | 'episode': self.episode, 51 | 'num_actions': self.num_actions, 52 | 'episode_reward': self.episode_reward, 53 | "cumulative_reward": self.cumulative_reward, 54 | "time_per_episode":self.time_per_episode, 55 | "action_history": self.action_history, 56 | "q_total":self.q_total, 57 | "q_mean":self.q_mean}) 58 | return results 59 | 60 | def reset(self): 61 | self.agent:list = [] 62 | self.opponent:list =[] 63 | self.episode:list = [] 64 | 
self.num_actions:list = [] 65 | self.episode_reward:list = [] 66 | self.cum_r = 0 67 | self.cumulative_reward:list = [] 68 | self.time_per_episode:list = [] 69 | self.action_history:list = [] 70 | self.q_total:list = [] 71 | self.q_mean:list = [] 72 | 73 | def copy(self): 74 | results_copy= pd.DataFrame({ 75 | 'agent': self.agent.copy(), 76 | 'opponent': self.opponent.copy(), 77 | 'episode': self.episode.copy(), 78 | 'num_actions': self.num_actions.copy(), 79 | 'episode_reward': self.episode_reward.copy(), 80 | "cumulative_reward": self.cumulative_reward.copy(), 81 | "time_per_episode":self.time_per_episode.copy(), 82 | "action_history":self.action_history.copy(), 83 | "q_total":self.q_total.copy(), 84 | "q_mean":self.q_mean.copy()}) 85 | return results_copy 86 | 87 | def load(self, results_copy): 88 | self.agent:list = results_copy.agent.tolist() 89 | self.opponent:list = results_copy.opponent.tolist() 90 | self.episode:list = results_copy.episode.tolist() 91 | self.num_actions:list = results_copy.num_actions.tolist() 92 | self.episode_reward:list = results_copy.episode_reward.tolist() 93 | self.cumulative_reward:list = results_copy.cumulative_reward.tolist() 94 | self.cum_r = self.cumulative_reward[-1] 95 | self.time_per_episode:list = results_copy.time_per_episode.tolist() 96 | self.action_history:list = results_copy.action_history.tolist() 97 | self.q_total:list = results_copy.q_total.tolist() 98 | self.q_mean:list = results_copy.q_mean.tolist() 99 | 100 | 101 | -------------------------------------------------------------------------------- /elsciRL/adapters/LLM_state_generators/text_gpt-4.1.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Any 3 | import os 4 | 5 | from openai import OpenAI 6 | 7 | try: 8 | from torch import Tensor 9 | from elsciRL.encoders.language_transformers.MiniLM_L6v2 import LanguageEncoder 10 | except ImportError: 11 | print("Warning: torch or LanguageEncoder not found. Please ensure elsciRL is properly installed.") 12 | Tensor = None 13 | LanguageEncoder = None 14 | 15 | class LLMAdapter(ABC): 16 | """Convert a general prompt and raw text state into a description of the state.""" 17 | def __init__(self, base_prompt: str): 18 | super().__init__() 19 | # Define the fields that describe the state features: 20 | self.base_prompt = base_prompt 21 | 22 | @abstractmethod 23 | def _read(raw_state) -> list: 24 | # Read the data. 25 | # fill in the feature fields 26 | raise NotImplementedError 27 | 28 | 29 | class GPTAdapter(LLMAdapter): 30 | """Adapter for OpenAI GPT models.""" 31 | 32 | def __init__(self, base_prompt: str, model_name: str = "gpt-4"): 33 | super().__init__(base_prompt) 34 | self.model_name = model_name 35 | 36 | # Initialize the language encoder for encoding functionality 37 | if LanguageEncoder is not None: 38 | self.encoder = LanguageEncoder() 39 | else: 40 | print("Warning: LanguageEncoder not available. 
Encoding will not work.") 41 | self.encoder = None 42 | 43 | def _read(self, raw_state) -> list: 44 | """Read the data and fill in the feature fields.""" 45 | # This method should be implemented based on specific requirements 46 | # For now, returning the raw state as a list 47 | return [raw_state] if isinstance(raw_state, str) else raw_state 48 | 49 | def call_gpt_api(self, prompt: str): 50 | """Call the OpenAI GPT API with the given prompt.""" 51 | try: 52 | api_key = os.environ.get("OPENAI_API_KEY") 53 | if not api_key: 54 | raise ValueError("OPENAI_API_KEY environment variable not set") 55 | 56 | client = OpenAI(api_key=api_key) 57 | response = client.chat.completions.create( 58 | model=self.model_name, 59 | messages=[ 60 | {"role": "system", "content": self.base_prompt}, 61 | {"role": "user", "content": prompt} 62 | ], 63 | max_tokens=5000 64 | ) 65 | return response.to_dict() if hasattr(response, 'to_dict') else response 66 | except Exception as e: 67 | print(f"Error calling OpenAI API: {e}") 68 | return None 69 | 70 | def process_gpt_response(self, response): 71 | """Process the response from OpenAI API.""" 72 | if response and 'choices' in response: 73 | return response['choices'][0]['message']['content'] 74 | return None 75 | 76 | def adapter(self, state: any, legal_moves: list = None, episode_action_history: list = None, encode: bool = True, indexed: bool = False) -> Tensor: 77 | """Returns the adapted form, may require input flag for encoded or non-encoded output.""" 78 | # Build the full context prompt including legal moves and action history 79 | context_parts = [] 80 | 81 | # Add state information 82 | if state is not None: 83 | context_parts.append(f"Current state: {state}") 84 | 85 | # Add legal moves if provided 86 | if legal_moves is not None and len(legal_moves) > 0: 87 | context_parts.append(f"Legal moves: {legal_moves}") 88 | 89 | # Add action history if provided 90 | if episode_action_history is not None and len(episode_action_history) > 0: 91 | recent_actions = episode_action_history[-5:] # Last 5 actions 92 | context_parts.append(f"Recent actions: {recent_actions}") 93 | 94 | # Combine all context into a single prompt 95 | full_prompt = " | ".join(context_parts) 96 | 97 | # Get GPT response 98 | adapted_state = self.call_gpt_api(full_prompt) 99 | processed_response = self.process_gpt_response(adapted_state) 100 | 101 | if processed_response is None: 102 | processed_response = str(state) if state is not None else "No state available" 103 | 104 | # Handle encoding 105 | if encode: 106 | if self.encoder is not None: 107 | # Use the LanguageEncoder to encode the response 108 | state_encoded = self.encoder.encode( 109 | state=processed_response, 110 | legal_actions=legal_moves, 111 | episode_action_history=episode_action_history, 112 | indexed=indexed 113 | ) 114 | return state_encoded 115 | else: 116 | print("Warning: Encoder not available, returning processed response as string") 117 | return processed_response 118 | else: 119 | return processed_response 120 | 121 | def sample(self, state: any): 122 | """Returns a sample of an adapted state form (typically initial position of the environment).""" 123 | if not state: 124 | state = 'The current state is empty.' 
125 | return self.adapter(state, encode=True) 126 | 127 | 128 | -------------------------------------------------------------------------------- /elsciRL/experiments/training_procedures/default_exp_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | from elsciRL.experiments.experiment_utils.config_utils import ensure_dir 3 | from elsciRL.experiments.experiment_utils.render_current_results import render_current_result 4 | 5 | 6 | def run_training_loop( 7 | env_manager, 8 | agent_factory, 9 | result_manager, 10 | training_render, 11 | training_render_save_dir, 12 | save_dir, 13 | engine_name, 14 | engine, 15 | agent_type, 16 | adapter, 17 | all_adapters, 18 | train_setup_info, 19 | trained_agents, 20 | num_training_seeds, 21 | test_agent_type, 22 | show_figures, 23 | number_training_repeats, 24 | gym_env:bool=False 25 | ): 26 | if f"{engine_name}_{agent_type}_{adapter}" not in trained_agents: 27 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"] = {} 28 | seed_recall = {} 29 | seed_results_connection = {} 30 | for seed_num in range(num_training_seeds): 31 | if num_training_seeds > 1: 32 | print("------\n- Seed Num: ", seed_num) 33 | if seed_num == 0: 34 | train_setup_info['training_results'] = False 35 | train_setup_info['observed_states'] = False 36 | else: 37 | train_setup_info['training_results'] = False 38 | train_setup_info['observed_states'] = observed_states_stored.copy() 39 | setup_num = 0 40 | temp_agent_store = {} 41 | for training_repeat in range(1, number_training_repeats + 1): 42 | if number_training_repeats > 1: 43 | print("------\n- Repeat Num: ", training_repeat) 44 | setup_num += 1 45 | agent = agent_factory.create(agent_type, train_setup_info['agent_parameters'][agent_type], engine, adapter) 46 | train_setup_info['agent'] = agent 47 | # Create the environment, use gym_env if specified 48 | if gym_env: 49 | live_env = env_manager.create_gym_env(engine, adapter, train_setup_info) 50 | else: 51 | live_env = env_manager.create_env(engine, all_adapters, train_setup_info) 52 | # --- 53 | if training_repeat > 1: 54 | live_env.start_obs = env_start 55 | env_start = live_env.start_obs 56 | goal = str(env_start).split(".")[0] + "---GOAL" 57 | print("Flat agent Goal: ", goal) 58 | if goal in seed_recall: 59 | setup_num = seed_recall[goal] 60 | else: 61 | seed_recall[goal] = 1 62 | agent_save_dir = os.path.join(save_dir, 63 | f"{engine_name}_{agent_type}_{adapter}__training_results_{goal}_{setup_num}" 64 | ) if num_training_seeds > 1 else os.path.join(save_dir, 65 | f"{engine_name}_{agent_type}_{adapter}__training_results_{setup_num}" 66 | ) 67 | ensure_dir(agent_save_dir) 68 | if goal in trained_agents[f"{engine_name}_{agent_type}_{adapter}"]: 69 | live_env.agent = trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal].clone() 70 | live_env.agent.exploration_parameter_reset() 71 | if goal in seed_results_connection: 72 | live_env.results.load(seed_results_connection[goal]) 73 | training_results = live_env.episode_loop() 74 | training_results['episode'] = training_results.index 75 | training_results.insert(loc=0, column='Repeat', value=setup_num) 76 | Return = result_manager.train_report(training_results, agent_save_dir, show_figures) 77 | if goal not in temp_agent_store: 78 | temp_agent_store[goal] = {} 79 | temp_agent_store[goal][setup_num] = {'Return': Return, 'agent': live_env.agent.clone()} 80 | if training_repeat == 1: 81 | max_Return = Return 82 | best_agent = live_env.agent 83 | training_results_stored = 
live_env.results.copy() 84 | observed_states_stored = live_env.elsciRL.observed_states 85 | if Return > max_Return: 86 | max_Return = Return 87 | best_agent = live_env.agent 88 | training_results_stored = live_env.results.copy() 89 | observed_states_stored = live_env.elsciRL.observed_states 90 | seed_recall[goal] = seed_recall[goal] + 1 91 | train_setup_info['train_save_dir'] = agent_save_dir 92 | if training_render: 93 | current_render_save_dir = training_render_save_dir or agent_save_dir 94 | render_current_result( 95 | training_setup=train_setup_info, 96 | current_environment=live_env, 97 | current_agent=live_env.agent, 98 | local_save_dir=current_render_save_dir 99 | ) 100 | seed_results_connection[goal] = training_results_stored 101 | # Save trained agent(s) 102 | if test_agent_type.lower() == 'best': 103 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal] = best_agent.clone() 104 | elif test_agent_type.lower() == 'all': 105 | start_repeat_num = list(temp_agent_store[goal].keys())[0] 106 | end_repeat_num = list(temp_agent_store[goal].keys())[-1] 107 | all_agents = [temp_agent_store[goal][repeat]['agent'] for repeat in range(start_repeat_num, end_repeat_num + 1)] 108 | trained_agents[f"{engine_name}_{agent_type}_{adapter}"][goal] = all_agents 109 | 110 | return trained_agents, seed_results_connection, temp_agent_store, training_results_stored, observed_states_stored 111 | -------------------------------------------------------------------------------- /elsciRL/GUI/LLM_tools/LLM_utils.py: -------------------------------------------------------------------------------- 1 | # IMPORTS LLM API TOOLS 2 | # EDITED OUT FOR NOW UNTIL FULL IMPLEMENTATION READY 3 | 4 | # import os 5 | # import json 6 | 7 | # from openai import OpenAI 8 | 9 | # def call_gpt_api(prompt): 10 | # import os 11 | # api_key = os.environ.get("OPENAI_API_KEY") 12 | # client = OpenAI(api_key=api_key) 13 | # response = client.chat.completions.create( 14 | # model="gpt-4.1", 15 | # messages=[{"role": "system", "content": prompt}], 16 | # max_tokens=5000 17 | # ) 18 | # return response.to_dict() if hasattr(response, 'to_dict') else response 19 | 20 | # def process_gpt_response(response): 21 | # if response and 'choices' in response: 22 | # return response['choices'][0]['message']['content'] 23 | # return None 24 | 25 | # def generate_application(self, user_input:str=''): 26 | # # TODO: Use this in a new tab with user input to update application list 27 | # # Load the app_setup.md content as part of the system prompt 28 | 29 | # # Add requirement to system prompt for code chunk separation 30 | # system_prompt_requirement = ( 31 | # "If your response contains any code chunks, you must output them in a separate section clearly marked as 'Code Output', " 32 | # "so that the application can extract and save them to a file. Do not mix code with explanations in the same section." 33 | # ) 34 | # # Combine the app_setup.md info with the system prompt and the new requirement 35 | # system_prompt = ( 36 | # "You are a helpful assistant. " 37 | # "Below is important application setup information for elsciRL:\n" 38 | # f"{self.app_setup_info}\n" 39 | # f"{system_prompt_requirement}\n" 40 | # "Please use this information to answer user queries." 
41 | # ) 42 | 43 | # if not user_input: 44 | # return {"error": "No input provided"} 45 | 46 | # # Use the utils function to call the GPT API 47 | # response = call_gpt_api(system_prompt + "\nUser: " + user_input) 48 | # reply = process_gpt_response(response) 49 | # print(reply) 50 | # if not reply: 51 | # return {"error": "Failed to get response from GPT API"} 52 | 53 | # # Save the complete output to a .txt file 54 | # output_dir = os.path.join(os.path.dirname(__file__), 'output') 55 | # os.makedirs(output_dir, exist_ok=True) 56 | # output_path = os.path.join(output_dir, 'last_gpt_response.txt') 57 | # with open(output_path, 'w', encoding='utf-8') as f: 58 | # f.write(str(response)) 59 | 60 | # # Follow-up: Ask the AI model to extract all Python code and JSON config blocks and return a list of (filename, code) pairs 61 | # followup_prompt = ( 62 | # "Extract all Python code blocks and JSON config blocks from the following text. " 63 | # "For each code or config block, output a JSON array where each item has 'filename' and 'code' fields. " 64 | # "Choose a descriptive filename for each code block (e.g., based on class/function names or comments, use .py for Python and .json for configs). " 65 | # "Do not include any explanation, only the JSON array.\n\n" + reply 66 | # ) 67 | # code_response = call_gpt_api(followup_prompt) 68 | # code_reply = process_gpt_response(code_response) 69 | # try: 70 | # code_blocks = json.loads(code_reply) 71 | # generated_data = {} 72 | # for block in code_blocks: 73 | # fname = block.get('filename', 'extracted_code.py') 74 | # code = block.get('code', '') 75 | # generated_data[fname] = code 76 | # code_file_path = os.path.join(output_dir, fname) 77 | # with open(code_file_path, 'w', encoding='utf-8') as code_file: 78 | # code_file.write(code) 79 | # except Exception as e: 80 | # # fallback: save the raw reply if not valid JSON 81 | # code_file_path = os.path.join(output_dir, 'extracted_code.py') 82 | # with open(code_file_path, 'w', encoding='utf-8') as code_file: 83 | # code_file.write(code_reply.strip()) 84 | 85 | # for name,code in generated_data.items(): 86 | # if 'engine' in name.lower(): 87 | # generated_data['engine'] = code 88 | # elif 'analysis' in name.lower(): 89 | # generated_data['analysis'] = code 90 | # elif ('experiment' in name.lower()) | ('agent' in name.lower()): 91 | # generated_data['agent_config'] = code 92 | # elif ('local' in name.lower()) | ('env' in name.lower()): 93 | # generated_data['local_config'] = code 94 | # elif 'adapter_language' in name.lower(): 95 | # generated_data['adapter_language'] = code 96 | # elif ('numeric' in name.lower()) | ('default' in name.lower()): 97 | # generated_data['adapter_numeric'] = code 98 | 99 | # # Create the application setup dictionary 100 | # application_setup = { 101 | # 'engine':generated_data['engine'], 102 | # 'experiment_configs':{'quick_test':generated_data['agent_config']}, 103 | # 'local_configs':{'env_config':generated_data['local_config']}, 104 | # 'adapters':{'numeric_adapter':generated_data['adapter_numeric'], 105 | # 'language_adapter':generated_data['adapter_language']}, 106 | # 'local_analysis':{'blackjack_graphs':generated_data['analysis']}, 107 | # 'prerender_data':{}, 108 | # 'prerender_images':{}, 109 | # } 110 | 111 | # # Add the new application to the application data 112 | # self.pull_app_data = self.application_data.add_applicaiton( 113 | # problem=generated_data['agent_config']['name'], 114 | # application_data=application_setup 115 | # ) 116 | 117 | # return reply 
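# Minimal usage sketch, kept commented out like the rest of this module. It assumes the
# helpers above are re-enabled and that OPENAI_API_KEY is set; the prompt text is illustrative only.
#
# if __name__ == "__main__":
#     system_prompt = "You are a helpful assistant for elsciRL application setup."
#     response = call_gpt_api(system_prompt + "\nUser: List the files a new application needs.")
#     reply = process_gpt_response(response)
#     print(reply)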
-------------------------------------------------------------------------------- /elsciRL/interaction_loops/state_search.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from tqdm import tqdm 4 | 5 | # ------ Imports ----------------------------------------- 6 | # Agent Setup 7 | from elsciRL.environment_setup.imports import ImportHelper 8 | 9 | # Evaluation standards 10 | from elsciRL.environment_setup.results_table import ResultsTable 11 | from elsciRL.environment_setup.elsciRL_info import elsciRLInfo 12 | 13 | 14 | def episode_loop(Engine, Adapters: dict, local_setup_info: dict, number_episodes: int = 1000, 15 | batch_number: int = 0, observed_states: dict = {}) -> dict: 16 | # --- INIT state space from engine 17 | agent_adapter_name = local_setup_info['agent_type'] + "_" + local_setup_info['adapter_select'] 18 | engine = Engine(local_setup_info) 19 | start_obs = engine.reset() 20 | # --- PRESET elsciRL INFO 21 | # Agent 22 | Imports = ImportHelper(local_setup_info) 23 | agent, agent_type, agent_name, agent_state_adapter = ( 24 | Imports.agent_info(Adapters) 25 | ) 26 | ( 27 | num_train_episodes, 28 | num_test_episodes, 29 | training_action_cap, 30 | testing_action_cap, 31 | reward_signal, 32 | ) = Imports.parameter_info() 33 | 34 | # Training or testing phase flag 35 | train = Imports.training_flag() 36 | 37 | # Mode selection (already initialized) 38 | # --- elsciRL 39 | live_env, observed_states_flag = ( 40 | Imports.live_env_flag() 41 | ) 42 | # Results formatting 43 | results = ResultsTable(local_setup_info) 44 | # elsciRL input function 45 | # - We only want to init trackers on first batch otherwise it resets knowledge 46 | elsciRL = elsciRLInfo(observed_states) 47 | # RENDER AND SUB-GOALS REMOVED COMPLETELY SO SAVE RUN-TIME 48 | 49 | for episode in tqdm(range(0, number_episodes)): 50 | action_history = [] 51 | # --- 52 | # Start observation is used instead of .reset() fn so that this can be overridden for repeat analysis from the same start pos 53 | obs = engine.reset(start_obs=start_obs) 54 | legal_moves = engine.legal_move_generator(obs) 55 | 56 | # LLM agents need to pass the state as a string 57 | if agent_type.split("_")[0] == "LLM": 58 | state = agent_state_adapter.adapter( 59 | state=obs, 60 | legal_moves=legal_moves, 61 | episode_action_history=action_history, 62 | encode=False, 63 | ) 64 | else: 65 | state = agent_state_adapter.adapter( 66 | state=obs, 67 | legal_moves=legal_moves, 68 | episode_action_history=action_history, 69 | encode=True, 70 | ) 71 | # --- 72 | start_time = time.time() 73 | episode_reward: int = 0 74 | # --- 75 | for action in range(0, training_action_cap): 76 | if live_env: 77 | # Agent takes action 78 | legal_moves = engine.legal_move_generator(obs) 79 | agent_action = agent.policy(state, legal_moves) 80 | 81 | if isinstance(agent_action, np.int64): 82 | action_history.append(agent_action.item()) 83 | else: 84 | action_history.append(agent_action) 85 | 86 | next_obs, reward, terminated, _ = engine.step( 87 | state=obs, action=agent_action 88 | ) 89 | 90 | # Can override reward per action with small negative punishment 91 | if reward == 0: 92 | reward = reward_signal[1] 93 | 94 | # Only update observed states if not already observed 95 | if next_obs not in observed_states: 96 | legal_moves = engine.legal_move_generator(next_obs) 97 | # LLM agents need to pass the state as a string 98 | if agent_type.split("_")[0] == "LLM": 99 | next_state = agent_state_adapter.adapter( 
100 | state=next_obs, 101 | legal_moves=legal_moves, 102 | episode_action_history=action_history, 103 | encode=False, 104 | ) 105 | else: 106 | next_state = agent_state_adapter.adapter( 107 | state=next_obs, 108 | legal_moves=legal_moves, 109 | episode_action_history=action_history, 110 | encode=True, 111 | ) 112 | # elsciRL trackers 113 | # TODO: Consider adding prior action history to the tracker so that we can 114 | # transform observed data across adapters without loss of information 115 | observed_states = elsciRL.observed_state_tracker( 116 | engine_observation=next_obs, 117 | language_state=agent_state_adapter.adapter( 118 | state=next_obs, 119 | legal_moves=legal_moves, 120 | episode_action_history=action_history, 121 | encode=False, 122 | ), 123 | ) 124 | 125 | episode_reward += reward 126 | if terminated: 127 | break 128 | else: 129 | state = next_state 130 | if live_env: 131 | obs = next_obs 132 | 133 | # If action limit reached 134 | if not terminated: 135 | reward = reward_signal[2] 136 | 137 | end_time = time.time() 138 | try: 139 | agent_results = agent.q_result() 140 | except: 141 | agent_results = [0, 0] 142 | 143 | if live_env: 144 | results.results_per_episode( 145 | agent_name, 146 | None, 147 | episode, 148 | action, 149 | episode_reward, 150 | (end_time - start_time), 151 | action_history, 152 | agent_results[0], 153 | agent_results[1], 154 | ) 155 | # Output GIF image of all episode frames 156 | return observed_states 157 | -------------------------------------------------------------------------------- /elsciRL/analysis/convergence_measure.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from typing import List 3 | 4 | # Define convergence evaluation measure 5 | class Convergence_Measure: 6 | def __init__(self, total_num_episodes): 7 | # --- PARAMETERS --- 8 | self.conv_threshold_perc = 5 9 | self.num_prior_epi = int(total_num_episodes/10) 10 | self.num_prior_epi_points = 5 11 | self.plot_convergence_figures = False 12 | # ------------------ 13 | # Plot display time 14 | self.display_plot_time = 10 15 | 16 | def convergence_check(self, value_list: List[float], player_side: str, visual_save_dir: str): 17 | """ CONVERGENCE CHECK METHODOLOGY 18 | - Goes through each Q value by episode and calculates the percentage change from the previous result 19 | - Because a single point cannot provide an accurate result, we check N prior output points instead 20 | - We set the prior check points by taking a range of episodes and evenly spacing N points between the current episode and the first check point defined by the range 21 | - We accept that the output has converged if ALL N prior outputs have a percentage change below our threshold 22 | - The episode at which the output has converged is then the first of these check points, giving a systematic numeric convergence evaluation 23 | """ 24 | perc_change_tracker = [] 25 | prior_change_long_term_tracker = [] 26 | conv_met_check = [] 27 | conv_met = False 28 | for n in range(0,len(value_list)): 29 | value = value_list[n] 30 | # First row fixed value 31 | if n == 0: 32 | perc_change = 100 33 | else: 34 | perc_change = abs((value - prior_row_value)/prior_row_value)*100 35 | perc_change_tracker.append(perc_change) 36 | 37 | prior_epi_points_tracker = [] 38 | if n