├── pink ├── __init__.py ├── tonic.py ├── colorednoise.py ├── cnrl.py └── sb3.py ├── pyproject.toml ├── LICENSE ├── examples └── example.py └── README.md /pink/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .cnrl import * 3 | 4 | try: 5 | from .sb3 import * 6 | except: 7 | pass 8 | 9 | try: 10 | from .tonic import * 11 | except: 12 | pass 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pink-noise-rl" 3 | version = "2.0.1" 4 | description = "Pink noise for exploration in reinforcement learning" 5 | authors = ["Onno Eberhard "] 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/martius-lab/pink-noise-rl" 9 | packages = [ 10 | { include = "pink" } 11 | ] 12 | 13 | 14 | [tool.poetry.dependencies] 15 | python = "^3.8" 16 | numpy = "*" 17 | 18 | 19 | [build-system] 20 | requires = ["poetry-core"] 21 | build-backend = "poetry.core.masonry.api" 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Onno Eberhard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | """Comparing pink action noise with the default noise on SAC.""" 2 | 3 | import gym 4 | import numpy as np 5 | import torch 6 | from pink import PinkNoiseDist 7 | from stable_baselines3 import SAC 8 | 9 | # Reproducibility 10 | seed = 0 11 | np.random.seed(seed) 12 | torch.manual_seed(seed) 13 | rng = np.random.default_rng(seed) 14 | 15 | # Initialize environment 16 | env = gym.make("MountainCarContinuous-v0") 17 | action_dim = env.action_space.shape[-1] 18 | seq_len = env._max_episode_steps 19 | rng = np.random.default_rng(0) 20 | 21 | # Initialize agents 22 | model_default = SAC("MlpPolicy", env, seed=seed) 23 | model_pink = SAC("MlpPolicy", env, seed=seed) 24 | 25 | # Set action noise 26 | model_pink.actor.action_dist = PinkNoiseDist(seq_len, action_dim, rng=rng) 27 | 28 | # Train agents 29 | model_default.learn(total_timesteps=10_000) 30 | model_pink.learn(total_timesteps=10_000) 31 | 32 | # Evaluate learned policies 33 | N = 100 34 | for name, model in zip(["Default noise\n-------------", "Pink noise\n----------"], [model_default, model_pink]): 35 | solved = 0 36 | for i in range(N): 37 | obs = env.reset() 38 | done = False 39 | while not done: 40 | obs, r, done, _ = env.step(model.predict(obs, deterministic=True)[0]) 41 | if r > 0: 42 | solved += 1 43 | break 44 | 45 | print(name) 46 | print(f"Solved: {solved/N * 100:.0f}%\n") 47 | 48 | 49 | # - Output of this program - 50 | # Default noise 51 | # ------------- 52 | # Solved: 0% 53 | # 54 | # Pink noise 55 | # ---------- 56 | # Solved: 100% 57 | -------------------------------------------------------------------------------- /pink/tonic.py: -------------------------------------------------------------------------------- 1 | """Colored noise implementations for Tonic RL library""" 2 | 3 | import numpy as np 4 | import torch as th 5 | from tonic.torch.agents import MPO 6 | 7 | from .cnrl import ColoredNoiseProcess 8 | 9 | 10 | class MPO_CN(MPO): 11 | """MPO with colored noise exploration""" 12 | def __init__(self, *args, **kwargs): 13 | super().__init__(*args, **kwargs) 14 | 15 | def initialize(self, beta, seq_len, observation_space, action_space, rng=None, seed=None): 16 | """For documentation of beta, seq_len, rng see `pink.sb3.ColoredNoiseDist`.""" 17 | super().initialize(observation_space, action_space, seed) 18 | self.seq_len = seq_len 19 | self.rng = rng 20 | self.action_space = action_space 21 | self.set_beta(beta) 22 | 23 | def set_beta(self, beta): 24 | if np.isscalar(beta): 25 | self.beta = beta 26 | self.gen = ColoredNoiseProcess( 27 | beta=self.beta, size=(self.action_space.shape[0], self.seq_len), rng=self.rng) 28 | else: 29 | self.beta = np.asarray(beta) 30 | self.gen = [ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in self.beta] 31 | 32 | def _step(self, observations): 33 | observations = th.as_tensor(observations, dtype=th.float32) 34 | if np.isscalar(self.beta): 35 | cn_sample = th.tensor(self.gen.sample()).float() 36 | else: 37 | cn_sample = th.tensor([[cnp.sample() for cnp in self.gen]]).float() 38 | with th.no_grad(): 39 | loc = self.model.actor(observations).loc 40 | scale = self.model.actor(observations).scale 41 | return loc + scale*cn_sample 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Colored Action Noise for Deep RL 2 | 3 | This repository contains easy-to-use implementations of pink noise and general colored noise for use as action noise in deep reinforcement learning. Included are the following classes: 4 | - `ColoredNoiseProcess` and `PinkNoiseProcess` for general use, based on the [colorednoise](https://github.com/felixpatzelt/colorednoise) library 5 | - `ColoredActionNoise` and `PinkActionNoise` to be used with deterministic policy algorithms like DDPG and TD3 in Stable Baselines3, both are subclasses of `stable_baselines3.common.noise.ActionNoise` 6 | - `ColoredNoiseDist`, `PinkNoiseDist` to be used with stochastic policy algorithms like SAC in Stable Baselines3 7 | - `MPO_CN` for using colored noise (incl. pink noise) with MPO using the Tonic RL library. 8 | 9 | For more information, please see our paper: [Pink Noise Is All You Need: Colored Noise Exploration in Deep Reinforcement Learning](https://bit.ly/pink-noise-rl) (ICLR 2023 Spotlight). 10 | 11 | ## Installation 12 | You can install the library via pip: 13 | ``` 14 | pip install pink-noise-rl 15 | ``` 16 | Note: In Python, the import statement is simply `import pink`. 17 | 18 | ## Usage 19 | We provide minimal examples for using pink noise on SAC, TD3 and MPO below. An example comparing pink noise with the default action noise of SAC is included in the `examples` directory. 20 | 21 | ### Simple pink noise process 22 | ```python 23 | from pink import PinkNoiseProcess 24 | 25 | noise = PinkNoiseProcess((10, 1000)) # 10-dimensional pink noise process with buffer size 1000 26 | noise.sample() # Sample a single 10-dimensional time step 27 | noise.sample(5000) # Sample 5000 time steps at once. Since the buffer size is 1000, 5 independent signals are stitched together. 28 | ``` 29 | 30 | ### Stable Baselines3: SAC, TD3 31 | This example only works if Stable Baselines3 is installed. 32 | ```python 33 | import gym 34 | from stable_baselines3 import SAC, TD3 35 | 36 | # All classes mentioned above can be imported from `pink` 37 | from pink import PinkNoiseDist, PinkActionNoise 38 | 39 | # Initialize environment 40 | env = gym.make("MountainCarContinuous-v0") 41 | seq_len = env._max_episode_steps 42 | action_dim = env.action_space.shape[-1] 43 | ``` 44 | 45 | #### SAC 46 | ```python 47 | # Initialize agent 48 | model = SAC("MlpPolicy", env) 49 | 50 | # Set action noise 51 | model.actor.action_dist = PinkNoiseDist(seq_len, action_dim) 52 | 53 | # Train agent 54 | model.learn(total_timesteps=100_000) 55 | ``` 56 | 57 | #### TD3 58 | ```python 59 | # Initialize agent 60 | model = TD3("MlpPolicy", env) 61 | 62 | # Set action noise 63 | noise_scale = 0.3 64 | model.action_noise = PinkActionNoise(noise_scale, seq_len, action_dim) 65 | 66 | # Train agent 67 | model.learn(total_timesteps=100_000) 68 | ``` 69 | 70 | ### Tonic: MPO 71 | This example only works if Tonic is installed. 72 | ```python 73 | import gym 74 | from tonic import Trainer 75 | from pink import MPO_CN 76 | 77 | # Initialize environment 78 | env = gym.make("MountainCarContinuous-v0") 79 | seq_len = env._max_episode_steps 80 | 81 | # Initialize agent with pink noise 82 | beta = 1 83 | model = MPO_CN() 84 | model.initialize(beta, seq_len, env.observation_space, env.action_space) 85 | 86 | # Train agent 87 | trainer = tonic.Trainer(steps=100_000) 88 | trainer.initialize(model, env) 89 | trainer.run() 90 | ``` 91 | 92 | 93 | ## Citing 94 | If you use this code in your research, please cite our paper: 95 | ```bibtex 96 | @inproceedings{eberhard-2023-pink, 97 | title = {Pink Noise Is All You Need: Colored Noise Exploration in Deep Reinforcement Learning}, 98 | author = {Eberhard, Onno and Hollenstein, Jakob and Pinneri, Cristina and Martius, Georg}, 99 | booktitle = {Proceedings of the Eleventh International Conference on Learning Representations (ICLR 2023)}, 100 | month = may, 101 | year = {2023}, 102 | url = {https://openreview.net/forum?id=hQ9V5QN27eS} 103 | } 104 | ``` 105 | 106 | If there are any problems, or if you have a question, don't hesitate to open an issue here on GitHub. 107 | -------------------------------------------------------------------------------- /pink/colorednoise.py: -------------------------------------------------------------------------------- 1 | """Colored noise generation script 2 | Modified from colorednoise package: https://github.com/felixpatzelt/colorednoise 3 | """ 4 | 5 | import numpy as np 6 | from numpy.fft import irfft, rfftfreq 7 | 8 | 9 | def powerlaw_psd_gaussian(exponent, size, fmin=0, rng=None): 10 | """Gaussian (1/f)**beta noise. 11 | 12 | Based on the algorithm in: 13 | Timmer, J. and Koenig, M.: 14 | On generating power law noise. 15 | Astron. Astrophys. 300, 707-710 (1995) 16 | 17 | Normalised to unit variance 18 | 19 | Parameters: 20 | ----------- 21 | 22 | exponent : float 23 | The power-spectrum of the generated noise is proportional to 24 | 25 | S(f) = (1 / f)**beta 26 | flicker / pink noise: exponent beta = 1 27 | brown noise: exponent beta = 2 28 | 29 | Furthermore, the autocorrelation decays proportional to lag**-gamma 30 | with gamma = 1 - beta for 0 < beta < 1. 31 | There may be finite-size issues for beta close to one. 32 | 33 | shape : int or iterable 34 | The output has the given shape, and the desired power spectrum in 35 | the last coordinate. That is, the last dimension is taken as time, 36 | and all other components are independent. 37 | 38 | fmin : float, optional 39 | Low-frequency cutoff. 40 | Default: 0 corresponds to original paper. 41 | 42 | The power-spectrum below fmin is flat. fmin is defined relative 43 | to a unit sampling rate (see numpy's rfftfreq). For convenience, 44 | the passed value is mapped to max(fmin, 1/samples) internally 45 | since 1/samples is the lowest possible finite frequency in the 46 | sample. The largest possible value is fmin = 0.5, the Nyquist 47 | frequency. The output for this value is white noise. 48 | 49 | rng : np.random.Generator, optional 50 | Random number generator (for reproducibility). If not passed, a new 51 | random number generator is created by calling 52 | `np.random.default_rng()`. 53 | 54 | 55 | Returns 56 | ------- 57 | out : array 58 | The samples. 59 | 60 | 61 | Examples: 62 | --------- 63 | 64 | >>> # generate 1/f noise == pink noise == flicker noise 65 | >>> import colorednoise as cn 66 | >>> y = cn.powerlaw_psd_gaussian(1, 5) 67 | """ 68 | 69 | # Make sure size is a list so we can iterate it and assign to it. 70 | try: 71 | size = list(size) 72 | except TypeError: 73 | size = [size] 74 | 75 | # The number of samples in each time series 76 | samples = size[-1] 77 | 78 | # Calculate Frequencies (we asume a sample rate of one) 79 | # Use fft functions for real output (-> hermitian spectrum) 80 | f = rfftfreq(samples) 81 | 82 | # Validate / normalise fmin 83 | if 0 <= fmin <= 0.5: 84 | fmin = max(fmin, 1./samples) # Low frequency cutoff 85 | else: 86 | raise ValueError("fmin must be chosen between 0 and 0.5.") 87 | 88 | # Build scaling factors for all frequencies 89 | s_scale = f 90 | ix = np.sum(s_scale < fmin) # Index of the cutoff 91 | if ix and ix < len(s_scale): 92 | s_scale[:ix] = s_scale[ix] 93 | s_scale = s_scale**(-exponent/2.) 94 | 95 | # Calculate theoretical output standard deviation from scaling 96 | w = s_scale[1:].copy() 97 | w[-1] *= (1 + (samples % 2)) / 2. # correct f = +-0.5 98 | sigma = 2 * np.sqrt(np.sum(w**2)) / samples 99 | 100 | # Adjust size to generate one Fourier component per frequency 101 | size[-1] = len(f) 102 | 103 | # Add empty dimension(s) to broadcast s_scale along last 104 | # dimension of generated random power + phase (below) 105 | dims_to_add = len(size) - 1 106 | s_scale = s_scale[(None,) * dims_to_add + (Ellipsis,)] 107 | 108 | # Generate scaled random power + phase 109 | if rng is None: 110 | rng = np.random.default_rng() 111 | sr = rng.normal(scale=s_scale, size=size) 112 | si = rng.normal(scale=s_scale, size=size) 113 | 114 | # If the signal length is even, frequencies +/- 0.5 are equal 115 | # so the coefficient must be real. 116 | if not (samples % 2): 117 | si[..., -1] = 0 118 | sr[..., -1] *= np.sqrt(2) # Fix magnitude 119 | 120 | # Regardless of signal length, the DC component must be real 121 | si[..., 0] = 0 122 | sr[..., 0] *= np.sqrt(2) # Fix magnitude 123 | 124 | # Combine power + corrected phase to Fourier components 125 | s = sr + 1J * si 126 | 127 | # Transform to real time series & scale to unit variance 128 | y = irfft(s, n=samples, axis=-1) / sigma 129 | 130 | return y 131 | -------------------------------------------------------------------------------- /pink/cnrl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from . import colorednoise as cn 4 | 5 | 6 | class ColoredNoiseProcess(): 7 | """Infinite colored noise process. 8 | 9 | Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences the 10 | PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified. 11 | 12 | Methods 13 | ------- 14 | sample(T=1) 15 | Sample `T` timesteps from the colored noise process. 16 | reset() 17 | Reset the buffer with a new time series. 18 | """ 19 | def __init__(self, beta, size, scale=1, max_period=None, rng=None): 20 | """Infinite colored noise process. 21 | 22 | Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences 23 | the PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified. 24 | 25 | Parameters 26 | ---------- 27 | beta : float 28 | Exponent of colored noise power-law spectrum. 29 | size : int or tuple of int 30 | Shape of the sampled colored noise signals. The last dimension (`size[-1]`) specifies the time range, and 31 | is thus ths maximum possible correlation length of the combined signal. 32 | scale : int, optional, by default 1 33 | Scale parameter with which samples are multiplied 34 | max_period : float, optional, by default None 35 | Maximum correlation length of sampled colored noise singals (1 / low-frequency cutoff). If None, it is 36 | automatically set to `size[-1]` (the sequence length). 37 | rng : np.random.Generator, optional 38 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 39 | calling `np.random.default_rng()`. 40 | """ 41 | self.beta = beta 42 | if max_period is None: 43 | self.minimum_frequency = 0 44 | else: 45 | self.minimum_frequency = 1 / max_period 46 | self.scale = scale 47 | self.rng = rng 48 | 49 | # The last component of size is the time index 50 | try: 51 | self.size = list(size) 52 | except TypeError: 53 | self.size = [size] 54 | self.time_steps = self.size[-1] 55 | 56 | # Fill buffer and reset index 57 | self.reset() 58 | 59 | def reset(self): 60 | """Reset the buffer with a new time series.""" 61 | self.buffer = cn.powerlaw_psd_gaussian( 62 | exponent=self.beta, size=self.size, fmin=self.minimum_frequency, rng=self.rng) 63 | self.idx = 0 64 | 65 | def sample(self, T=1): 66 | """ 67 | Sample `T` timesteps from the colored noise process. 68 | 69 | The buffer is automatically refilled when necessary. 70 | 71 | Parameters 72 | ---------- 73 | T : int, optional, by default 1 74 | Number of samples to draw 75 | 76 | Returns 77 | ------- 78 | array_like 79 | Sampled vector of shape `(*size[:-1], T)` 80 | """ 81 | n = 0 82 | ret = [] 83 | while n < T: 84 | if self.idx >= self.time_steps: 85 | self.reset() 86 | m = min(T - n, self.time_steps - self.idx) 87 | ret.append(self.buffer[..., self.idx:(self.idx + m)]) 88 | n += m 89 | self.idx += m 90 | 91 | ret = self.scale * np.concatenate(ret, axis=-1) 92 | return ret if n > 1 else ret[..., 0] 93 | 94 | 95 | class PinkNoiseProcess(ColoredNoiseProcess): 96 | """Infinite pink noise process. 97 | 98 | Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences the 99 | PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified. 100 | 101 | Methods 102 | ------- 103 | sample(T=1) 104 | Sample `T` timesteps from the pink noise process. 105 | reset() 106 | Reset the buffer with a new time series. 107 | """ 108 | def __init__(self, size, scale=1, max_period=None, rng=None): 109 | """Infinite pink noise process. 110 | 111 | Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences 112 | the PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified. 113 | 114 | Parameters 115 | ---------- 116 | size : int or tuple of int 117 | Shape of the sampled pink noise signals. The last dimension (`size[-1]`) specifies the time range, and is 118 | thus ths maximum possible correlation length of the combined signal. 119 | scale : int, optional, by default 1 120 | Scale parameter with which samples are multiplied 121 | max_period : float, optional, by default None 122 | Maximum correlation length of sampled pink noise singals (1 / low-frequency cutoff). If None, it is 123 | automatically set to `size[-1]` (the sequence length). 124 | rng : np.random.Generator, optional 125 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 126 | calling `np.random.default_rng()`. 127 | """ 128 | super().__init__(1, size, scale, max_period, rng) 129 | -------------------------------------------------------------------------------- /pink/sb3.py: -------------------------------------------------------------------------------- 1 | """Colored noise implementations for Stable Baselines3""" 2 | 3 | import numpy as np 4 | import torch as th 5 | from stable_baselines3.common.distributions import SquashedDiagGaussianDistribution 6 | from stable_baselines3.common.noise import ActionNoise 7 | 8 | from .cnrl import ColoredNoiseProcess 9 | 10 | 11 | class ColoredActionNoise(ActionNoise): 12 | def __init__(self, beta, sigma, seq_len, action_dim=None, rng=None): 13 | """Action noise from a colored noise process. 14 | 15 | Parameters 16 | ---------- 17 | beta : float or array_like 18 | Exponent(s) of colored noise power-law spectra. If it is a single float, then `action_dim` has to be 19 | specified and the noise will be sampled in a vectorized manner for each action dimension. If it is 20 | array_like, then it specifies one beta for each action dimension. This allows different betas for different 21 | action dimensions, but sampling might be slower for high-dimensional action spaces. 22 | sigma : float or array_like 23 | Noise scale(s) of colored noise signals. Either a single float to be used for all action dimensions, or 24 | an array_like of the same dimensionality as the action space (one scale for each action dimension). 25 | seq_len : int 26 | Length of sampled colored noise signals. If sampled for longer than `seq_len` steps, a new 27 | colored noise signal of the same length is sampled. Should usually be set to the episode length 28 | (horizon) of the RL task. 29 | action_dim : int, optional 30 | Dimensionality of the action space. If passed, `beta` has to be a single float and the noise will be 31 | sampled in a vectorized manner for each action dimension. 32 | rng : np.random.Generator, optional 33 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 34 | calling `np.random.default_rng()`. 35 | """ 36 | super().__init__() 37 | assert (action_dim is not None) == np.isscalar(beta), \ 38 | "`action_dim` has to be specified if and only if `beta` is a scalar." 39 | 40 | self.sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma) 41 | 42 | if np.isscalar(beta): 43 | self.beta = beta 44 | self.gen = ColoredNoiseProcess(beta=self.beta, scale=self.sigma, size=(action_dim, seq_len), rng=rng) 45 | else: 46 | self.beta = np.asarray(beta) 47 | self.gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng) 48 | for b, s in zip(self.beta, self.sigma)] 49 | 50 | def __call__(self) -> np.ndarray: 51 | return self.gen.sample() if np.isscalar(self.beta) else np.asarray([g.sample() for g in self.gen]) 52 | 53 | def __repr__(self) -> str: 54 | return f"ColoredActionNoise(beta={self.beta}, sigma={self.sigma})" 55 | 56 | 57 | class PinkActionNoise(ColoredActionNoise): 58 | def __init__(self, sigma, seq_len, action_dim, rng=None): 59 | """Action noise from a pink noise process. 60 | 61 | Parameters 62 | ---------- 63 | sigma : float or array_like 64 | Noise scale(s) of colored noise signals. Either a single float to be used for all action dimensions, or 65 | an array_like of the same dimensionality as the action space (one scale for each action dimension). 66 | seq_len : int 67 | Length of sampled pink noise signals. If sampled for longer than `seq_len` steps, a new 68 | pink noise signal of the same length is sampled. Should usually be set to the episode length 69 | (horizon) of the RL task. 70 | action_dim : int 71 | Dimensionality of the action space. 72 | rng : np.random.Generator, optional 73 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 74 | calling `np.random.default_rng()`. 75 | """ 76 | super().__init__(1, sigma, seq_len, action_dim, rng) 77 | 78 | 79 | class ColoredNoiseDist(SquashedDiagGaussianDistribution): 80 | def __init__(self, beta, seq_len, action_dim=None, rng=None, epsilon=1e-6): 81 | """ 82 | Gaussian colored noise distribution for using colored action noise with stochastic policies. 83 | 84 | The colored noise is only used for sampling actions. In all other respects, this class acts like its parent 85 | class (`SquashedDiagGaussianDistribution`). 86 | 87 | Parameters 88 | ---------- 89 | beta : float or array_like 90 | Exponent(s) of colored noise power-law spectra. If it is a single float, then `action_dim` has to be 91 | specified and the noise will be sampled in a vectorized manner for each action dimension. If it is 92 | array_like, then it specifies one beta for each action dimension. This allows different betas for different 93 | action dimensions, but sampling might be slower for high-dimensional action spaces. 94 | seq_len : int 95 | Length of sampled colored noise signals. If sampled for longer than `seq_len` steps, a new 96 | colored noise signal of the same length is sampled. Should usually be set to the episode length 97 | (horizon) of the RL task. 98 | action_dim : int, optional 99 | Dimensionality of the action space. If passed, `beta` has to be a single float and the noise will be 100 | sampled in a vectorized manner for each action dimension. 101 | rng : np.random.Generator, optional 102 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 103 | calling `np.random.default_rng()`. 104 | epsilon : float, optional, by default 1e-6 105 | A small value to avoid NaN due to numerical imprecision. 106 | """ 107 | assert (action_dim is not None) == np.isscalar(beta), \ 108 | "`action_dim` has to be specified if and only if `beta` is a scalar." 109 | 110 | if np.isscalar(beta): 111 | super().__init__(action_dim, epsilon) 112 | self.beta = beta 113 | self.gen = ColoredNoiseProcess(beta=self.beta, size=(action_dim, seq_len), rng=rng) 114 | else: 115 | super().__init__(len(beta), epsilon) 116 | self.beta = np.asarray(beta) 117 | self.gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self.beta] 118 | 119 | def sample(self) -> th.Tensor: 120 | if np.isscalar(self.beta): 121 | cn_sample = th.tensor(self.gen.sample()).float() 122 | else: 123 | cn_sample = th.tensor([cnp.sample() for cnp in self.gen]).float() 124 | self.gaussian_actions = self.distribution.mean + self.distribution.stddev*cn_sample 125 | return th.tanh(self.gaussian_actions) 126 | 127 | def __repr__(self) -> str: 128 | return f"ColoredNoiseDist(beta={self.beta})" 129 | 130 | 131 | class PinkNoiseDist(ColoredNoiseDist): 132 | def __init__(self, seq_len, action_dim, rng=None, epsilon=1e-6): 133 | """ 134 | Gaussian pink noise distribution for using pink action noise with stochastic policies. 135 | 136 | The pink noise is only used for sampling actions. In all other respects, this class acts like its parent 137 | class (`SquashedDiagGaussianDistribution`). 138 | 139 | Parameters 140 | ---------- 141 | seq_len : int 142 | Length of sampled colored noise signals. If sampled for longer than `seq_len` steps, a new 143 | colored noise signal of the same length is sampled. Should usually be set to the episode length 144 | (horizon) of the RL task. 145 | action_dim : int 146 | Dimensionality of the action space. 147 | rng : np.random.Generator, optional 148 | Random number generator (for reproducibility). If not passed, a new random number generator is created by 149 | calling `np.random.default_rng()`. 150 | epsilon : float, optional, by default 1e-6 151 | A small value to avoid NaN due to numerical imprecision. 152 | """ 153 | super().__init__(1, seq_len, action_dim, rng, epsilon) 154 | --------------------------------------------------------------------------------