├── pink
    ├── __init__.py
    ├── tonic.py
    ├── colorednoise.py
    ├── cnrl.py
    └── sb3.py
├── pyproject.toml
├── LICENSE
├── examples
    └── example.py
└── README.md


/pink/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | from .cnrl import *
 3 | 
 4 | try:
 5 |     from .sb3 import *
 6 | except:
 7 |     pass
 8 | 
 9 | try:
10 |     from .tonic import *
11 | except:
12 |     pass
13 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "pink-noise-rl"
 3 | version = "2.0.1"
 4 | description = "Pink noise for exploration in reinforcement learning"
 5 | authors = ["Onno Eberhard <onnoeberhard@gmail.com>"]
 6 | license = "MIT"
 7 | readme = "README.md"
 8 | repository = "https://github.com/martius-lab/pink-noise-rl"
 9 | packages = [
10 |     { include = "pink" }
11 | ]
12 | 
13 | 
14 | [tool.poetry.dependencies]
15 | python = "^3.8"
16 | numpy = "*"
17 | 
18 | 
19 | [build-system]
20 | requires = ["poetry-core"]
21 | build-backend = "poetry.core.masonry.api"
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Onno Eberhard
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/examples/example.py:
--------------------------------------------------------------------------------
 1 | """Comparing pink action noise with the default noise on SAC."""
 2 | 
 3 | import gym
 4 | import numpy as np
 5 | import torch
 6 | from pink import PinkNoiseDist
 7 | from stable_baselines3 import SAC
 8 | 
 9 | # Reproducibility
10 | seed = 0
11 | np.random.seed(seed)
12 | torch.manual_seed(seed)
13 | rng = np.random.default_rng(seed)
14 | 
15 | # Initialize environment
16 | env = gym.make("MountainCarContinuous-v0")
17 | action_dim = env.action_space.shape[-1]
18 | seq_len = env._max_episode_steps
19 | rng = np.random.default_rng(0)
20 | 
21 | # Initialize agents
22 | model_default = SAC("MlpPolicy", env, seed=seed)
23 | model_pink = SAC("MlpPolicy", env, seed=seed)
24 | 
25 | # Set action noise
26 | model_pink.actor.action_dist = PinkNoiseDist(seq_len, action_dim, rng=rng)
27 | 
28 | # Train agents
29 | model_default.learn(total_timesteps=10_000)
30 | model_pink.learn(total_timesteps=10_000)
31 | 
32 | # Evaluate learned policies
33 | N = 100
34 | for name, model in zip(["Default noise\n-------------", "Pink noise\n----------"], [model_default, model_pink]):
35 |     solved = 0
36 |     for i in range(N):
37 |         obs = env.reset()
38 |         done = False
39 |         while not done:
40 |             obs, r, done, _ = env.step(model.predict(obs, deterministic=True)[0])
41 |             if r > 0:
42 |                 solved += 1
43 |                 break
44 | 
45 |     print(name)
46 |     print(f"Solved: {solved/N * 100:.0f}%\n")
47 | 
48 | 
49 | # - Output of this program -
50 | # Default noise
51 | # -------------
52 | # Solved: 0%
53 | #
54 | # Pink noise
55 | # ----------
56 | # Solved: 100%
57 | 


--------------------------------------------------------------------------------
/pink/tonic.py:
--------------------------------------------------------------------------------
 1 | """Colored noise implementations for Tonic RL library"""
 2 | 
 3 | import numpy as np
 4 | import torch as th
 5 | from tonic.torch.agents import MPO
 6 | 
 7 | from .cnrl import ColoredNoiseProcess
 8 | 
 9 | 
class MPO_CN(MPO):
    """MPO with colored noise exploration.

    The exploration actions returned by `_step` are `loc + scale * noise`, where `loc` and `scale` are read from
    the actor's output distribution and `noise` is drawn from one or more `ColoredNoiseProcess` instances.
    Construction is inherited unchanged from `MPO`.
    """

    def initialize(self, beta, seq_len, observation_space, action_space, rng=None, seed=None):
        """Initialize the agent and its colored noise generator(s).

        For documentation of beta, seq_len, rng see `pink.sb3.ColoredNoiseDist`.
        `observation_space`, `action_space` and `seed` are forwarded to `MPO.initialize`.
        """
        super().initialize(observation_space, action_space, seed)
        self.seq_len = seq_len
        self.rng = rng
        self.action_space = action_space
        self.set_beta(beta)

    def set_beta(self, beta):
        """Set the noise exponent(s) and (re)create the noise generator(s).

        Parameters
        ----------
        beta : float or array_like
            A scalar creates a single vectorized process of shape `(action_space.shape[0], seq_len)`. Otherwise one
            independent process of length `seq_len` is created per entry of `beta`.
        """
        if np.isscalar(beta):
            self.beta = beta
            self.gen = ColoredNoiseProcess(
                beta=self.beta, size=(self.action_space.shape[0], self.seq_len), rng=self.rng)
        else:
            self.beta = np.asarray(beta)
            self.gen = [ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in self.beta]

    def _step(self, observations):
        """Return exploration actions for `observations` using one colored noise sample."""
        observations = th.as_tensor(observations, dtype=th.float32)
        if np.isscalar(self.beta):
            cn_sample = th.tensor(self.gen.sample()).float()
        else:
            # One scalar sample per process, wrapped in an extra leading dimension.
            cn_sample = th.tensor([[cnp.sample() for cnp in self.gen]]).float()
        with th.no_grad():
            # Evaluate the actor once and read both parameters from the same output
            # instead of running the network twice.
            action_dist = self.model.actor(observations)
            return action_dist.loc + action_dist.scale*cn_sample
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Colored Action Noise for Deep RL
  2 | 
  3 | This repository contains easy-to-use implementations of pink noise and general colored noise for use as action noise in deep reinforcement learning. Included are the following classes:
  4 | - `ColoredNoiseProcess` and `PinkNoiseProcess` for general use, based on the [colorednoise](https://github.com/felixpatzelt/colorednoise) library
  5 | - `ColoredActionNoise` and `PinkActionNoise` to be used with deterministic policy algorithms like DDPG and TD3 in Stable Baselines3, both are subclasses of `stable_baselines3.common.noise.ActionNoise`
  6 | - `ColoredNoiseDist`, `PinkNoiseDist` to be used with stochastic policy algorithms like SAC in Stable Baselines3
  7 | - `MPO_CN` for using colored noise (incl. pink noise) with MPO using the Tonic RL library.
  8 | 
  9 | For more information, please see our paper: [Pink Noise Is All You Need: Colored Noise Exploration in Deep Reinforcement Learning](https://bit.ly/pink-noise-rl) (ICLR 2023 Spotlight).
 10 | 
 11 | ## Installation
 12 | You can install the library via pip:
 13 | ```
 14 | pip install pink-noise-rl
 15 | ```
 16 | Note: In Python, the import statement is simply `import pink`.
 17 | 
 18 | ## Usage
 19 | We provide minimal examples for using pink noise on SAC, TD3 and MPO below. An example comparing pink noise with the default action noise of SAC is included in the `examples` directory.
 20 | 
 21 | ### Simple pink noise process
 22 | ```python
 23 | from pink import PinkNoiseProcess
 24 | 
 25 | noise = PinkNoiseProcess((10, 1000))  # 10-dimensional pink noise process with buffer size 1000
 26 | noise.sample()      # Sample a single 10-dimensional time step
 27 | noise.sample(5000)  # Sample 5000 time steps at once. Since the buffer size is 1000, 5 independent signals are stitched together.
 28 | ```
 29 | 
 30 | ### Stable Baselines3: SAC, TD3
 31 | This example only works if Stable Baselines3 is installed.
 32 | ```python
 33 | import gym
 34 | from stable_baselines3 import SAC, TD3
 35 | 
 36 | # All classes mentioned above can be imported from `pink`
 37 | from pink import PinkNoiseDist, PinkActionNoise
 38 | 
 39 | # Initialize environment
 40 | env = gym.make("MountainCarContinuous-v0")
 41 | seq_len = env._max_episode_steps
 42 | action_dim = env.action_space.shape[-1]
 43 | ```
 44 | 
 45 | #### SAC
 46 | ```python
 47 | # Initialize agent
 48 | model = SAC("MlpPolicy", env)
 49 | 
 50 | # Set action noise
 51 | model.actor.action_dist = PinkNoiseDist(seq_len, action_dim)
 52 | 
 53 | # Train agent
 54 | model.learn(total_timesteps=100_000)
 55 | ```
 56 | 
 57 | #### TD3
 58 | ```python
 59 | # Initialize agent
 60 | model = TD3("MlpPolicy", env)
 61 | 
 62 | # Set action noise
 63 | noise_scale = 0.3
 64 | model.action_noise = PinkActionNoise(noise_scale, seq_len, action_dim)
 65 | 
 66 | # Train agent
 67 | model.learn(total_timesteps=100_000)
 68 | ```
 69 | 
 70 | ### Tonic: MPO
 71 | This example only works if Tonic is installed.
 72 | ```python
 73 | import gym
 74 | from tonic import Trainer
 75 | from pink import MPO_CN
 76 | 
 77 | # Initialize environment
 78 | env = gym.make("MountainCarContinuous-v0")
 79 | seq_len = env._max_episode_steps
 80 | 
 81 | # Initialize agent with pink noise
 82 | beta = 1
 83 | model = MPO_CN()
 84 | model.initialize(beta, seq_len, env.observation_space, env.action_space)
 85 | 
 86 | # Train agent
  87 | trainer = Trainer(steps=100_000)
 88 | trainer.initialize(model, env)
 89 | trainer.run()
 90 | ```
 91 | 
 92 | 
 93 | ## Citing
 94 | If you use this code in your research, please cite our paper:
 95 | ```bibtex
 96 | @inproceedings{eberhard-2023-pink,
 97 |   title = {Pink Noise Is All You Need: Colored Noise Exploration in Deep Reinforcement Learning},
 98 |   author = {Eberhard, Onno and Hollenstein, Jakob and Pinneri, Cristina and Martius, Georg},
 99 |   booktitle = {Proceedings of the Eleventh International Conference on Learning Representations (ICLR 2023)},
100 |   month = may,
101 |   year = {2023},
102 |   url = {https://openreview.net/forum?id=hQ9V5QN27eS}
103 | }
104 | ```
105 | 
106 | If there are any problems, or if you have a question, don't hesitate to open an issue here on GitHub.
107 | 


--------------------------------------------------------------------------------
/pink/colorednoise.py:
--------------------------------------------------------------------------------
  1 | """Colored noise generation script
  2 | Modified from colorednoise package: https://github.com/felixpatzelt/colorednoise
  3 | """
  4 | 
  5 | import numpy as np
  6 | from numpy.fft import irfft, rfftfreq
  7 | 
  8 | 
  9 | def powerlaw_psd_gaussian(exponent, size, fmin=0, rng=None):
 10 |     """Gaussian (1/f)**beta noise.
 11 | 
 12 |     Based on the algorithm in:
 13 |     Timmer, J. and Koenig, M.:
 14 |     On generating power law noise.
 15 |     Astron. Astrophys. 300, 707-710 (1995)
 16 | 
 17 |     Normalised to unit variance
 18 | 
 19 |     Parameters:
 20 |     -----------
 21 | 
 22 |     exponent : float
 23 |         The power-spectrum of the generated noise is proportional to
 24 | 
 25 |         S(f) = (1 / f)**beta
 26 |         flicker / pink noise:   exponent beta = 1
 27 |         brown noise:            exponent beta = 2
 28 | 
 29 |         Furthermore, the autocorrelation decays proportional to lag**-gamma
 30 |         with gamma = 1 - beta for 0 < beta < 1.
 31 |         There may be finite-size issues for beta close to one.
 32 | 
 33 |     shape : int or iterable
 34 |         The output has the given shape, and the desired power spectrum in
 35 |         the last coordinate. That is, the last dimension is taken as time,
 36 |         and all other components are independent.
 37 | 
 38 |     fmin : float, optional
 39 |         Low-frequency cutoff.
 40 |         Default: 0 corresponds to original paper.
 41 | 
 42 |         The power-spectrum below fmin is flat. fmin is defined relative
 43 |         to a unit sampling rate (see numpy's rfftfreq). For convenience,
 44 |         the passed value is mapped to max(fmin, 1/samples) internally
 45 |         since 1/samples is the lowest possible finite frequency in the
 46 |         sample. The largest possible value is fmin = 0.5, the Nyquist
 47 |         frequency. The output for this value is white noise.
 48 | 
 49 |     rng : np.random.Generator, optional
 50 |         Random number generator (for reproducibility). If not passed, a new
 51 |         random number generator is created by calling
 52 |         `np.random.default_rng()`.
 53 | 
 54 | 
 55 |     Returns
 56 |     -------
 57 |     out : array
 58 |         The samples.
 59 | 
 60 | 
 61 |     Examples:
 62 |     ---------
 63 | 
 64 |     >>> # generate 1/f noise == pink noise == flicker noise
 65 |     >>> import colorednoise as cn
 66 |     >>> y = cn.powerlaw_psd_gaussian(1, 5)
 67 |     """
 68 | 
 69 |     # Make sure size is a list so we can iterate it and assign to it.
 70 |     try:
 71 |         size = list(size)
 72 |     except TypeError:
 73 |         size = [size]
 74 | 
 75 |     # The number of samples in each time series
 76 |     samples = size[-1]
 77 | 
 78 |     # Calculate Frequencies (we asume a sample rate of one)
 79 |     # Use fft functions for real output (-> hermitian spectrum)
 80 |     f = rfftfreq(samples)
 81 | 
 82 |     # Validate / normalise fmin
 83 |     if 0 <= fmin <= 0.5:
 84 |         fmin = max(fmin, 1./samples)    # Low frequency cutoff
 85 |     else:
 86 |         raise ValueError("fmin must be chosen between 0 and 0.5.")
 87 | 
 88 |     # Build scaling factors for all frequencies
 89 |     s_scale = f
 90 |     ix = np.sum(s_scale < fmin)   # Index of the cutoff
 91 |     if ix and ix < len(s_scale):
 92 |         s_scale[:ix] = s_scale[ix]
 93 |     s_scale = s_scale**(-exponent/2.)
 94 | 
 95 |     # Calculate theoretical output standard deviation from scaling
 96 |     w = s_scale[1:].copy()
 97 |     w[-1] *= (1 + (samples % 2)) / 2.    # correct f = +-0.5
 98 |     sigma = 2 * np.sqrt(np.sum(w**2)) / samples
 99 | 
100 |     # Adjust size to generate one Fourier component per frequency
101 |     size[-1] = len(f)
102 | 
103 |     # Add empty dimension(s) to broadcast s_scale along last
104 |     # dimension of generated random power + phase (below)
105 |     dims_to_add = len(size) - 1
106 |     s_scale = s_scale[(None,) * dims_to_add + (Ellipsis,)]
107 | 
108 |     # Generate scaled random power + phase
109 |     if rng is None:
110 |         rng = np.random.default_rng()
111 |     sr = rng.normal(scale=s_scale, size=size)
112 |     si = rng.normal(scale=s_scale, size=size)
113 | 
114 |     # If the signal length is even, frequencies +/- 0.5 are equal
115 |     # so the coefficient must be real.
116 |     if not (samples % 2):
117 |         si[..., -1] = 0
118 |         sr[..., -1] *= np.sqrt(2)    # Fix magnitude
119 | 
120 |     # Regardless of signal length, the DC component must be real
121 |     si[..., 0] = 0
122 |     sr[..., 0] *= np.sqrt(2)    # Fix magnitude
123 | 
124 |     # Combine power + corrected phase to Fourier components
125 |     s = sr + 1J * si
126 | 
127 |     # Transform to real time series & scale to unit variance
128 |     y = irfft(s, n=samples, axis=-1) / sigma
129 | 
130 |     return y
131 | 


--------------------------------------------------------------------------------
/pink/cnrl.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from . import colorednoise as cn
  4 | 
  5 | 
  6 | class ColoredNoiseProcess():
  7 |     """Infinite colored noise process.
  8 | 
  9 |     Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences the
 10 |     PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified.
 11 | 
 12 |     Methods
 13 |     -------
 14 |     sample(T=1)
 15 |         Sample `T` timesteps from the colored noise process.
 16 |     reset()
 17 |         Reset the buffer with a new time series.
 18 |     """
 19 |     def __init__(self, beta, size, scale=1, max_period=None, rng=None):
 20 |         """Infinite colored noise process.
 21 | 
 22 |         Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences
 23 |         the PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified.
 24 | 
 25 |         Parameters
 26 |         ----------
 27 |         beta : float
 28 |             Exponent of colored noise power-law spectrum.
 29 |         size : int or tuple of int
 30 |             Shape of the sampled colored noise signals. The last dimension (`size[-1]`) specifies the time range, and
 31 |             is thus ths maximum possible correlation length of the combined signal.
 32 |         scale : int, optional, by default 1
 33 |             Scale parameter with which samples are multiplied
 34 |         max_period : float, optional, by default None
 35 |             Maximum correlation length of sampled colored noise singals (1 / low-frequency cutoff). If None, it is
 36 |             automatically set to `size[-1]` (the sequence length).
 37 |         rng : np.random.Generator, optional
 38 |             Random number generator (for reproducibility). If not passed, a new random number generator is created by
 39 |             calling `np.random.default_rng()`.
 40 |         """
 41 |         self.beta = beta
 42 |         if max_period is None:
 43 |             self.minimum_frequency = 0
 44 |         else:
 45 |             self.minimum_frequency = 1 / max_period
 46 |         self.scale = scale
 47 |         self.rng = rng
 48 | 
 49 |         # The last component of size is the time index
 50 |         try:
 51 |             self.size = list(size)
 52 |         except TypeError:
 53 |             self.size = [size]
 54 |         self.time_steps = self.size[-1]
 55 | 
 56 |         # Fill buffer and reset index
 57 |         self.reset()
 58 | 
 59 |     def reset(self):
 60 |         """Reset the buffer with a new time series."""
 61 |         self.buffer = cn.powerlaw_psd_gaussian(
 62 |                 exponent=self.beta, size=self.size, fmin=self.minimum_frequency, rng=self.rng)
 63 |         self.idx = 0
 64 | 
 65 |     def sample(self, T=1):
 66 |         """
 67 |         Sample `T` timesteps from the colored noise process.
 68 | 
 69 |         The buffer is automatically refilled when necessary.
 70 | 
 71 |         Parameters
 72 |         ----------
 73 |         T : int, optional, by default 1
 74 |             Number of samples to draw
 75 | 
 76 |         Returns
 77 |         -------
 78 |         array_like
 79 |             Sampled vector of shape `(*size[:-1], T)`
 80 |         """
 81 |         n = 0
 82 |         ret = []
 83 |         while n < T:
 84 |             if self.idx >= self.time_steps:
 85 |                 self.reset()
 86 |             m = min(T - n, self.time_steps - self.idx)
 87 |             ret.append(self.buffer[..., self.idx:(self.idx + m)])
 88 |             n += m
 89 |             self.idx += m
 90 | 
 91 |         ret = self.scale * np.concatenate(ret, axis=-1)
 92 |         return ret if n > 1 else ret[..., 0]
 93 | 
 94 | 
 95 | class PinkNoiseProcess(ColoredNoiseProcess):
 96 |     """Infinite pink noise process.
 97 | 
 98 |     Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences the
 99 |     PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified.
100 | 
101 |     Methods
102 |     -------
103 |     sample(T=1)
104 |         Sample `T` timesteps from the pink noise process.
105 |     reset()
106 |         Reset the buffer with a new time series.
107 |     """
108 |     def __init__(self, size, scale=1, max_period=None, rng=None):
109 |         """Infinite pink noise process.
110 | 
111 |         Implemented as a buffer: every `size[-1]` samples, a cut to a new time series starts. As this cut influences
112 |         the PSD of the combined signal, the maximum period (1 / low-frequency cutoff) can be specified.
113 | 
114 |         Parameters
115 |         ----------
116 |         size : int or tuple of int
117 |             Shape of the sampled pink noise signals. The last dimension (`size[-1]`) specifies the time range, and is
118 |             thus ths maximum possible correlation length of the combined signal.
119 |         scale : int, optional, by default 1
120 |             Scale parameter with which samples are multiplied
121 |         max_period : float, optional, by default None
122 |             Maximum correlation length of sampled pink noise singals (1 / low-frequency cutoff). If None, it is
123 |             automatically set to `size[-1]` (the sequence length).
124 |         rng : np.random.Generator, optional
125 |             Random number generator (for reproducibility). If not passed, a new random number generator is created by
126 |             calling `np.random.default_rng()`.
127 |         """
128 |         super().__init__(1, size, scale, max_period, rng)
129 | 


--------------------------------------------------------------------------------
/pink/sb3.py:
--------------------------------------------------------------------------------
  1 | """Colored noise implementations for Stable Baselines3"""
  2 | 
  3 | import numpy as np
  4 | import torch as th
  5 | from stable_baselines3.common.distributions import SquashedDiagGaussianDistribution
  6 | from stable_baselines3.common.noise import ActionNoise
  7 | 
  8 | from .cnrl import ColoredNoiseProcess
  9 | 
 10 | 
 11 | class ColoredActionNoise(ActionNoise):
 12 |     def __init__(self, beta, sigma, seq_len, action_dim=None, rng=None):
 13 |         """Action noise from a colored noise process.
 14 | 
 15 |         Parameters
 16 |         ----------
 17 |         beta : float or array_like
 18 |             Exponent(s) of colored noise power-law spectra. If it is a single float, then `action_dim` has to be
 19 |             specified and the noise will be sampled in a vectorized manner for each action dimension. If it is
 20 |             array_like, then it specifies one beta for each action dimension. This allows different betas for different
 21 |             action dimensions, but sampling might be slower for high-dimensional action spaces.
 22 |         sigma : float or array_like
 23 |             Noise scale(s) of colored noise signals. Either a single float to be used for all action dimensions, or
 24 |             an array_like of the same dimensionality as the action space (one scale for each action dimension).
 25 |         seq_len : int
 26 |             Length of sampled colored noise signals. If sampled for longer than `seq_len` steps, a new
 27 |             colored noise signal of the same length is sampled. Should usually be set to the episode length
 28 |             (horizon) of the RL task.
 29 |         action_dim : int, optional
 30 |             Dimensionality of the action space. If passed, `beta` has to be a single float and the noise will be
 31 |             sampled in a vectorized manner for each action dimension.
 32 |         rng : np.random.Generator, optional
 33 |             Random number generator (for reproducibility). If not passed, a new random number generator is created by
 34 |             calling `np.random.default_rng()`.
 35 |         """
 36 |         super().__init__()
 37 |         assert (action_dim is not None) == np.isscalar(beta), \
 38 |             "`action_dim` has to be specified if and only if `beta` is a scalar."
 39 | 
 40 |         self.sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma)
 41 | 
 42 |         if np.isscalar(beta):
 43 |             self.beta = beta
 44 |             self.gen = ColoredNoiseProcess(beta=self.beta, scale=self.sigma, size=(action_dim, seq_len), rng=rng)
 45 |         else:
 46 |             self.beta = np.asarray(beta)
 47 |             self.gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng)
 48 |                         for b, s in zip(self.beta, self.sigma)]
 49 | 
 50 |     def __call__(self) -> np.ndarray:
 51 |         return self.gen.sample() if np.isscalar(self.beta) else np.asarray([g.sample() for g in self.gen])
 52 | 
 53 |     def __repr__(self) -> str:
 54 |         return f"ColoredActionNoise(beta={self.beta}, sigma={self.sigma})"
 55 | 
 56 | 
 57 | class PinkActionNoise(ColoredActionNoise):
 58 |     def __init__(self, sigma, seq_len, action_dim, rng=None):
 59 |         """Action noise from a pink noise process.
 60 | 
 61 |         Parameters
 62 |         ----------
 63 |         sigma : float or array_like
 64 |             Noise scale(s) of colored noise signals. Either a single float to be used for all action dimensions, or
 65 |             an array_like of the same dimensionality as the action space (one scale for each action dimension).
 66 |         seq_len : int
 67 |             Length of sampled pink noise signals. If sampled for longer than `seq_len` steps, a new
 68 |             pink noise signal of the same length is sampled. Should usually be set to the episode length
 69 |             (horizon) of the RL task.
 70 |         action_dim : int
 71 |             Dimensionality of the action space.
 72 |         rng : np.random.Generator, optional
 73 |             Random number generator (for reproducibility). If not passed, a new random number generator is created by
 74 |             calling `np.random.default_rng()`.
 75 |         """
 76 |         super().__init__(1, sigma, seq_len, action_dim, rng)
 77 | 
 78 | 
 79 | class ColoredNoiseDist(SquashedDiagGaussianDistribution):
 80 |     def __init__(self, beta, seq_len, action_dim=None, rng=None, epsilon=1e-6):
 81 |         """
 82 |         Gaussian colored noise distribution for using colored action noise with stochastic policies.
 83 | 
 84 |         The colored noise is only used for sampling actions. In all other respects, this class acts like its parent
 85 |         class (`SquashedDiagGaussianDistribution`).
 86 | 
 87 |         Parameters
 88 |         ----------
 89 |         beta : float or array_like
 90 |             Exponent(s) of colored noise power-law spectra. If it is a single float, then `action_dim` has to be
 91 |             specified and the noise will be sampled in a vectorized manner for each action dimension. If it is
 92 |             array_like, then it specifies one beta for each action dimension. This allows different betas for different
 93 |             action dimensions, but sampling might be slower for high-dimensional action spaces.
 94 |         seq_len : int
 95 |             Length of sampled colored noise signals. If sampled for longer than `seq_len` steps, a new
 96 |             colored noise signal of the same length is sampled. Should usually be set to the episode length
 97 |             (horizon) of the RL task.
 98 |         action_dim : int, optional
 99 |             Dimensionality of the action space. If passed, `beta` has to be a single float and the noise will be
100 |             sampled in a vectorized manner for each action dimension.
101 |         rng : np.random.Generator, optional
102 |             Random number generator (for reproducibility). If not passed, a new random number generator is created by
103 |             calling `np.random.default_rng()`.
104 |         epsilon : float, optional, by default 1e-6
105 |             A small value to avoid NaN due to numerical imprecision.
106 |         """
107 |         assert (action_dim is not None) == np.isscalar(beta), \
108 |             "`action_dim` has to be specified if and only if `beta` is a scalar."
109 | 
110 |         if np.isscalar(beta):
111 |             super().__init__(action_dim, epsilon)
112 |             self.beta = beta
113 |             self.gen = ColoredNoiseProcess(beta=self.beta, size=(action_dim, seq_len), rng=rng)
114 |         else:
115 |             super().__init__(len(beta), epsilon)
116 |             self.beta = np.asarray(beta)
117 |             self.gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self.beta]
118 | 
119 |     def sample(self) -> th.Tensor:
120 |         if np.isscalar(self.beta):
121 |             cn_sample = th.tensor(self.gen.sample()).float()
122 |         else:
123 |             cn_sample = th.tensor([cnp.sample() for cnp in self.gen]).float()
124 |         self.gaussian_actions = self.distribution.mean + self.distribution.stddev*cn_sample
125 |         return th.tanh(self.gaussian_actions)
126 | 
127 |     def __repr__(self) -> str:
128 |         return f"ColoredNoiseDist(beta={self.beta})"
129 | 
130 | 
class PinkNoiseDist(ColoredNoiseDist):
    """Colored noise distribution with the exponent fixed to 1 (pink noise)."""

    def __init__(self, seq_len, action_dim, rng=None, epsilon=1e-6):
        """
        Gaussian pink noise distribution for using pink action noise with stochastic policies.

        The pink noise is only used for sampling actions. In all other respects, this class acts like its parent
        class (`SquashedDiagGaussianDistribution`).

        Parameters
        ----------
        seq_len : int
            Length of sampled pink noise signals. If sampled for longer than `seq_len` steps, a new
            pink noise signal of the same length is sampled. Should usually be set to the episode length
            (horizon) of the RL task.
        action_dim : int
            Dimensionality of the action space.
        rng : np.random.Generator, optional
            Random number generator (for reproducibility). If not passed, a new random number generator is created by
            calling `np.random.default_rng()`.
        epsilon : float, optional, by default 1e-6
            A small value to avoid NaN due to numerical imprecision.
        """
        super().__init__(beta=1, seq_len=seq_len, action_dim=action_dim, rng=rng, epsilon=epsilon)
154 | 


--------------------------------------------------------------------------------