├── .gitignore
├── README.md
├── main.py
├── run.py
├── space_conversion.py
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
logs_*
*.pyc
*.swp
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# TRPO

This repo implements a TRPO agent (Trust Region Policy Optimization, http://arxiv.org/abs/1502.05477).
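
To train on a single gym algorithmic task, pass the environment id as the first argument to `main.py` (it defaults to `RepeatCopy-v0`), e.g. `python main.py Copy-v0`; `run.py` launches the four tasks it lists in detached `screen` sessions. The code targets Python 2 with an early TensorFlow API (`tf.initialize_all_variables`, `tf.concat(0, ...)`), `prettytensor`, and a `gym` release old enough to still provide `env.monitor` and `gym.upload`.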

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from utils import *
import numpy as np
import random
import tensorflow as tf
import time
import os
import logging
import gym
from gym import envs, scoreboard
from gym.spaces import Discrete, Box
import prettytensor as pt
from space_conversion import SpaceConversionEnv
import tempfile
import sys

class TRPOAgent(object):

    config = dict2(**{
        "timesteps_per_batch": 1000,
        "max_pathlength": 10000,
        "max_kl": 0.01,
        "cg_damping": 0.1,
        "gamma": 0.95})

    def __init__(self, env):
        self.env = env
        if not isinstance(env.observation_space, Box) or \
           not isinstance(env.action_space, Discrete):
            print("Incompatible spaces.")
            exit(-1)
        print("Observation Space", env.observation_space)
        print("Action Space", env.action_space)
        self.session = tf.Session()
        self.end_count = 0
        self.train = True
        # Network input: current observation, previous observation and previous
        # one-hot action, hence 2 * obs_dim + n_actions features.
        self.obs = obs = tf.placeholder(
            dtype, shape=[
                None, 2 * env.observation_space.shape[0] + env.action_space.n], name="obs")
        self.prev_obs = np.zeros((1, env.observation_space.shape[0]))
        self.prev_action = np.zeros((1, env.action_space.n))
        self.action = action = tf.placeholder(tf.int64, shape=[None], name="action")
        self.advant = advant = tf.placeholder(dtype, shape=[None], name="advant")
        self.oldaction_dist = oldaction_dist = tf.placeholder(dtype, shape=[None, env.action_space.n], name="oldaction_dist")

        # Create neural network.
        action_dist_n, _ = (pt.wrap(self.obs).
                            fully_connected(64, activation_fn=tf.nn.tanh).
                            softmax_classifier(env.action_space.n))
        eps = 1e-6
        self.action_dist_n = action_dist_n
        N = tf.shape(obs)[0]
        p_n = slice_2d(action_dist_n, tf.range(0, N), action)
        oldp_n = slice_2d(oldaction_dist, tf.range(0, N), action)
        ratio_n = p_n / oldp_n
        Nf = tf.cast(N, dtype)
        surr = -tf.reduce_mean(ratio_n * advant)  # Surrogate loss
        var_list = tf.trainable_variables()
        kl = tf.reduce_sum(oldaction_dist * tf.log((oldaction_dist + eps) / (action_dist_n + eps))) / Nf
        ent = tf.reduce_sum(-action_dist_n * tf.log(action_dist_n + eps)) / Nf

        self.losses = [surr, kl, ent]
        self.pg = flatgrad(surr, var_list)
        # KL divergence where the first argument is fixed:
        # replace "old" with tf.stop_gradient in the kl expression above.
        kl_firstfixed = tf.reduce_sum(tf.stop_gradient(
            action_dist_n) * tf.log(tf.stop_gradient(action_dist_n + eps) / (action_dist_n + eps))) / Nf
        grads = tf.gradients(kl_firstfixed, var_list)
        self.flat_tangent = tf.placeholder(dtype, shape=[None])
        shapes = map(var_shape, var_list)
        start = 0
        tangents = []
        for shape in shapes:
            size = np.prod(shape)
            param = tf.reshape(self.flat_tangent[start:(start + size)], shape)
            tangents.append(param)
            start += size
        gvp = [tf.reduce_sum(g * t) for (g, t) in zip(grads, tangents)]
        self.fvp = flatgrad(gvp, var_list)
        self.gf = GetFlat(self.session, var_list)
        self.sff = SetFromFlat(self.session, var_list)
        self.vf = VF(self.session)
        self.session.run(tf.initialize_all_variables())
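
    # How the pieces above fit together (summary of the TRPO update):
    #   surr = -E[ pi(a|s) / pi_old(a|s) * advantage ]  is the surrogate loss to minimize;
    #   kl   = KL(pi_old || pi), reported each iteration and used to reject steps
    #          whose realized KL exceeds 2 * max_kl;
    #   fvp  = Fisher-vector product: differentiating kl_firstfixed once gives grads,
    #          dotting them with a flat tangent and differentiating again yields
    #          F * tangent without ever forming the Fisher matrix F.
    # learn() combines self.pg (policy gradient), self.fvp and conjugate_gradient()
    # to take an approximate natural-gradient step subject to the max_kl constraint.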

    def act(self, obs, *args):
        obs = np.expand_dims(obs, 0)
        # Build the network input before overwriting prev_obs, so it really holds
        # the previous observation rather than a second copy of the current one.
        obs_new = np.concatenate([obs, self.prev_obs, self.prev_action], 1)
        self.prev_obs = obs

        action_dist_n = self.session.run(self.action_dist_n, {self.obs: obs_new})

        if self.train:
            action = int(cat_sample(action_dist_n)[0])
        else:
            action = int(np.argmax(action_dist_n))
        self.prev_action *= 0.0
        self.prev_action[0, action] = 1.0
        return action, action_dist_n, np.squeeze(obs_new)

    def learn(self):
        config = self.config
        start_time = time.time()
        numeptotal = 0
        i = 0
        while True:
            # Generating paths.
            print("Rollout")
            paths = rollout(
                self.env,
                self,
                config.max_pathlength,
                config.timesteps_per_batch)

            # Computing returns and estimating advantage function.
            for path in paths:
                path["baseline"] = self.vf.predict(path)
                path["returns"] = discount(path["rewards"], config.gamma)
                path["advant"] = path["returns"] - path["baseline"]

            # Updating policy.
            action_dist_n = np.concatenate([path["action_dists"] for path in paths])
            obs_n = np.concatenate([path["obs"] for path in paths])
            action_n = np.concatenate([path["actions"] for path in paths])
            baseline_n = np.concatenate([path["baseline"] for path in paths])
            returns_n = np.concatenate([path["returns"] for path in paths])

            # Standardize the advantage function to have mean=0 and std=1.
            advant_n = np.concatenate([path["advant"] for path in paths])
            advant_n -= advant_n.mean()
            advant_n /= (advant_n.std() + 1e-8)

            feed = {self.obs: obs_n,
                    self.action: action_n,
                    self.advant: advant_n,
                    self.oldaction_dist: action_dist_n}

            episoderewards = np.array(
                [path["rewards"].sum() for path in paths])

            print("\n********** Iteration %i ************" % i)
            if episoderewards.mean() > 1.1 * self.env._env.spec.reward_threshold:
                self.train = False
            if not self.train:
                print("Episode mean: %f" % episoderewards.mean())
                self.end_count += 1
                if self.end_count > 100:
                    break
            if self.train:
                # Fit the baseline (value function) for the next iteration.
                self.vf.fit(paths)
                thprev = self.gf()

                def fisher_vector_product(p):
                    feed[self.flat_tangent] = p
                    return self.session.run(self.fvp, feed) + config.cg_damping * p

                g = self.session.run(self.pg, feed_dict=feed)
                stepdir = conjugate_gradient(fisher_vector_product, -g)
                shs = .5 * stepdir.dot(fisher_vector_product(stepdir))
                lm = np.sqrt(shs / config.max_kl)
                fullstep = stepdir / lm
                neggdotstepdir = -g.dot(stepdir)
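
                # Step-size choice: under the quadratic approximation KL ~= 0.5 * s^T F s,
                # the largest step along stepdir that satisfies the constraint is
                # stepdir / lm with lm = sqrt(0.5 * stepdir^T F stepdir / max_kl),
                # i.e. sqrt(shs / max_kl) computed above. linesearch() then backtracks
                # from this full step until the surrogate loss actually improves, using
                # neggdotstepdir / lm as the expected improvement rate.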

                def loss(th):
                    self.sff(th)
                    return self.session.run(self.losses[0], feed_dict=feed)
                theta = linesearch(loss, thprev, fullstep, neggdotstepdir / lm)
                self.sff(theta)

                surrafter, kloldnew, entropy = self.session.run(
                    self.losses, feed_dict=feed)
                if kloldnew > 2.0 * config.max_kl:
                    self.sff(thprev)

                stats = {}

                numeptotal += len(episoderewards)
                stats["Total number of episodes"] = numeptotal
                stats["Average sum of rewards per episode"] = episoderewards.mean()
                stats["Entropy"] = entropy
                exp = explained_variance(np.array(baseline_n), np.array(returns_n))
                stats["Baseline explained"] = exp
                stats["Time elapsed"] = "%.2f mins" % ((time.time() - start_time) / 60.0)
                stats["KL between old and new distribution"] = kloldnew
                stats["Surrogate loss"] = surrafter
                for k, v in stats.iteritems():
                    print(k + ": " + " " * (40 - len(k)) + str(v))
                if entropy != entropy:  # NaN check: stop if the policy entropy diverged.
                    exit(-1)
                if exp > 0.8:
                    self.train = False
            i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

if len(sys.argv) > 1:
    task = sys.argv[1]
else:
    task = "RepeatCopy-v0"

env = envs.make(task)
env.monitor.start(training_dir)

env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(training_dir,
           algorithm_id='trpo_ff')

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import os

tasks = ["Copy-v0", "DuplicatedInput-v0", "Reverse-v0", "RepeatCopy-v0"]

os.system("rm logs_*")
os.system("k screen")  # "k" is presumably a local shell alias that kills existing screen sessions.
os.system("screen -wipe")


for t in tasks:
    os.system("screen -dm -S trpo_%s bash -c '. ~/.profile; . ~/.bashrc; CUDA_VISIBLE_DEVICES=[] python main.py %s 2>&1 | tee logs_%s ; bash'" % (t, t, t))
--------------------------------------------------------------------------------
/space_conversion.py:
--------------------------------------------------------------------------------
"""
`SpaceConversionEnv` is a wrapper around any environment. It allows
converting certain action and observation spaces into others.
"""

import numpy as np
from gym.spaces import Discrete, Box, Tuple
from gym import Env


def box2box4obj(x, old_space_obj, new_space_obj):
    assert(old_space_obj.contains(x))
    action = np.reshape(x, new_space_obj.shape)
    assert(new_space_obj.contains(action))
    return action

def box2box4class(box_space):
    shape = np.prod(box_space.shape)
    low = box_space.low
    high = box_space.high
    if isinstance(low, np.ndarray):
        low = np.reshape(low, (shape, ))
    if isinstance(high, np.ndarray):
        high = np.reshape(high, (shape, ))
    return Box(low, high)

def discrete2tuple4obj(x, discrete_space, tuple_space):
    assert(discrete_space.contains(x))
    action = []
    for space in tuple_space.spaces:
        assert(isinstance(space, Discrete))
        action.append(x % space.n)
        x = int(x / space.n)
    action = tuple(action)
    assert(tuple_space.contains(action))
    return action

def tuple2discrete4obj(x, old_space_obj, new_space_obj):
    assert(False)

def tuple2discrete4class(tuple_space):
    n = 1
    for space in tuple_space.spaces:
        assert(isinstance(space, Discrete))
        n *= space.n
    return Discrete(n)

def box2discrete4obj(x, box_space, discrete_space):
    assert(False)

def discrete2box4obj(x, discrete_space, box_space):
    ret = np.zeros(discrete_space.n)
    ret[x] = 1.0
    return ret

def discrete2box4class(discrete_space):
    return Box(0.0, 1.0, discrete_space.n)

def ident4obj(x, old_space_obj, new_space_obj):
    return x
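
# Example of the conversions main.py relies on (illustrative values, not tied to a
# specific task): a Discrete(6) observation space becomes Box(0.0, 1.0, 6) and
# observation 2 becomes the one-hot vector [0, 0, 1, 0, 0, 0]; an action space
# Tuple(Discrete(2), Discrete(2), Discrete(5)) becomes Discrete(20), and the agent's
# integer action is decoded back into the tuple by the mixed-radix scheme in
# discrete2tuple4obj (take x % n for each component, then divide x by n).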

class SpaceConversionEnv(Env):
    convertable = {(Tuple, Discrete): (tuple2discrete4obj, discrete2tuple4obj, tuple2discrete4class),
                   (Discrete, Box): (discrete2box4obj, box2discrete4obj, discrete2box4class),
                   (Box, Box): (box2box4obj, box2box4obj, box2box4class)}

    def __init__(self, env, target_observation_space=None, target_action_space=None, verbose=False):
        self._verbose = verbose
        self._env = env
        self.action_convert = None
        self.observation_convert = None
        for pairs, convert in self.convertable.iteritems():
            if env.action_space.__class__ == pairs[0] and \
               target_action_space == pairs[1] and \
               self.action_convert is None:
                self.action_convert = convert[1]
                self._action_space_ = convert[2](env.action_space)
            if env.observation_space.__class__ == pairs[0] and \
               target_observation_space == pairs[1] and \
               self.observation_convert is None:
                self.observation_convert = convert[0]
                self._observation_space_ = convert[2](env.observation_space)

        if self.action_convert is None and \
           (env.action_space.__class__ == target_action_space or
                target_action_space is None):
            self.action_convert = ident4obj
            self._action_space_ = env.action_space
        if self.observation_convert is None and \
           (env.observation_space.__class__ == target_observation_space or
                target_observation_space is None):
            self.observation_convert = ident4obj
            self._observation_space_ = env.observation_space

        assert(self.action_convert is not None)
        assert(self.observation_convert is not None)

    def step(self, action, **kwargs):
        conv_action = self.action_convert(action, self.action_space, self._env.action_space)
        if self._verbose and self.action_convert != ident4obj:
            print("Input action: %s, converted action: %s" % (action, conv_action))
        step = self._env.step(conv_action, **kwargs)
        observation, reward, done, info = step

        conv_observation = self.observation_convert(observation, self._env.observation_space, self.observation_space)

        if self._verbose and self.observation_convert != ident4obj:
            print("Input observation: %s, converted observation: %s" % (observation, conv_observation))
        return conv_observation, reward, done, {}

    def reset(self, **kwargs):
        observation = self._env.reset(**kwargs)
        conv_observation = self.observation_convert(observation, self._env.observation_space, self.observation_space)

        if self._verbose and self.observation_convert != ident4obj:
            print("Input observation: %s, converted observation: %s" % (observation, conv_observation))
        return conv_observation

    @property
    def action_space(self):
        return self._action_space_

    @property
    def observation_space(self):
        return self._observation_space_

    def __getattr__(self, field):
        """
        Proxy everything else to the underlying env.
        """
        if hasattr(self._env, field):
            return getattr(self._env, field)
        raise AttributeError(field)

    def __repr__(self):
        if "object at" not in str(self._env):
            env_name = str(self._env)
        else:
            env_name = self._env.__class__.__name__
        return env_name
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import random
import scipy.signal
import prettytensor as pt

seed = 1
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)

dtype = tf.float32

def discount(x, gamma):
    assert x.ndim >= 1
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]
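
# discount() computes discounted returns right-to-left with an IIR filter:
# reversing x and filtering with b=[1], a=[1, -gamma] realizes the recurrence
# ret[t] = x[t] + gamma * ret[t + 1]. For example (illustrative numbers),
# discount(np.array([1.0, 1.0, 1.0]), 0.95) gives
# [1 + 0.95 * 1.95, 1 + 0.95 * 1.0, 1.0] = [2.8525, 1.95, 1.0].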

def rollout(env, agent, max_pathlength, n_timesteps):
    paths = []
    timesteps_sofar = 0
    while timesteps_sofar < n_timesteps:
        obs, actions, rewards, action_dists = [], [], [], []
        ob = env.reset()
        agent.prev_action *= 0.0
        agent.prev_obs *= 0.0
        for _ in xrange(max_pathlength):
            action, action_dist, ob = agent.act(ob)
            obs.append(ob)
            actions.append(action)
            action_dists.append(action_dist)
            res = env.step(action)
            ob = res[0]
            rewards.append(res[1])
            if res[2]:
                path = {"obs": np.concatenate(np.expand_dims(obs, 0)),
                        "action_dists": np.concatenate(action_dists),
                        "rewards": np.array(rewards),
                        "actions": np.array(actions)}
                paths.append(path)
                agent.prev_action *= 0.0
                agent.prev_obs *= 0.0
                break
        # Count the steps actually taken; `path` may not exist if the episode
        # never terminated within max_pathlength.
        timesteps_sofar += len(rewards)
    return paths


class VF(object):
    coeffs = None

    def __init__(self, session):
        self.net = None
        self.session = session

    def create_net(self, shape):
        print(shape)
        self.x = tf.placeholder(tf.float32, shape=[None, shape], name="x")
        self.y = tf.placeholder(tf.float32, shape=[None], name="y")
        self.net = (pt.wrap(self.x).
                    fully_connected(64, activation_fn=tf.nn.relu).
                    fully_connected(64, activation_fn=tf.nn.relu).
                    fully_connected(1))
        self.net = tf.reshape(self.net, (-1, ))
        l2 = (self.net - self.y) * (self.net - self.y)
        self.train = tf.train.AdamOptimizer().minimize(l2)
        self.session.run(tf.initialize_all_variables())

    def _features(self, path):
        o = path["obs"].astype('float32')
        o = o.reshape(o.shape[0], -1)
        act = path["action_dists"].astype('float32')
        l = len(path["rewards"])
        al = np.arange(l).reshape(-1, 1) / 10.0
        ret = np.concatenate([o, act, al, np.ones((l, 1))], axis=1)
        return ret

    def fit(self, paths):
        featmat = np.concatenate([self._features(path) for path in paths])
        if self.net is None:
            self.create_net(featmat.shape[1])
        returns = np.concatenate([path["returns"] for path in paths])
        for _ in range(50):
            self.session.run(self.train, {self.x: featmat, self.y: returns})

    def predict(self, path):
        if self.net is None:
            return np.zeros(len(path["rewards"]))
        else:
            ret = self.session.run(self.net, {self.x: self._features(path)})
            return np.reshape(ret, (ret.shape[0], ))


def cat_sample(prob_nk):
    assert prob_nk.ndim == 2
    N = prob_nk.shape[0]
    csprob_nk = np.cumsum(prob_nk, axis=1)
    out = np.zeros(N, dtype='i')
    for (n, csprob_k, r) in zip(xrange(N), csprob_nk, np.random.rand(N)):
        for (k, csprob) in enumerate(csprob_k):
            if csprob > r:
                out[n] = k
                break
    return out


def var_shape(x):
    out = [k.value for k in x.get_shape()]
    assert all(isinstance(a, int) for a in out), \
        "shape function assumes that shape is fully known"
    return out


def numel(x):
    return np.prod(var_shape(x))


def flatgrad(loss, var_list):
    grads = tf.gradients(loss, var_list)
    return tf.concat(0, [tf.reshape(grad, [numel(v)])
                         for (v, grad) in zip(var_list, grads)])


class SetFromFlat(object):

    def __init__(self, session, var_list):
        self.session = session
        shapes = map(var_shape, var_list)
        total_size = sum(np.prod(shape) for shape in shapes)
        self.theta = theta = tf.placeholder(dtype, [total_size])
        start = 0
        assigns = []
        for (shape, v) in zip(shapes, var_list):
            size = np.prod(shape)
            assigns.append(
                tf.assign(
                    v,
                    tf.reshape(theta[start:start + size], shape)))
            start += size
        self.op = tf.group(*assigns)

    def __call__(self, theta):
        self.session.run(self.op, feed_dict={self.theta: theta})


class GetFlat(object):

    def __init__(self, session, var_list):
        self.session = session
        self.op = tf.concat(0, [tf.reshape(v, [numel(v)]) for v in var_list])

    def __call__(self):
        return self.op.eval(session=self.session)


def slice_2d(x, inds0, inds1):
    inds0 = tf.cast(inds0, tf.int64)
    inds1 = tf.cast(inds1, tf.int64)
    shape = tf.cast(tf.shape(x), tf.int64)
    ncols = shape[1]
    x_flat = tf.reshape(x, [-1])
    return tf.gather(x_flat, inds0 * ncols + inds1)


def linesearch(f, x, fullstep, expected_improve_rate):
    accept_ratio = .1
    max_backtracks = 10
    fval = f(x)
    for (_n_backtracks, stepfrac) in enumerate(.5**np.arange(max_backtracks)):
        xnew = x + stepfrac * fullstep
        newfval = f(xnew)
        actual_improve = fval - newfval
        expected_improve = expected_improve_rate * stepfrac
        ratio = actual_improve / expected_improve
        if ratio > accept_ratio and actual_improve > 0:
            return xnew
    return x
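
# conjugate_gradient() below approximately solves A x = b using only
# matrix-vector products with A. In main.py, A is the (damped) Fisher matrix
# supplied through fisher_vector_product and b is the negative policy gradient,
# so the returned x is the natural-gradient step direction.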

def conjugate_gradient(f_Ax, b, cg_iters=10, residual_tol=1e-10):
    p = b.copy()
    r = b.copy()
    x = np.zeros_like(b)
    rdotr = r.dot(r)
    for i in xrange(cg_iters):
        z = f_Ax(p)
        v = rdotr / p.dot(z)
        x += v * p
        r -= v * z
        newrdotr = r.dot(r)
        mu = newrdotr / rdotr
        p = r + mu * p
        rdotr = newrdotr
        if rdotr < residual_tol:
            break
    return x

class dict2(dict):
    def __init__(self, **kwargs):
        dict.__init__(self, kwargs)
        self.__dict__ = self

def explained_variance(ypred, y):
    assert y.ndim == 1 and ypred.ndim == 1
    vary = np.var(y)
    return np.nan if vary == 0 else 1 - np.var(y - ypred) / vary

--------------------------------------------------------------------------------