├── .gitignore ├── README.md ├── code.py ├── curves.py └── decoding ├── HOWTOGENERATEFIGURES.txt ├── code.py └── decoding.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | This is the code for the ICML 2023 paper, [Learning to acquire novel cognitive tasks with evolution, plasticity and meta-meta-learning](https://arxiv.org/abs/2112.08588). 4 | 5 | We evolve a recurrent network, endowed with a reward-modulated Hebbian 6 | plasticity rule, that can automatically learn simple cognitive tasks from 7 | stimuli and rewards alone. The network is tested on a new task, never seen 8 | during evolution (delayed match-to-sample). 9 | 10 | ## How to use 11 | 12 | 1- Run `code.py`. A full run of 1000 generations will take about half a day on a machine with a standard GPU, but you can stop it before that. 13 | 14 | 2- This will generate several log files. The most important is 15 | `blosses_onerun.txt`, which records the main evaluation metric (mean success 16 | rate over the last 100 trials of a block) for the current candidate (i.e. 17 | batch element 0, the unmutated genome). Every 10th value in this file is obtained on the withheld test task; the others are on various training-set tasks. It will also generate other files, including `w.txt` and `alpha.txt` (the evolved weights and plasticity coefficients). 18 | 19 | 3- Run `curves.py`, which will automatically generate curves for training and testing loss, as in Figure 2 of the paper. These curves will only include the one run you just ran, so there will only be one line for each curve with no error interval. 20 | 21 | 4- Repeat the same process as many times as you like, each time saving `blosses_onerun.txt` under a different name. Then uncomment and modify line 11 in `curves.py` (`bls = [lt('bl1.txt'), lt('bl2.txt'), ...]`) to include the names of all these files as a list. Run `curves.py` to generate the same plot as Figure 2 in the paper, with inter-quartile ranges. 22 | 23 | The `decoding` directory includes code and instructions to generate the decoding results in Figure 4 of the paper. 24 | -------------------------------------------------------------------------------- /code.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torch.backends.cudnn as cudnn 6 | import pdb 7 | 8 | import scipy 9 | from scipy import ndimage 10 | from scipy import linalg 11 | 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | 15 | import os 16 | import argparse 17 | import time 18 | 19 | import numpy as np 20 | from numpy import fft 21 | 22 | from scipy import io as spio 23 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 24 | 25 | torch.set_printoptions(precision=5) 26 | np.set_printoptions(precision=5) 27 | 28 | 29 | 30 | # Specify the test task (and its logical negation, which is also withheld from the training set) 31 | # TESTTASK = 'nand'; TESTTASKNEG = 'and' 32 | TESTTASK = 'dms'; TESTTASKNEG = 'dnms' 33 | 34 | 35 | 36 | LR = 1e-2 # Adam (evolutionary) LR. 
37 | WDECAY = 3e-4 # Evolutionary weight decay parameter (for the Adam optimizer) 38 | MUTATIONSIZE = 3 * .01 # Std dev of the Gaussian mutations of the evolutionary algorithm 39 | 40 | # ALPHAACTPEN = 3 * 3e-3 41 | ALPHAACTPEN = 3 * 3 * 10 * 3e-3 # When squaring 42 | 43 | NBGEN = 1000 # 1700 # 500 # Number of generations per run 44 | NUMGENCUTLR = 100000 # The generation at which we cut the learning rate. If >NBGEN, we don't. 45 | 46 | N = 70 # Number of neurons in the RNN. 47 | 48 | 49 | 50 | BS = 500 # 500 # 1000 # Batch size, i.e. population size for the evolutionary algorithm. 51 | assert BS % 2 == 0 # Should be even because of antithetic sampling. 52 | 53 | # Same parameters as GR Yang: 54 | TAU = 100 # Neuron membrane constant, in ms 55 | DT = 20 # Duration of a timestep, in ms 56 | 57 | 58 | # All the following times are in *timesteps*, not ms 59 | T = 50 # Number of *timesteps* per trial 60 | STIMTIME = 20 # Duration of stimulus input, total, *in timesteps* (not ms) 61 | REWARDTIME = 10 # Duration of reward signal period 62 | RESPONSETIME = 10 # Duration of response period 63 | STARTRESPONSETIME = 25 # Timestep at which response period starts 64 | ENDRESPONSETIME = STARTRESPONSETIME + RESPONSETIME 65 | STARTREWARDTIME = 36 # Timestep at which reward is delivered and reward signal starts 66 | ENDREWARDTIME = STARTREWARDTIME + REWARDTIME 67 | assert ENDREWARDTIME < T 68 | 69 | 70 | MODULTYPE = 'EXTERNAL' # 'INTERNAL' # EXTERNAL is node-perturbation. INTERNAL is network-controlled modulation (experimental and untested in this code) 71 | 72 | 73 | 74 | JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 75 | TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 76 | PROBAMODUL = .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 77 | ALPHAMODUL = .5 # Scale of the random perturbations 78 | ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 79 | MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for external neuromodulation 80 | MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 81 | INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 82 | 83 | 84 | 85 | # The names of all the tasks. 14 tasks in total, because "always respond 0" and "always respond 1" are not included. 86 | alltasks = ['and', 'nand' , '01', 'anti01' , '10', 'anti10', 'watchstim1', 'watchstim2' ,'dms', 'antiwatchstim2', 'antiwatchstim1', 'or', 'nor', 'dnms'] 87 | 88 | 89 | 90 | NBSTIMNEURONS = 2 # 2 Stimulus neurons. Stimuli are binary, so both neurons receive opposite-valued inputs (or 0) 91 | NBREWARDNEURONS = 2 # 6 # 2 # reward signal for this trial. A value is represented with 2 inputs, as it is for stimulus neurons. 92 | NBBIASNEURONS = 1 # Bias neurons. Activations clamped to BIASVALUE. 93 | NBINPUTNEURONS = NBSTIMNEURONS + NBREWARDNEURONS + NBBIASNEURONS # The first NBINPUTNEURONS neurons in the network are input neurons (includes the bias, noise and reward inputs) 94 | NBRESPNEURONS = 2 # Response neurons for 0 and 1. 95 | NBMODNEURONS = 2 # Neuromodulatory output neurons 96 | NBOUTPUTNEURONS = NBRESPNEURONS + NBMODNEURONS # The last NBOUTPUTNEURONS neurons in the network are output neurons. Response neurons + Modulatory neuron. 
97 | NBRESPSIGNALNEURONS = NBRESPNEURONS # Neurons that receive the response-given signal ("what response did I just give?") 98 | STIMNEURONS = np.arange(NBSTIMNEURONS) 99 | INPUTNEURONS = np.arange(NBINPUTNEURONS) 100 | OUTPUTNEURONS = np.arange(N-NBOUTPUTNEURONS, N) 101 | MODNEURONS = np.arange(N-NBOUTPUTNEURONS, N-NBOUTPUTNEURONS + NBMODNEURONS) 102 | # NUMMODNEURON = N - NBOUTPUTNEURONS # The modulatory neuron is the first output neuron 103 | RESPNEURONS = np.arange(N-NBOUTPUTNEURONS+NBMODNEURONS, N) # Then come the response neurons 104 | REWARDNEURONS = np.arange(NBSTIMNEURONS, NBSTIMNEURONS+NBREWARDNEURONS) # The neurons receiving (and broadcasting) the "reward for this trial" signal are the ones just after the stimulus inputs. 105 | BIASNEURONS = np.arange(NBSTIMNEURONS+NBREWARDNEURONS, NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS) 106 | FIRSTRESPSIGNALNEURON = NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS # The first neuron that receives the response-given signal. We'll need this later 107 | assert FIRSTRESPSIGNALNEURON == NBINPUTNEURONS 108 | assert len(RESPNEURONS) == NBRESPNEURONS 109 | RESPSIGNALNEURONS = np.arange(FIRSTRESPSIGNALNEURON, FIRSTRESPSIGNALNEURON +NBRESPSIGNALNEURONS) 110 | 111 | 112 | BIASVALUE = 1.0 113 | 114 | 115 | 116 | NBTASKSPERGEN = 1 # 2 # 2 task blocks per generation 117 | 118 | 119 | NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last NBTRIALSLOSS trials of each block 120 | NBTRIALS = 300 + NBTRIALSLOSS # Total number of trials per block 121 | 122 | 123 | 124 | REWARDSIZE = 3.0 # 3 * 3.0 # Size of the binary-reward signal (correct/incorrect) 125 | STIMSIZE = 3.0 # Size of the stimulus input 126 | RESPSIGNALSIZE = 3.0 # Size of the response-given signal 127 | 128 | 129 | totalnbtasks = 0 130 | ticstart = time.time() 131 | 132 | 133 | # EVALW is to assess the behavior of an evolved network. Run it on a single batch of all tasks, without any mutation 134 | EVALW = False 135 | if EVALW: 136 | NBGEN = 1 137 | NBTASKSPERGEN = 1 138 | BS = 500 139 | MUTATIONSIZE = 0 140 | allresps=[] 141 | allstims=[] 142 | alltgts=[] 143 | 144 | 145 | 146 | 147 | with torch.no_grad(): # We don't need PyTorch to keep track of gradients, since we're computing the gradient ourselves (through evolution). 148 | 149 | PRINTING = True # if numgen == 0 or np.random.rand() < .05 else False 150 | 151 | # Initialize innate weight values 152 | w = torch.randn(N,N) * JINIT / np.sqrt(N) 153 | w = w.to(device) 154 | 155 | # Initialize alpha values - the plasticity coefficients (capital-pi in the paper) 156 | alpha = INITALPHA * torch.ones_like(w).to(device) 157 | 158 | # We zero out input weights to input neurons, though it probably doesn't have any effect. 159 | w.data[:NBINPUTNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 160 | # We also zero out the weights to neuromodulatory neurons, which probably does have an effect! 161 | w.data[MODNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 162 | winit = w.clone() 163 | 164 | # We will be using the Adam optimizer to apply our (hand-computed) evolutionary gradients 165 | optimizer = optim.Adam([w, alpha], lr=LR, weight_decay=WDECAY) # Default betas=(0.9, 0.999) 166 | 167 | # Evolosses are real-valued losses used for evolution. Binarylosses are binary 'correct/wrong' signals, also used for logging. 
168 | evolosses = [] 169 | responses0 = [] 170 | binarylosses = [] 171 | wgradnorms = [] 172 | mytaskprev = mytaskprevprev = mytaskprevprevprev = -1 173 | 174 | 175 | if not EVALW: 176 | # We save the initial weights and plasticity coefficients 177 | ww = w.cpu().numpy() 178 | aa = alpha.cpu().numpy() 179 | np.savetxt('winit.txt', ww) 180 | np.savetxt('alphainit.txt', aa) 181 | 182 | if EVALW : 183 | # If in Evaluate-Weights mode, we load the weights and plasticity coefficients 184 | w = np.loadtxt('w.txt') 185 | w = torch.from_numpy(w).float().to(device) 186 | winit = w.clone() 187 | 188 | alpha = np.loadtxt('alpha.txt') 189 | alpha = torch.from_numpy(alpha).float().to(device) 190 | 191 | 192 | print("MODULTYPE is:", MODULTYPE) 193 | assert MODULTYPE == 'EXTERNAL' or MODULTYPE == 'INTERNAL', "Modulation type must be 'INTERNAL' or 'EXTERNAL'" 194 | 195 | 196 | 197 | 198 | 199 | # Ready to start the evolutionary loop, iterating over generations (i.e. lifetimes). 200 | 201 | for numgen in range(NBGEN): 202 | 203 | 204 | 205 | if numgen == NUMGENCUTLR: 206 | # Optionally, cut the learning rate after a given number of generations. Note that this point will not be reached in the default version because NUMGENCUTLR > NBGEN. 207 | for param_group in optimizer.param_groups: 208 | param_group['lr'] /= 5.0 209 | 210 | 211 | 212 | # Every 10th generation is for testing on the withheld task (with no weight change) 213 | TESTING = False 214 | if numgen == 0 or numgen == NBGEN-1 or numgen % 10 == 0: 215 | TESTING = True 216 | if PRINTING: 217 | print("TESTING") 218 | if EVALW: 219 | TESTING = False 220 | 221 | 222 | tic = time.time() 223 | responses0thisgen = [] 224 | 225 | 226 | 227 | alpha.clip_(min=0) 228 | 229 | 230 | 231 | # Generating the population of mutated individuals: 232 | 233 | # First, batch the weights. 234 | bw = torch.dstack(BS*[w]).movedim(2,0).to(device) # batched weights 235 | balpha = torch.dstack(BS*[alpha]).movedim(2,0).to(device) # batched alphas 236 | # Generate the mutations, for both w and alpha 237 | # NOTE: batch element 0 (and BS/2, its antithetic pair) are NOT mutated, and represent the current unmutated candidate genotype. 238 | mutations_wandalpha = [] 239 | for n, x in enumerate( (bw, balpha) ): 240 | mutations = torch.randn_like(x, requires_grad=False).to(device) * MUTATIONSIZE 241 | mutations[0,:,:] = 0 # 1st item in batch = current candidate 242 | mutations[BS//2:, :, :] = -mutations[:BS//2, :, :] # Antithetic sampling for mutations ! Really helps. 243 | if TESTING or EVALW: 244 | mutations *= 0.0 # No mutation - results in batch score variance being caused only by randomness in trial order and (possibly) lifetime perturbations 245 | x += mutations 246 | mutations_wandalpha.append(mutations) 247 | 248 | 249 | 250 | bw.data[:, :NBINPUTNEURONS, :] = 0 # Input neurons receive 0 connections. Probably not necessary. 251 | bworig = bw.clone() # Storing the weights for comparison purposes at the gradient step (below). 252 | 253 | lifelosses = torch.zeros(BS, requires_grad=False).to(device) 254 | lifemselosses = torch.zeros(BS, requires_grad=False).to(device) 255 | lifeactpens = torch.zeros(BS, requires_grad=False).to(device) 256 | lifeblosses = torch.zeros(BS, requires_grad=False).to(device) 257 | 258 | 259 | 260 | 261 | # Lifetime loop, iterates over task-blocks: 262 | # In the present version NBTASKSPERGEN is always 1, so this loop is redundant. 
263 | for numtask in range(NBTASKSPERGEN): 264 | totalnbtasks += 1 265 | 266 | COLLECTMODOUTSANDREWINS = not EVALW and ( (numtask + numgen * 2) % 7 == 0 ) 267 | 268 | # bpw = batched plastic weights 269 | bpw = torch.zeros_like(bw).to(device) # For now, plastic weights are initialized to 0 at the beginning of each task. 270 | 271 | # Initialize neural states 272 | bstates = .1 * torch.ones(BS, N).to(device) # bstates (batched states) contains the neural activations (before nonlinearity). Dimensionality appropriate for batched matrix multiplication. 273 | bstates[:, INPUTNEURONS] = 0 274 | bresps = 1.0 * bstates # bresps is the actual neural responses, after nonlinearity, and also serves as the input for the next step. 275 | bresps[:, BIASNEURONS] = BIASVALUE 276 | 277 | meanlosstrace = torch.zeros(BS, 2 * 2).to(device) 278 | bls = [] # Will store binary losses of all batch elements, for each trial of this task 279 | bl0s = [] # Same but only for batch element 0 (i.e. the unmutated candidate genome) 280 | ml0s = [] # MSE loss (the one used for evolution) for element 0 (unmutated candidate), of all trials for this task 281 | 282 | 283 | 284 | # Choose the task ! If not testing, make sure it's different from recently chosen tasks. 285 | 286 | 287 | # if TESTING: 288 | # mytask = TESTTASK 289 | # mytasknum = alltasks.index(mytask) 290 | # else: 291 | # while True: 292 | # mytasknum = np.random.randint(len(alltasks)) 293 | 294 | # mytask = alltasks[mytasknum] 295 | 296 | # if ( (mytask!= TESTTASK) 297 | # and (mytask != TESTTASKNEG) # We withhold both the test task and its logical negation 298 | # and (mytask != mytaskprev) 299 | # and (mytask != mytaskprevprev) 300 | # ): 301 | 302 | # break 303 | 304 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 305 | 306 | 307 | # We pick the tasks for this generation, for the whole population (i.e. the whole batch) 308 | 309 | # # Only use AND and NAND as tasks 310 | # mytasknum = numtask % 4 311 | # mytask = alltasks[mytasknum] 312 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 313 | 314 | 315 | btasks = [] # Tasks for the whole batch 316 | for ii in range(BS//2): 317 | if TESTING: 318 | # On 'testing' generations, we only show the withheld test task to everyone (this will not result in any parameter change and is only used for tracking evolutionary progress) 319 | cand_task = TESTTASK 320 | cand_tasknum = alltasks.index(TESTTASK) 321 | else: 322 | while True: 323 | cand_tasknum = np.random.randint(len(alltasks)) 324 | cand_task = alltasks[cand_tasknum] 325 | if ( (cand_task!= TESTTASK) 326 | and (cand_task != TESTTASKNEG) # We withhold both the test task and its logical negation 327 | 328 | 329 | and (cand_tasknum % 2 == (numgen // 2) % 2) # Training on alternate halves of the training set at successive (pairs of) generations (not sure if this helps) 330 | 331 | 332 | ): 333 | break 334 | btasks.append(cand_task) 335 | 336 | btasks = btasks * 2 # Duplicating the list, so each antithetic pair (batch elements K and K + BS/2) has the same tasks. 
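# (Editor's aside, not part of the original code: a compact restatement of
# the antithetic-sampling scheme used throughout. Batch elements k and
# k + BS//2 receive opposite mutations +eps_k / -eps_k and identical tasks
# and stimuli, so the evolutionary gradient computed at the end of each
# generation,
#     gradient = sum_k lifelosses[k] * eps_k / (BS * MUTATIONSIZE**2),
# behaves like a symmetric finite-difference estimate: noise shared within
# each pair cancels, and only the loss difference between the +eps_k and
# -eps_k individuals contributes.)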
337 | 338 | 339 | 340 | if EVALW: 341 | btasks = [TESTTASK] * BS 342 | 343 | 344 | 345 | 346 | 347 | assert(len(btasks) == BS) 348 | 349 | 350 | # Cumulative MSE and binary losses for this task, over the last NBTRIALSLOSS trials of the block: 351 | taskmselosses = torch.zeros_like(lifemselosses).to(device) 352 | taskblosses = torch.zeros_like(lifemselosses).to(device) 353 | 354 | respz = [] # Response neuron outputs 355 | stimz = [] # Stimulus neurons outputs 356 | modouts = [] # Neuromodulatory output - not used here, because we use node-perturbation (i.e. modulation is EXTERNAL) 357 | rewins = [] # Received rewards (reward neuron outputs) 358 | 359 | 360 | if PRINTING: 361 | print("task[0]:", btasks[0], "task[1]:", btasks[1]) 362 | 363 | 364 | 365 | # OK, ready to start the task. 366 | 367 | 368 | eligtraces = torch.zeros_like(bw, requires_grad=False).to(device) # Initialize the eligibility traces at the start of each block/task. 369 | 370 | 371 | # Task loop, iterating over trials 372 | # You do NOT erase memory (neural activations or plastic weights) between successive trials ! 373 | for numtrial in range(NBTRIALS): 374 | 375 | # First, some preparation for the trial to come. 376 | 377 | # Initializations 378 | mselossesthistrial = torch.zeros(BS, requires_grad=False).to(device) # MSE losses for this trial 379 | totalresps = torch.zeros(BS, NBRESPNEURONS, requires_grad=False).to(device) # Will accumulate the total outputs of each network over the trial, so we can compute the network's response for this trial. 380 | 381 | 382 | # Before we start the trial, we need to generate the inputs and targets for this trial, for the whole population (i.e. the whole batch): 383 | 384 | # Pick stimulus 1 and stimulus 2 for this trial (and for each batch member): 385 | stims1 = (torch.rand(BS, 1) > .5).float() 386 | stims2 = (torch.rand(BS, 1) > .5).float() 387 | 388 | 389 | # Antithetic pairs share the exact same stimuli 390 | stims1[BS//2:, :] = stims1[:BS//2, :] 391 | stims2[BS//2:, :] = stims2[:BS//2, :] 392 | 393 | 394 | 395 | # Actual temporal inputs: 396 | inpts = np.zeros((BS, NBSTIMNEURONS, STIMTIME)) 397 | StimDur = STIMTIME 398 | StartStim = 0 399 | # The two stimuli are presented in succession, with both input neurons locked in opposite values to each other: 400 | inpts[:, 0, StartStim:StartStim+StimDur//2 - 2] = 2.0 * stims1 - 1.0 401 | inpts[:, 0, StartStim+StimDur//2:StartStim+StimDur - 2] = 2.0 * stims2 - 1.0 402 | inpts[:, 1, StartStim:StartStim+StimDur] = -inpts[:, 0, StartStim:StartStim+StimDur] 403 | 404 | inputs = torch.from_numpy(inpts).float().to(device) 405 | 406 | 407 | 408 | # Now we compute the targets for this trial, that is, the expected values of the output neurons, depending on inputs and tasks 409 | tgts = -100 * np.ones((BS, NBRESPNEURONS, RESPONSETIME)) 410 | 411 | for ii in range(BS): 412 | # First we generate the expected output for the non-null response neuron, based on inputs and task: 413 | if btasks[ii] == 'watchstim1': 414 | tgts[ii, 1, :] = stims1[ii, 0] 415 | elif btasks[ii] == 'watchstim2': 416 | tgts[ii, 1, :] = stims2[ii, 0] 417 | elif btasks[ii] == 'antiwatchstim1': 418 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] 419 | elif btasks[ii] == 'antiwatchstim2': 420 | tgts[ii, 1, :] = 1.0 - stims2[ii, 0] 421 | elif btasks[ii] == 'and': 422 | tgts[ii, 1, :] = (stims1[ii, 0] * stims2[ii, 0]) 423 | elif btasks[ii] == 'nand': 424 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] * stims2[ii, 0]) 425 | # These two lines add 25% running time to the entire program! 
looks like np.clip is *slow*. 426 | # elif btasks[ii] == 'or': 427 | # tgts[ii, 1, :] = np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 428 | # elif btasks[ii] == 'nor': 429 | # tgts[ii, 1, :] = 1.0 - np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 430 | # Instead, we will clip after the full array is done. This should still work out the same. 431 | elif btasks[ii] == 'or': 432 | tgts[ii, 1, :] = stims1[ii, 0] + stims2[ii, 0] 433 | elif btasks[ii] == 'nor': 434 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] + stims2[ii, 0]) 435 | elif btasks[ii] == '10': 436 | tgts[ii, 1, :] = stims1[ii, 0] * (1.0 - stims2[ii, 0]) 437 | elif btasks[ii] == 'anti10': 438 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] * (1.0 - stims2[ii, 0]) 439 | elif btasks[ii] == '01': 440 | tgts[ii, 1, :] = (1.0 - stims1[ii, 0]) * stims2[ii, 0] 441 | elif btasks[ii] == 'anti01': 442 | tgts[ii, 1, :] = 1.0 - (1.0 - stims1[ii, 0]) * stims2[ii, 0] 443 | elif btasks[ii] == 'dms': 444 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 445 | elif btasks[ii] == 'dnms': 446 | tgts[ii, 1, :] = (stims1[ii, 0] != stims2[ii, 0]) 447 | else: 448 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 449 | 450 | tgts[:, 1, :] = np.clip(tgts[:, 1, :], 0.0, 1.0) 451 | 452 | 453 | 454 | # The target responses of the two output neurons are mirror images of each other (network response is binary) 455 | tgts[:, 0, :] = 1.0 - tgts[:, 1, :] 456 | 457 | assert np.all(np.logical_or(tgts == 0.0 , tgts == 1.0)) 458 | 459 | if EVALW: 460 | alltgts.append(tgts[:,1, 0]) 461 | allstims.append(np.hstack((stims1, stims2))) 462 | 463 | 464 | 465 | 466 | targets = torch.from_numpy(tgts).float().to(device) 467 | 468 | # In practice, we clip targets to 0.1/0.9 instead of actually 0.0/1.0. This may or may not help. 469 | targets.clip_(min=0.1, max=0.9) 470 | 471 | 472 | 473 | 474 | # Now that the inputs and targets are prepared, we are ready to actually start the trial! 475 | 476 | 477 | # Run the network. Trial loop, iterating over timesteps 478 | for numstep in range(T): 479 | 480 | # Update neural activations, using previous-step bresps (actual neural outputs) as input. 481 | # 'bstates' are the neural activations before nonlinearity 482 | # 'bresps' are the actual firing rates, i.e. bstates after nonlinearity (or clamped values for input neurons) 483 | # bresps is the lateral input to bstates, which is then used to compute bresps for the next step 484 | 485 | # This implements the equation dx = dt/tau * (-x(t) + (W + PI .* P(t)) @ y(t) ) - standard continuous-time RNN, with plastic weights. 'alpha' is PI in the ICML paper. 486 | bstates += (DT / TAU) * (-bstates + torch.bmm((bw + balpha * bpw), bresps[:, :, None])[:,:,0] ) 487 | 488 | 489 | # Applying the random perturbations on neural activations, both for noise and for the lifetime plasticity algorithm (node-perturbation) 490 | # And also updating the eligibility trace appropriately 491 | # This is a very non-optimal implementation! 492 | if numstep > 1 : 493 | perturbindices = (torch.rand(1, N) < PROBAMODUL).int() # Which neurons get perturbed? 494 | 495 | # perturbindices[0, MODNEURONS] = 0 # We disable perturbations on neuromodulatory neurons for debugging... 496 | 497 | 498 | perturbations = (ALPHAMODUL * perturbindices * (2 * torch.rand(1, N) - 1.0)).to(device) # Note the dimensions: the same noise vector is applied to all elements in the batch (to save time!) 
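# (Editor's note, not part of the original code: with PROBAMODUL = .1 and
# N = 70, about 7 neurons receive a perturbation at each timestep, each by
# an amount drawn uniformly from [-ALPHAMODUL, ALPHAMODUL] = [-0.5, 0.5];
# the same perturbation vector is shared by all BS batch elements, as the
# comment above notes.)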
499 | 500 | 501 | 502 | 503 | if numtrial > NBTRIALS - 20: 504 | perturbations.fill_(0) # Again, not sure if that helps 505 | 506 | 507 | 508 | bstates += perturbations 509 | 510 | # Node-perturbation: Hebbian eligibility trace = product between inputs (bresps from previous time step) and *perturbations* in outputs. dH = X * deltaY 511 | # We do this with a (batched) outer product between the (column) vector of perturbations (1 per neuron) and the (row) vector of inputs 512 | # Note that here, since we have an RNN, the input is bresps - the network's responses from the previous time step 513 | if torch.sum(perturbindices) > 0: 514 | eligtraces += torch.bmm( perturbations.expand(BS, -1)[:, :, None], bresps[:, None, :] ) 515 | 516 | # Eligibility traces, unlike actual plastic weights, are decaying 517 | eligtraces -= (DT / TAU_ET) * eligtraces 518 | 519 | 520 | # We can now compute the actual neural responses (firing rates) for this time step, applying the appropriate nonlinearity to each neuron 521 | bresps = bstates.clone() # F.leaky_relu(bstates) 522 | # The following assumes that response neurons are the last neurons of the network ! 523 | bresps[:,N-NBRESPNEURONS:].sigmoid_() # The response neurons (NOT output neurons - modulatory neuron not included!) are sigmoids, all others are tanh. An arbitrary design choice. 524 | bresps[:,:N-NBRESPNEURONS].tanh_() 525 | 526 | 527 | # Are we in the input presentation period? Then apply the inputs. 528 | # Inputs are clamping, fixing the response of the input neurons. 529 | if numstep < STIMTIME: 530 | # bresps[:, STIMNEURONS] = STIMSIZE * inputs[:, :, numstep] 531 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = STIMSIZE * inputs[:, :, numstep] 532 | else: 533 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = 0 534 | # bresps[:, STIMNEURONS] = 0 535 | 536 | # Bias input is always-on, always clamping. 537 | # bresps[:, BIASNEURONS] = BIASVALUE 538 | bresps[:, BIASNEURONS[0]] = BIASVALUE 539 | 540 | # All the responses have now been computed for this step 541 | 542 | # Are we in the response period? Then collect network response. 543 | if numstep >= STARTRESPONSETIME and numstep < ENDRESPONSETIME: 544 | 545 | assert numstep < STARTREWARDTIME 546 | # Accumulate the total activation of each output neuron, so that we can compute the network's actual response at the end of response period: 547 | # totalresps += bresps[:, RESPNEURONS] 548 | totalresps += bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] 549 | # Accumulate the MSE error between actual and expected outputs: 550 | # mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 551 | mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 552 | 553 | else: 554 | bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] = 0.0 555 | # bresps[:, RESPNEURONS] = 0.0 556 | 557 | 558 | # Is the response period for this trial finished, or equivalently, are we at the first step of the reward / feedback period? 559 | # If so, compute the network's response (i.e. 
which neuron fired most) 560 | # Also, if using external neuromodulation, we compute the neuromodulation (based on baselined rewards for this trial) and apply plasticity 561 | if numstep == STARTREWARDTIME: 562 | # The network's response for this trial (0 or 1) is the index of the output neuron that had the highest cumulative output over the response period 563 | responses = torch.argmax(totalresps, dim=1) # responses is a 1D, integer-valued array of size BS. totalresps is a 2D real-valued array of size (BS, NBRESPNEURONS) 564 | 565 | # blosses (binary losses) is a 1/-1 "correct/wrong" signal for each batch element for this trial. 566 | blosses = 2.0 * (responses == torch.argmax(targets[:, :, 0], dim=1)).float() - 1.0 567 | responses0thisgen.append(float(responses[0])) 568 | 569 | # We also want the 1-hot version of the response for each neuron. This will be used as the response signal below. 570 | if numtrial > 0: 571 | responses1hot_prev = responses1hot.clone() 572 | responses1hot = F.one_hot(responses, 2) 573 | 574 | # Now we apply lifetime plasticity, with node-perturbation, based on eligibility trace and suitably baselined reward/loss 575 | 576 | 577 | # Baseline computation - only used for node-perturbation 578 | # We compute separate baseline (running average) losses for different types of trials, as defined by their inputs (as in Miconi, eLife 2017). 579 | # So we need to find out the trial type for each element in batch. 580 | # input1 = inputs[:, 0, 0]; input2 = inputs[:, 1, 0] # Uh, what was that? 581 | input1 = stims1[:, 0]; input2 = stims2[:, 0] 582 | trialtypes = (input1>0).long() * 2 + (input2>0).long() 583 | 584 | if MODULTYPE == 'EXTERNAL' and numtrial > 30: # + (300 if EVALW else 0): 585 | dw = - (ETA * eligtraces * ( meanlosstrace[np.arange(BS), trialtypes] * (mselossesthistrial - meanlosstrace[np.arange(BS), trialtypes]) )[:, None, None]).clamp(-MAXDW, MAXDW) 586 | bpw += dw 587 | 588 | 589 | 590 | # Updating the baseline - running average of losses, for each batch element, for the trial type just seen 591 | meanlosstrace[torch.arange(BS).long(), trialtypes] *= MULOSSTRACE 592 | meanlosstrace[torch.arange(BS).long(), trialtypes] += (1.0 - MULOSSTRACE) * mselossesthistrial 593 | 594 | 595 | 596 | 597 | # Plasticity computation for internal (network-controlled) neuromodulation (not used in node-perturbation experiments - highly experimental, do not trust). 598 | # Note that it is applied at every time step, unlike external neuromodulation experiments which only apply plasticity once per trial, at the beginning of the reward period (see above). 599 | if numtrial > 10 and MODULTYPE == 'INTERNAL': # Lifetime plasticity is only applied after a few burn-in trials. 600 | # eligtraces: BS x N x N (1 per connection & batch element) mselossesthistrial: BS. meanlosstrace: BS x 4 (one per trial type). trialtypes: BS bresps/bstates: BS x N 601 | # dw should have shape BS x N x N, i.e. one for each connection and batch element. Do not sum over batch dimension! The batch is purely evolutionary ! 
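# (Editor's aside, not part of the original code: for comparison, the
# EXTERNAL node-perturbation update applied once per trial above is, for
# each connection with eligibility trace e, trial MSE loss L, and running
# baseline Lb for the current trial type,
#     dw = -ETA * e * Lb * (L - Lb), clipped to [-MAXDW, MAXDW],
# so plastic weights move along the perturbation-correlated trace when the
# loss beats its baseline, and against it otherwise.)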
602 | 603 | # Compute and apply the plasticity, based on accumulated eligibility traces and output of a certain neuron 604 | if numstep > 0: 605 | modulsprev = moduls.clone() 606 | moduls = bresps[:, MODNEURONS[0]] - bresps[:, MODNEURONS[1]] 607 | # lifeactpens += torch.abs(moduls) 608 | if numstep > 0 : 609 | lifeactpens += (modulsprev - moduls) ** 2 610 | 611 | dw = (ETA * eligtraces * moduls[:, None, None] ).clamp(-MAXDW, MAXDW) 612 | 613 | 614 | bpw += dw 615 | 616 | 617 | 618 | # Are we in the reward signal period? 619 | # This is just to inform the network of its own performance. The actual lifetime plasticity and neuromodulation is computed above. 620 | if numstep >= STARTREWARDTIME and numstep < ENDREWARDTIME: # Note that by this time, the loss has been computed and is fixed 621 | 622 | # We duplicate the reward signal across many neurons to (maybe) increase its potential impact and exploitability (?...) 623 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 624 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE which is always +ve. 625 | 626 | 627 | # We provide the network with a signal indicating the actual response it chose for this trial. Not sure if needed. 628 | # bresps[:, RESPSIGNALNEURONS] = responses1hot.float() * RESPSIGNALSIZE 629 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = responses1hot.float() * RESPSIGNALSIZE 630 | 631 | 632 | 633 | else: 634 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = 0 635 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = 0 636 | 637 | 638 | 639 | if COLLECTMODOUTSANDREWINS: 640 | stimz.append(bresps[0, STIMNEURONS[0]]) 641 | respz.append(bresps[0, RESPNEURONS[1]] - bresps[0, RESPNEURONS[0]]) 642 | if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 643 | modouts.append(moduls[0]) 644 | rewins.append(bresps[0, REWARDNEURONS[0]]) 645 | 646 | 647 | if EVALW: 648 | allresps.append(bresps.cpu().numpy().astype('float32')) 649 | 650 | 651 | 652 | 653 | # Now all steps done for this trial: 654 | 655 | if PRINTING: 656 | if np.random.rand() < .1: 657 | print("|", int(responses[0]), int(blosses[0]), end=' ') 658 | 659 | ml0s.append(float(mselossesthistrial[0])) 660 | bl0s.append(float(blosses[0])) 661 | bls.append(blosses.cpu().numpy()) 662 | 663 | 664 | # If this trial is part of the last NBTRIALSLOSS trials, we accumulate its trial loss into the agent's total loss for this task. 
665 | if numtrial >= NBTRIALS - NBTRIALSLOSS: # Lifetime losses are only estimated over the last NBTRIALSLOSS trials 666 | # taskmselosses += 2 * mselossesthistrial / NBTRIALSLOSS # the 2* doesn't mean anything 667 | taskmselosses += mselossesthistrial / NBTRIALSLOSS 668 | taskblosses += blosses / NBTRIALSLOSS 669 | 670 | 671 | # Now all trials done for this task: 672 | if PRINTING: 673 | # print("Med task mseloss:", "{:.4f}".format(float(torch.median(taskmselosses)))) 674 | print("\nTASK BLOSS[0]:", "{:.4f}".format(float(taskblosses[0])), "Med task bloss:", "{:.4f}".format(float(torch.median(taskblosses))), 675 | "Med-abs totaldw[0]:", "{:.4f}".format(float(torch.median(torch.abs(bpw[0,:,:])))), 676 | "Max-abs totaldw[0]:", "{:.4f}".format(float(torch.max(torch.abs(bpw[0,:,:])))) 677 | ) 678 | 679 | 680 | 681 | 682 | if COLLECTMODOUTSANDREWINS: 683 | print("Saving Resps, Stims, RI, MO") 684 | 685 | np.savetxt('stims.txt', np.array([float(x) for x in stimz])) 686 | np.savetxt('resps.txt', np.array([float(x) for x in respz])) 687 | np.savetxt('modouts.txt', np.array([float(x) for x in modouts])) 688 | np.savetxt('rewins.txt', np.array([float(x) for x in rewins])) 689 | 690 | # print("") 691 | lifemselosses += taskmselosses / NBTASKSPERGEN 692 | lifeblosses += taskblosses / NBTASKSPERGEN 693 | 694 | if (TESTING or numgen == 0) and numtask == 0: 695 | # These files contain respectively the first and *latest* Testing block of the *current* run only. 696 | FNAME = 'bl_1standLastBlock_gen0.txt' if numgen == 0 else 'bl_1standLastBlock_lastgen.txt' 697 | np.savetxt(FNAME, np.vstack(bls)) 698 | 699 | 700 | 701 | # After all tasks done for this lifetime / generation: 702 | 703 | lifeactpens /= (NBTASKSPERGEN * NBTRIALS) 704 | # lifeactpens -= torch.mean(lifeactpens); lifeactpens /= torch.std(lifeactpens) 705 | # lifeactpens += torch.mean(lifemselosses); lifeactpens *= torch.std(lifemselosses) 706 | 707 | lifelosses = lifemselosses + ALPHAACTPEN * lifeactpens 708 | 709 | binarylosses.append(float(lifeblosses[0])) 710 | evolosses.append(float(lifemselosses[0])) 711 | 712 | 713 | if TESTING and not EVALW: 714 | np.savetxt('blosses_onerun.txt', np.array(binarylosses)) # This is the main evaluation metric: the mean success rate over the last NBTRIALSLOSS trials of each testing generation, for batch element 0 (the unmutated candidate genome) 715 | np.savetxt('mselosses_onerun.txt', np.array(evolosses)) 716 | ww = w.cpu().numpy() 717 | pw0 = bpw[0,:,:].cpu().numpy() 718 | aa = alpha.cpu().numpy() 719 | np.savetxt('w.txt', ww) 720 | np.savetxt('pw0.txt', pw0) 721 | np.savetxt('alpha.txt', aa) 722 | 723 | 724 | if EVALW and True: 725 | # Note: we use .npy format, because multi-dimensional. 
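# (Editor's sketch, not part of the original code: the arrays saved below
# can be reloaded for analysis with np.load, e.g.
#     allresps = np.load('allresps.npy')   # shape (BS, N, T, 2)
# where the last axis holds trial 29, before plasticity starts, and the
# final trial, as selected below.)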
726 | 727 | np.save('allstims.npy', np.stack(allstims, -1)) 728 | np.save('alltgts.npy', np.stack(alltgts, -1)) 729 | 730 | # print(len(allresps), len(allstims), len(alltgts)) 731 | assert len(allresps) == NBTRIALS * T 732 | # print(allresps[0].shape, allstims[0].shape, alltgts[0].shape) 733 | print("Rearranging saved responses into appropriate shape...") 734 | z1 = np.dstack(allresps) 735 | z2 = np.stack(np.split(z1, NBTRIALS, axis=2), axis=-1) 736 | print("Final shape of the saved responses:", z2.shape) 737 | assert(z2.shape == (BS, N, T, NBTRIALS)) 738 | np.save('allresps.npy', z2[:,:,:,[29,-1]]) # We only store response data for 29th (before plasticity starts) and last trial (to keep file size manageable) 739 | 740 | 741 | 742 | # Now we're ready to perform evolution (by computing gradients by hand, and then applying the optimizer with these gradients) 743 | optimizer.zero_grad() 744 | 745 | # Gradient is just loss x mutation (remember we use antithetic sampling) 746 | # gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) # / BS 747 | gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 748 | 749 | 750 | # gradient = gradient / 100 751 | 752 | 753 | wgradnorm = float(torch.norm(gradient)) 754 | wgradnorms.append(wgradnorm) 755 | if PRINTING: 756 | print("norm w:", "{:.4f}".format(float(torch.norm(w))), "norm gradient:", "{:.4f}".format(wgradnorm), 757 | "med-abs w:", "{:.4f}".format(float(torch.median(torch.abs(w)))), 758 | "max-abs w:", "{:.4f}".format(float(torch.max(torch.abs(w)))), 759 | "norm a:", "{:.4f}".format(float(torch.norm(alpha))), "mean a:", "{:.4f}".format(float(torch.mean(alpha)))) 760 | 761 | 762 | w.grad = gradient 763 | wprev = w.clone() 764 | 765 | # gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) # / BS 766 | gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 767 | 768 | 769 | # gradientalpha = gradientalpha / 100 770 | 771 | 772 | alpha.grad = gradientalpha 773 | alphaprev = alpha.clone() 774 | 775 | if numgen > 0 and not TESTING and not EVALW: 776 | optimizer.step() 777 | 778 | 779 | wdiff = w - wprev 780 | adiff = alpha - alphaprev 781 | if PRINTING: 782 | print("Norm w-wprev:", "{:.4f}".format(float(torch.norm(wdiff))), "Max abs w-wprev:", "{:.4f}".format(float(torch.max(torch.abs(wdiff)))), 783 | "Norm a-aprev:", "{:.4f}".format(float(torch.norm(adiff))), "Max abs a-aprev:", "{:.4f}".format(float(torch.max(torch.abs(adiff)))) ) 784 | 785 | 786 | 787 | if PRINTING: 788 | print("Med/min/max/Half-Nth/0th loss in batch:", float(torch.median(lifelosses)), float(torch.min(lifelosses)), float(torch.max(lifelosses)), 789 | float(lifelosses[BS//2]), float(lifelosses[0])) 790 | print("Med/min/max/Half-Nth/0th life mse loss in batch:", float(torch.median(lifemselosses)), float(torch.min(lifemselosses)), float(torch.max(lifemselosses)), 791 | float(lifemselosses[BS//2]), float(lifemselosses[0])) 792 | print("Med/min/max/Half-Nth/0th activity penalty in batch:", float(torch.median(lifeactpens)), float(torch.min(lifeactpens)), float(torch.max(lifeactpens)), 793 | float(lifeactpens[BS//2]), float(lifeactpens[0])) 794 | print("Gen", numgen, "done in", time.time()-tic) 795 | 796 | 797 | 798 | 799 | 800 | 801 | print("Time taken:", time.time()-ticstart) 802 | 803 | 804 | -------------------------------------------------------------------------------- /curves.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | 6 | TESTTASK = 'DMS' 7 | 8 | lt = np.loadtxt 9 | 10 | bls = [lt('blosses_onerun.txt')] 11 | #bls = [lt('bl1.txt'), lt('bl2.txt'), lt('bl3.txt'), lt('bl4.txt'), lt('bl5.txt'), lt('bl6.txt')] 12 | 13 | 14 | LEN = np.min([x.size for x in bls]) 15 | bl = np.vstack( [x[:LEN] for x in bls] ) 16 | print(LEN) 17 | 18 | 19 | bl = .5 + .5 * bl 20 | 21 | #bl = .5 + .5 * np.loadtxt('blosses_onerun.txt') 22 | 23 | if(len(bl.shape)<2): # If there is only a single run, add a singleton dimension 24 | bl = bl[None, :] 25 | print(bl.shape) 26 | ss = bl.shape[1] # Number of generations 27 | 28 | plt.figure(figsize=(4,4)) 29 | 30 | xr = np.arange(len(bl[0,:])) 31 | plt.fill_between(xr[xr%10 != 0], np.quantile(bl, .25, axis=0).T[xr % 10 != 0], np.quantile(bl, .75, axis=0).T[xr % 10 != 0], color='b', alpha=.3) 32 | plt.plot(xr[xr % 10 != 0], np.quantile(bl, .5, axis=0).T[xr % 10 != 0], 'b', label='Training tasks'); 33 | plt.fill_between(xr[::10], np.quantile(bl, .25, axis=0).T[0::10], np.quantile(bl, .75, axis=0).T[0::10], color='r', alpha=.3) 34 | plt.plot(xr[::10], np.quantile(bl, .5, axis=0).T[0::10], 'r', label='Test task') 35 | 36 | 37 | plt.xlabel('Generations') 38 | plt.ylabel('% correct over last 100 trials') 39 | plt.legend(loc='lower right') 40 | 41 | plt.title('Test task: '+str(TESTTASK).upper()) 42 | 43 | plt.tight_layout() 44 | plt.show() 45 | -------------------------------------------------------------------------------- /decoding/HOWTOGENERATEFIGURES.txt: -------------------------------------------------------------------------------- 1 | - Upload w.txt and alpha.txt from a completed run. 2 | 3 | - Run code.py with EVALW=True (line 144), twice: once with line 192 set to "EVALW and False" (to run it with initialized, random weights/alpha), and once with line 192 set to "EVALW and True" (to run it with the actual uploaded weights and alpha). 4 | 5 | - On the first run (with randomly initialized weights), download allstims.npy, allresps.npy, alltgts.npy, and rename them with the same filenames but with ".0" suffix (allstims.npy.0, allresps.npy.0, alltgts.npy.0). 6 | 7 | - On the second run (with actual uploaded w and alpha), just keep the output files under their unmodified name. 8 | 9 | - Just run decoding.py. It should generate all decoding figures on its own, as PNG files. 10 | -------------------------------------------------------------------------------- /decoding/code.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torch.backends.cudnn as cudnn 6 | import pdb 7 | 8 | import scipy 9 | from scipy import ndimage 10 | from scipy import linalg 11 | 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | 15 | import os 16 | import argparse 17 | import time 18 | 19 | import numpy as np 20 | from numpy import fft 21 | 22 | from scipy import io as spio 23 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 24 | 25 | torch.set_printoptions(precision=5) 26 | np.set_printoptions(precision=5) 27 | 28 | 29 | 30 | # Specify the test task (and its logical negation, which is also withheld from the training set) 31 | # TESTTASK = 'nand'; TESTTASKNEG = 'and' 32 | TESTTASK = 'dms'; TESTTASKNEG = 'dnms' 33 | 34 | 35 | 36 | LR = 1e-2 # Adam (evolutionary) LR. 
37 | WDECAY = 3e-4 # Evolutionary weight decay parameter (for the Adam optimizer) 38 | MUTATIONSIZE = 3 * .01 # Std dev of the Gaussian mutations of the evolutionary algorithm 39 | 40 | # ALPHAACTPEN = 3 * 3e-3 41 | ALPHAACTPEN = 3 * 3 * 10 * 3e-3 # When squaring 42 | 43 | NBGEN = 5000 # 1700 # 500 # Number of generations per run 44 | NUMGENCUTLR = 100000 45 | 46 | N = 70 # Number of neurons in the RNN. 47 | 48 | 49 | 50 | BS = 500 # 500 # 1000 # Batch size, i.e. population size for the evolutionary algorithm. 51 | assert BS % 2 == 0 # Should be even because of antithetic sampling. 52 | 53 | # Same parameters as GR Yang: 54 | TAU = 100 # Neuron membrane constant, in ms 55 | DT = 20 # Duration of a timestep, in ms 56 | 57 | 58 | # All the following times are in *timesteps*, not ms 59 | T = 50 # Number of *timesteps* per trial 60 | STIMTIME = 20 # Duration of stimulus input, total, *in timesteps* (not ms) 61 | REWARDTIME = 10 # Duration of reward signal period 62 | RESPONSETIME = 10 # Duration of response period 63 | STARTRESPONSETIME = 25 # Timestep at which response period starts 64 | ENDRESPONSETIME = STARTRESPONSETIME + RESPONSETIME 65 | STARTREWARDTIME = 36 # Timestep at which reward is delivered and reward signal starts 66 | ENDREWARDTIME = STARTREWARDTIME + REWARDTIME 67 | assert ENDREWARDTIME < T 68 | 69 | 70 | MODULTYPE = 'EXTERNAL' # 'INTERNAL' 71 | 72 | # JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 73 | # TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 74 | # PROBAMODUL = .03 # .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 75 | # ALPHAMODUL = 1.0 # .5 # Scale of the random perturbations 76 | # ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 77 | # MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for neuromodulation 78 | # MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 79 | # INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 80 | 81 | 82 | JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 83 | TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 84 | PROBAMODUL = .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 85 | ALPHAMODUL = .5 # Scale of the random perturbations 86 | ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 87 | MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for external neuromodulation 88 | MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 89 | INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 90 | 91 | 92 | 93 | # The names of all the tasks. 14 tasks in total, because "always respond 0" and "always respond 1" are not included. 94 | alltasks = ['and', 'nand' , '01', 'anti01' , '10', 'anti10', 'watchstim1', 'watchstim2' ,'dms', 'antiwatchstim2', 'antiwatchstim1', 'or', 'nor', 'dnms'] 95 | 96 | 97 | 98 | NBSTIMNEURONS = 2 # 2 Stimulus neurons. Stimuli are binary, so both neurons receive opposite-valued inputs (or 0) 99 | NBREWARDNEURONS = 2 # 6 # 2 # reward signal for this trial. A value is represented with 2 inputs, as it is for stimulus neurons. 100 | NBBIASNEURONS = 1 # Bias neurons. Activations clamped to BIASVALUE. 
101 | NBINPUTNEURONS = NBSTIMNEURONS + NBREWARDNEURONS + NBBIASNEURONS # The first NBINPUTNEURONS neurons in the network are input neurons (includes the bias, noise and reward inputs) 102 | NBRESPNEURONS = 2 # Response neurons for 0 and 1. 103 | NBMODNEURONS = 2 # Neuromodulatory output neurons 104 | NBOUTPUTNEURONS = NBRESPNEURONS + NBMODNEURONS # The last NBOUTPUTNEURONS neurons in the network are output neurons. Response neurons + Modulatory neuron. 105 | NBRESPSIGNALNEURONS = NBRESPNEURONS # Neurons that receive the response-given signal ("what response did I just give?") 106 | STIMNEURONS = np.arange(NBSTIMNEURONS) 107 | INPUTNEURONS = np.arange(NBINPUTNEURONS) 108 | OUTPUTNEURONS = np.arange(N-NBOUTPUTNEURONS, N) 109 | MODNEURONS = np.arange(N-NBOUTPUTNEURONS, N-NBOUTPUTNEURONS + NBMODNEURONS) 110 | # NUMMODNEURON = N - NBOUTPUTNEURONS # The modulatory neuron is the first output neuron 111 | RESPNEURONS = np.arange(N-NBOUTPUTNEURONS+NBMODNEURONS, N) # Then come the response neurons 112 | REWARDNEURONS = np.arange(NBSTIMNEURONS, NBSTIMNEURONS+NBREWARDNEURONS) # The neurons receiving (and broadcasting) the "reward for this trial" signal are the ones just after the stimulus inputs. 113 | BIASNEURONS = np.arange(NBSTIMNEURONS+NBREWARDNEURONS, NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS) 114 | FIRSTRESPSIGNALNEURON = NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS # The first neuron that receives the response-given signal. We'll need this later 115 | assert FIRSTRESPSIGNALNEURON == NBINPUTNEURONS 116 | assert len(RESPNEURONS) == NBRESPNEURONS 117 | RESPSIGNALNEURONS = np.arange(FIRSTRESPSIGNALNEURON, FIRSTRESPSIGNALNEURON +NBRESPSIGNALNEURONS) 118 | 119 | 120 | BIASVALUE = 1.0 121 | 122 | 123 | 124 | NBTASKSPERGEN = 1 # 2 # 2 task blocks per generation 125 | 126 | 127 | NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last 100 trials of each block 128 | NBTRIALS = 300 + NBTRIALSLOSS # Total number of trials per block 129 | # NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last 100 trials of each block 130 | # NBTRIALS = 150 + NBTRIALSLOSS # Total number of trials per block 131 | 132 | 133 | 134 | REWARDSIZE = 3.0 # 3 * 3.0 # Size of the binary-reward signal (correct/incorrect) 135 | STIMSIZE = 3.0 # Size of the stimulus input 136 | RESPSIGNALSIZE = 3.0 # Size of the response-given signal 137 | 138 | 139 | totalnbtasks = 0 140 | ticstart = time.time() 141 | 142 | 143 | # EVALW is to assess the behavior of an evolved network. Run it on a single batch of all tasks, without any mutation 144 | EVALW = True 145 | if EVALW: 146 | # NBTRIALS = NBTRIALS # more "burn-in" trials? doesn't seem to change anything 147 | NBGEN = 1 148 | NBTASKSPERGEN = 1 149 | BS = 500 150 | MUTATIONSIZE = 0 151 | allresps=[] 152 | allstims=[] 153 | alltgts=[] 154 | 155 | 156 | 157 | 158 | with torch.no_grad(): # We don't need PyTorch to keep track of gradients, since we're computing the gradient ourselves (through evolution). 159 | 160 | PRINTING = True # if numgen == 0 or np.random.rand() < .05 else False 161 | 162 | # Initialize innate weight values 163 | w = torch.randn(N,N) * JINIT / np.sqrt(N) 164 | w = w.to(device) 165 | 166 | # Initialize alpha values - the plasticity parameters (capital-pi in the paper) 167 | alpha = INITALPHA * torch.ones_like(w).to(device) 168 | 169 | # We zero out input weights to input neurons, though it probably doesn't have any effect. 170 | w.data[:NBINPUTNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 
171 | # We also zero out the weights to neuromodulatory neurons, which probably does have an effect! 172 | w.data[MODNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 173 | winit = w.clone() 174 | 175 | # We will be using the Adam optimizer to apply our (hand-computed) evolutionary gradients 176 | optimizer = optim.Adam([w, alpha], lr=LR, weight_decay=WDECAY) # Default betas=(0.9, 0.999) 177 | 178 | # Evolosses are real-valued losses used for evolution. Binarylosses are binary 'correct/wrong' signals, also used for logging. 179 | evolosses = [] 180 | responses0 = [] 181 | binarylosses = [] 182 | wgradnorms = [] 183 | mytaskprev = mytaskprevprev = mytaskprevprevprev = -1 184 | 185 | 186 | if not EVALW: 187 | ww = w.cpu().numpy() 188 | aa = alpha.cpu().numpy() 189 | np.savetxt('winit.txt', ww) 190 | np.savetxt('alphainit.txt', aa) 191 | 192 | if EVALW and True: 193 | w = np.loadtxt('w.txt') 194 | w = torch.from_numpy(w).float().to(device) 195 | winit = w.clone() 196 | 197 | alpha = np.loadtxt('alpha.txt') 198 | alpha = torch.from_numpy(alpha).float().to(device) 199 | # alpha.fill_(torch.mean(alpha)) 200 | 201 | 202 | print("MODULTYPE is:", MODULTYPE) 203 | assert MODULTYPE == 'EXTERNAL' or MODULTYPE == 'INTERNAL', "Modulation type must be 'INTERNAL' or 'EXTERNAL'" 204 | 205 | 206 | 207 | # Ready to start the evolutionary loop, iterating over generations (i.e. lifetimes). 208 | 209 | for numgen in range(NBGEN): 210 | 211 | 212 | 213 | if numgen == NUMGENCUTLR: 214 | for param_group in optimizer.param_groups: 215 | param_group['lr'] /= 5.0 216 | 217 | 218 | 219 | # Every 10th generation is for testing on the withheld task (with no weight change) 220 | TESTING = False 221 | if numgen == 0 or numgen == NBGEN-1 or numgen % 10 == 0: 222 | TESTING = True 223 | if PRINTING: 224 | print("TESTING") 225 | if EVALW: 226 | TESTING = False 227 | 228 | 229 | tic = time.time() 230 | responses0thisgen = [] 231 | 232 | 233 | 234 | alpha.clip_(min=0) 235 | 236 | 237 | 238 | # Generating the population of mutated individuals: 239 | 240 | # First, batch the weights. 241 | bw = torch.dstack(BS*[w]).movedim(2,0).to(device) # batched weights 242 | balpha = torch.dstack(BS*[alpha]).movedim(2,0).to(device) # batched alphas 243 | # Generate the mutations, for both w and alpha 244 | # NOTE: batch element 0 (and BS/2, its antithetic pair) are NOT mutated, and represent the current unmutated candidate genotype. 245 | mutations_wandalpha = [] 246 | for n, x in enumerate( (bw, balpha) ): 247 | mutations = torch.randn_like(x, requires_grad=False).to(device) * MUTATIONSIZE 248 | mutations[0,:,:] = 0 # 1st item in batch = current candidate 249 | mutations[BS//2:, :, :] = -mutations[:BS//2, :, :] # Antithetic sampling for mutations ! Really helps. 250 | if TESTING or EVALW: 251 | mutations *= 0.0 # No mutation - results in batch score variance being caused only by randomness in trial order and (possibly) lifetime perturbations 252 | x += mutations 253 | mutations_wandalpha.append(mutations) 254 | 255 | 256 | 257 | bw.data[:, :NBINPUTNEURONS, :] = 0 # Input neurons receive 0 connections. Probably not necessary. 258 | bworig = bw.clone() # Storing the weights for comparison purposes at the gradient step (below). 
259 | 260 | lifelosses = torch.zeros(BS, requires_grad=False).to(device) 261 | lifemselosses = torch.zeros(BS, requires_grad=False).to(device) 262 | lifeactpens = torch.zeros(BS, requires_grad=False).to(device) 263 | lifeblosses = torch.zeros(BS, requires_grad=False).to(device) 264 | 265 | 266 | 267 | 268 | # Lifetime loop, iterates over task-blocks: 269 | for numtask in range(NBTASKSPERGEN): 270 | totalnbtasks += 1 271 | 272 | COLLECTMODOUTSANDREWINS = not EVALW and ( (numtask + numgen * 2) % 7 == 0 ) 273 | 274 | # bpw = batched plastic weights 275 | bpw = torch.zeros_like(bw).to(device) # For now, plastic weights are initialized to 0 at the beginning of each task. 276 | 277 | # Initialize neural states 278 | bstates = .1 * torch.ones(BS, N).to(device) # bstates (batched states) contains the neural activations (before nonlinearity). Dimensionality appropriate for batched matrix multiplication. 279 | bstates[:, INPUTNEURONS] = 0 280 | bresps = 1.0 * bstates # bresps is the actual neural responses, after nonlinearity, and also serves as the input for the next step. 281 | bresps[:, BIASNEURONS] = BIASVALUE 282 | 283 | meanlosstrace = torch.zeros(BS, 2 * 2).to(device) 284 | bls = [] # Will store binary losses of all batch elements, for each trial of this task 285 | bl0s = [] # Same but only for batch element 0 (i.e. the unmutated candidate genome) 286 | ml0s = [] # MSE loss (the one used for evolution) for element 0 (unmutated candidate), of all trials for this task 287 | 288 | 289 | 290 | # Choose the task ! If not testing, make sure it's different from recently chosen tasks. 291 | 292 | 293 | if TESTING: 294 | mytask = TESTTASK 295 | mytasknum = alltasks.index(mytask) 296 | else: 297 | while True: 298 | mytasknum = np.random.randint(len(alltasks)) 299 | 300 | mytask = alltasks[mytasknum] 301 | 302 | if ( (mytask!= TESTTASK) 303 | and (mytask != TESTTASKNEG) # We withhold both the test task and its logical negation 304 | and (mytask != mytaskprev) 305 | and (mytask != mytaskprevprev) 306 | ): 307 | 308 | break 309 | 310 | mytaskprevprev = mytaskprev; mytaskprev= mytask 311 | 312 | 313 | 314 | # # Only use AND and NAND as tasks 315 | # mytasknum = numtask % 4 316 | # mytask = alltasks[mytasknum] 317 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 318 | 319 | 320 | btasks = [] # Tasks for the whole batch 321 | for ii in range(BS//2): 322 | if TESTING: 323 | cand_task = TESTTASK 324 | cand_tasknum = alltasks.index(TESTTASK) 325 | else: 326 | while True: 327 | cand_tasknum = np.random.randint(len(alltasks)) 328 | cand_task = alltasks[cand_tasknum] 329 | if ( (cand_task!= TESTTASK) 330 | and (cand_task != TESTTASKNEG) # We withhold both the test task and its logical negation 331 | 332 | 333 | and (cand_tasknum % 2 == (numgen // 2) % 2) # Training on alternate halves of the training set at successive (pairs of) generations 334 | # and (cand_tasknum % 4 == numgen % 4) # Training on alternate quarters of the training set at successive generations 335 | 336 | 337 | ): 338 | break 339 | btasks.append(cand_task) 340 | 341 | btasks = btasks * 2 # Duplicating the list, so each antithetic pair has the same tasks. 
342 | 343 | 344 | 345 | if EVALW: 346 | btasks = [TESTTASK] * BS 347 | # btasks = alltasks * (BS // len(alltasks) + 1) 348 | # btasks = btasks[:BS] 349 | # with open('btasks.txt', 'w') as f: 350 | # for item in btasks: 351 | # f.write("%s\n" % item) 352 | 353 | 354 | # btasks = [mytask] * BS 355 | 356 | 357 | assert(len(btasks) == BS) 358 | 359 | 360 | # Cumulative MSE and binary losses for this task, over the last NBTRIALSLOSS trials of the block: 361 | taskmselosses = torch.zeros_like(lifemselosses).to(device) 362 | taskblosses = torch.zeros_like(lifemselosses).to(device) 363 | 364 | respz = [] # Response neuron outputs 365 | stimz = [] # Stimulus neurons outputs 366 | modouts = [] # Neuromodulatory output 367 | rewins = [] # Received rewards (reward neuron outputs) 368 | 369 | 370 | if PRINTING: 371 | print("task[0]:", btasks[0], "task[1]:", btasks[1]) 372 | 373 | # OK, ready to start the task. 374 | 375 | # Generate the task data (inputs and targets) for all trials: 376 | # taskdata = generateInputsAndTargetsForTask(mytask=mytask) 377 | 378 | eligtraces = torch.zeros_like(bw, requires_grad=False).to(device) # Initialize the eligibility traces at the start of each block/task. 379 | 380 | 381 | # Task loop, iterating over trials 382 | # You do NOT erase memory (neural activations or plastic weights) between successive trials ! 383 | for numtrial in range(NBTRIALS): 384 | 385 | 386 | # # Actually do initialize network activations for each trial - THIS IS ONLY FOR DEBUGGING / SIMPLER TEST TASK! 387 | # bresps.fill_(0) 388 | # bstates.fill_(0) 389 | 390 | # # We reinitialize only modulatory neuron activations for each trial - THIS IS ONLY FOR DEBUGGING / SIMPLER TEST TASK! 391 | # bresps[:, MODNEURONS] = 0 392 | # bstates[:, MODNEURONS] = 0 393 | 394 | 395 | 396 | # Initializations 397 | mselossesthistrial = torch.zeros(BS, requires_grad=False).to(device) # MSE losses for this trial 398 | totalresps = torch.zeros(BS, NBRESPNEURONS, requires_grad=False).to(device) # Will accumulate the total outputs of each network over the trial, so we can compute the network's response for this trial. 
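# (Editor's note, not part of the original code: totalresps is later read
# out to produce the network's trial response; in the top-level code.py
# this is done with
#     responses = torch.argmax(totalresps, dim=1)
# i.e. the response neuron with the highest cumulative output over the
# response period determines the binary response.)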
399 | 400 | # Generate the inputs and targets for this trial: 401 | 402 | # Pick stimulus 1 and stimulus 2 for this trial (and for each batch member): 403 | stims1 = (torch.rand(BS, 1) > .5).float() 404 | stims2 = (torch.rand(BS, 1) > .5).float() 405 | 406 | 407 | 408 | 409 | # Antithetic pairs share the exact same stimuli 410 | stims1[BS//2:, :] = stims1[:BS//2, :] 411 | stims2[BS//2:, :] = stims2[:BS//2, :] 412 | 413 | 414 | 415 | # Actual temporal inputs: 416 | inpts = np.zeros((BS, NBSTIMNEURONS, STIMTIME)) 417 | StimDur = STIMTIME 418 | StartStim = 0 419 | # The two stimuli are presented in succession, with both input neurons locked in opposite values to each other: 420 | inpts[:, 0, StartStim:StartStim+StimDur//2 - 2] = 2.0 * stims1 - 1.0 421 | inpts[:, 0, StartStim+StimDur//2:StartStim+StimDur - 2] = 2.0 * stims2 - 1.0 422 | inpts[:, 1, StartStim:StartStim+StimDur] = -inpts[:, 0, StartStim:StartStim+StimDur] 423 | 424 | inputs = torch.from_numpy(inpts).float().to(device) 425 | 426 | 427 | 428 | # Now we compute the targets, that is, the expected values of the output neurons, depending on inputs and tasks 429 | tgts = -100 * np.ones((BS, NBRESPNEURONS, RESPONSETIME)) 430 | 431 | for ii in range(BS): 432 | # First we generate the expected output for the non-null response neuron, based on inputs and task: 433 | if btasks[ii] == 'watchstim1': 434 | tgts[ii, 1, :] = stims1[ii, 0] 435 | elif btasks[ii] == 'watchstim2': 436 | tgts[ii, 1, :] = stims2[ii, 0] 437 | elif btasks[ii] == 'antiwatchstim1': 438 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] 439 | elif btasks[ii] == 'antiwatchstim2': 440 | tgts[ii, 1, :] = 1.0 - stims2[ii, 0] 441 | elif btasks[ii] == 'and': 442 | tgts[ii, 1, :] = (stims1[ii, 0] * stims2[ii, 0]) 443 | elif btasks[ii] == 'nand': 444 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] * stims2[ii, 0]) 445 | # These two lines add 25% running time to the entire program! Looks like np.clip is *slow*. 446 | # elif btasks[ii] == 'or': 447 | # tgts[ii, 1, :] = np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 448 | # elif btasks[ii] == 'nor': 449 | # tgts[ii, 1, :] = 1.0 - np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 450 | # Instead, we will clip after the full array is done. This should still work out the same. 451 | elif btasks[ii] == 'or': 452 | tgts[ii, 1, :] = stims1[ii, 0] + stims2[ii, 0] 453 | elif btasks[ii] == 'nor': 454 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] + stims2[ii, 0]) 455 | elif btasks[ii] == '10': 456 | tgts[ii, 1, :] = stims1[ii, 0] * (1.0 - stims2[ii, 0]) 457 | elif btasks[ii] == 'anti10': 458 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] * (1.0 - stims2[ii, 0]) 459 | elif btasks[ii] == '01': 460 | tgts[ii, 1, :] = (1.0 - stims1[ii, 0]) * stims2[ii, 0] 461 | elif btasks[ii] == 'anti01': 462 | tgts[ii, 1, :] = 1.0 - (1.0 - stims1[ii, 0]) * stims2[ii, 0] 463 | elif btasks[ii] == 'dms': 464 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 465 | elif btasks[ii] == 'dnms': 466 | tgts[ii, 1, :] = (stims1[ii, 0] != stims2[ii, 0]) 467 | else: 468 | raise ValueError("Unknown task: " + btasks[ii]) 469 | 470 | tgts[:, 1, :] = np.clip(tgts[:, 1, :], 0.0, 1.0) # Handles 'or' (which can reach 2 before clipping) and 'nor' (which can reach -1) 471 | 472 | 473 | # tgts[:, 1, :] = 1.0 474 | 
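(Editor's note: every task above reduces to a binary function of (stim1, stim2). A compact way to sanity-check the branch logic is a lookup table of truth functions; the hypothetical `TASK_FN` table below is illustrative only and is not part of the repository, which uses the explicit if/elif chain above:)

```python
import numpy as np

# Hypothetical task -> truth-function table for sanity-checking the targets.
TASK_FN = {
    'watchstim1':     lambda s1, s2: s1,
    'watchstim2':     lambda s1, s2: s2,
    'antiwatchstim1': lambda s1, s2: 1 - s1,
    'antiwatchstim2': lambda s1, s2: 1 - s2,
    'and':    lambda s1, s2: s1 & s2,
    'nand':   lambda s1, s2: 1 - (s1 & s2),
    'or':     lambda s1, s2: s1 | s2,
    'nor':    lambda s1, s2: 1 - (s1 | s2),
    '10':     lambda s1, s2: s1 & (1 - s2),
    'anti10': lambda s1, s2: 1 - (s1 & (1 - s2)),
    '01':     lambda s1, s2: (1 - s1) & s2,
    'anti01': lambda s1, s2: 1 - ((1 - s1) & s2),
    'dms':    lambda s1, s2: (s1 == s2).astype(int),   # delayed match-to-sample
    'dnms':   lambda s1, s2: (s1 != s2).astype(int),
}

s1 = np.array([0, 0, 1, 1]); s2 = np.array([0, 1, 0, 1])
for name, fn in TASK_FN.items():
    print(name.rjust(14), fn(s1, s2))   # one truth table per task
```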
475 | # The null-response neuron's expected output is just the opposite of the non-null response neuron's output (the response is either 0 or 1). 476 | tgts[:, 0, :] = 1.0 - tgts[:, 1, :] 477 | 478 | assert np.all(np.logical_or(tgts == 0.0 , tgts == 1.0)) 479 | 480 | if EVALW: 481 | alltgts.append(tgts[:,1, 0]) 482 | allstims.append(np.hstack((stims1, stims2))) 483 | 484 | # assert numgen < 2 or numtrial < 15 485 | 486 | 487 | 488 | targets = torch.from_numpy(tgts).float().to(device) 489 | 490 | # In practice, we clip targets to 0.1/0.9 instead of actually 0.0/1.0. This may or may not help. 491 | targets.clip_(min=0.1, max=0.9) 492 | 493 | 494 | 495 | # raise ValueError 496 | 497 | 498 | # Run the network. Trial loop, iterating over timesteps 499 | for numstep in range(T): 500 | 501 | # Update neural activations, using previous-step bresps (actual neural outputs) as input: 502 | bstates += (DT / TAU) * (-bstates + torch.bmm((bw + balpha * bpw), bresps[:, :, None])[:,:,0] ) 503 | 504 | 505 | # Applying the random perturbations on neural activations, both for noise and for the lifetime plasticity algorithm (node-perturbation) 506 | # And also updating the eligibility trace appropriately 507 | if numstep > 1: 508 | perturbindices = (torch.rand(1, N) < PROBAMODUL).int() # Which neurons get perturbed? 509 | 510 | # perturbindices[0, MODNEURONS] = 0 # We disable perturbations on neuromodulatory neurons for debugging... 511 | 512 | 513 | perturbations = (ALPHAMODUL * perturbindices * (2 * torch.rand(1, N) - 1.0)).to(device) # Note the dimensions: the same noise vector is applied to all elements in the batch (to save time!) 514 | 515 | 516 | 517 | 518 | if numtrial > NBTRIALS - 20: # No perturbations during the final trials of the block - presumably so the trials used for the loss estimate (see below) are evaluated without noise 519 | perturbations.fill_(0) 520 | 521 | 522 | 523 | bstates += perturbations 524 | 525 | # Node-perturbation: Hebbian eligibility trace = product between inputs (bresps from previous time step) and *perturbations* in outputs. dH = X * deltaY 526 | # We do this with a (batched) outer product between the (column) vector of perturbations (1 per neuron) and the (row) vector of inputs 527 | # Note that here, since we have an RNN, the input is bresps - the network's responses from the previous time step 528 | if torch.sum(perturbindices) > 0: 529 | eligtraces += torch.bmm( perturbations.expand(BS, -1)[:, :, None], bresps[:, None, :] ) 530 | 531 | # Eligibility traces, unlike actual plastic weights, are decaying 532 | eligtraces -= (DT / TAU_ET) * eligtraces 533 | 534 | 535 | # We can now compute the actual neural responses for this time step, applying the appropriate nonlinearity to each neuron 536 | bresps = bstates.clone() # F.leaky_relu(bstates) 537 | # The following assumes that response neurons are the last neurons of the network ! 538 | bresps[:,N-NBRESPNEURONS:].sigmoid_() # The response neurons (NOT all output neurons - modulatory neurons not included!) are sigmoids, all others are tanh. An arbitrary design choice. 539 | bresps[:,:N-NBRESPNEURONS].tanh_() 540 | 541 | 542 | # Are we in the input presentation period? Then apply the inputs. 543 | # Inputs are clamping, fixing the response of the input neurons. 544 | if numstep < STIMTIME: 545 | # bresps[:, STIMNEURONS] = STIMSIZE * inputs[:, :, numstep] 546 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = STIMSIZE * inputs[:, :, numstep] 547 | else: 548 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = 0 549 | # bresps[:, STIMNEURONS] = 0 550 | 551 | # Bias input is always-on, always clamping. 552 | # bresps[:, BIASNEURONS] = BIASVALUE 553 | bresps[:, BIASNEURONS[0]] = BIASVALUE 554 | 555 | # All the responses have now been computed for this step 556 | 
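(Editor's note: two mechanics above are worth seeing in isolation: the leaky, Euler-discretized RNN update `x += (DT/TAU)(-x + W·r)`, and the node-perturbation eligibility trace, which accumulates outer products of output perturbations with the previous step's responses and decays exponentially. A minimal single-network sketch, with illustrative sizes and a plain tanh everywhere, no batching, plasticity, or clamped inputs:)

```python
import torch

# Minimal sketch of the recurrent update and the eligibility trace.
N, DT, TAU, TAU_ET = 70, 20.0, 100.0, 1000.0
PROBAMODUL, ALPHAMODUL = 0.1, 0.5

W = torch.randn(N, N) / (N ** 0.5)          # arbitrary fixed weights for the sketch
x = torch.zeros(N)                          # activations (pre-nonlinearity)
r = torch.zeros(N)                          # responses (post-nonlinearity)
elig = torch.zeros(N, N)                    # one eligibility trace per synapse

for step in range(50):
    x = x + (DT / TAU) * (-x + W @ r)       # leaky Euler-discretized update

    # Sparse random perturbations of the activations (node-perturbation)
    mask = (torch.rand(N) < PROBAMODUL).float()
    pert = ALPHAMODUL * mask * (2 * torch.rand(N) - 1.0)
    x = x + pert

    # Eligibility: outer product of output perturbations x previous responses,
    # then exponential decay with time constant TAU_ET
    elig = elig + torch.outer(pert, r)
    elig = elig - (DT / TAU_ET) * elig

    r = torch.tanh(x)                       # responses fed back at the next step
```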
557 | # Are we in the response period? Then collect the network's response. 558 | if numstep >= STARTRESPONSETIME and numstep < ENDRESPONSETIME: 559 | 560 | assert numstep < STARTREWARDTIME 561 | # Accumulate the total activation of each response neuron, so that we can compute the network's actual response at the end of the response period: 562 | # totalresps += bresps[:, RESPNEURONS] 563 | totalresps += bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] 564 | # Accumulate the MSE error between actual and expected outputs: 565 | # mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 566 | mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 567 | 568 | else: 569 | bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] = 0.0 570 | # bresps[:, RESPNEURONS] = 0.0 571 | 572 | 573 | # Is the response period for this trial finished, or equivalently, are we at the first step of the reward / feedback period? 574 | # If so, compute the network's response (i.e. which neuron fired most) 575 | # Also, if using external neuromodulation, we compute the neuromodulation (based on baselined rewards for this trial) and apply plasticity 576 | if numstep == STARTREWARDTIME: 577 | # The network's response for this trial (0 or 1) is the index of the response neuron that had the highest cumulative output over the response period 578 | responses = torch.argmax(totalresps, dim=1) # responses is a 1D, integer-valued array of size BS. totalresps is a 2D, real-valued array of size BS x NBRESPNEURONS 579 | 580 | # blosses (binary losses) is a 1/-1 "correct/wrong" signal for each batch element for this trial. 581 | blosses = 2.0 * (responses == torch.argmax(targets[:, :, 0], dim=1)).float() - 1.0 582 | responses0thisgen.append(float(responses[0])) 583 | 584 | # We also want the 1-hot version of the response for each batch element. This will be used as the response signal below. 585 | if numtrial > 0: 586 | responses1hot_prev = responses1hot.clone() 587 | responses1hot = F.one_hot(responses, 2) 588 | 589 | # Now we apply lifetime plasticity, with node-perturbation, based on the eligibility trace and a suitably baselined reward/loss 590 | 591 | 592 | # Baseline computation - only used for external neuromodulation experiments 593 | # We compute separate baseline (running average) losses for different types of trials, as defined by their inputs (as in Miconi, eLife 2017). 594 | # So we need to find out the trial type for each element in the batch. 595 | # input1 = inputs[:, 0, 0]; input2 = inputs[:, 1, 0] # Uh, what was that? 596 | input1 = stims1[:, 0]; input2 = stims2[:, 0] 597 | trialtypes = (input1>0).long() * 2 + (input2>0).long() 598 | 599 | if MODULTYPE == 'EXTERNAL' and numtrial > 30: # + (300 if EVALW else 0): 600 | dw = - (ETA * eligtraces * ( meanlosstrace[torch.arange(BS), trialtypes] * (mselossesthistrial - meanlosstrace[torch.arange(BS), trialtypes]) )[:, None, None]).clamp(-MAXDW, MAXDW) 601 | bpw += dw 602 | 603 | 604 | 605 | # Updating the baseline - running average of losses, for each batch element, for the trial type just seen 606 | meanlosstrace[torch.arange(BS).long(), trialtypes] *= MULOSSTRACE 607 | meanlosstrace[torch.arange(BS).long(), trialtypes] += (1.0 - MULOSSTRACE) * mselossesthistrial 608 | 609 | 610 | 611 | 
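(Editor's note: the `dw` expression above is the heart of the external node-perturbation rule: the per-synapse eligibility trace is scaled by the deviation of this trial's loss from a per-trial-type running-average baseline (with an additional factor of the baseline itself, as written in the code), negated so that worse-than-average trials push against the perturbed direction, and clamped. Isolated, with hypothetical sizes and random stand-in data:)

```python
import torch

# Sketch of one baselined node-perturbation weight update.
BS, N, ETA, MAXDW, MULOSSTRACE = 4, 70, 0.03, 1e-2, 0.9

eligtraces = torch.randn(BS, N, N)    # accumulated (perturbation x input) products
loss = torch.rand(BS)                 # this trial's MSE loss per batch element
baseline = torch.rand(BS)             # running-average loss for this trial type

# Better-than-baseline trials (loss < baseline) move weights along the
# eligibility trace; worse-than-baseline trials move them the opposite way.
dw = -(ETA * eligtraces * (baseline * (loss - baseline))[:, None, None]).clamp(-MAXDW, MAXDW)
bpw = torch.zeros(BS, N, N) + dw      # plastic weights accumulate dw trial by trial

# The baseline itself is an exponential running average of past losses:
baseline = MULOSSTRACE * baseline + (1.0 - MULOSSTRACE) * loss
```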
612 | # Plasticity computation for internal (network-controlled) neuromodulation. 613 | # Note that it is applied at every time step, unlike the external neuromodulation experiments, which only apply plasticity once per trial, at the beginning of the reward period (see above). 614 | if numtrial > 10 and MODULTYPE == 'INTERNAL': # Lifetime plasticity is only applied after a few burn-in trials. 615 | # eligtraces: BS x N x N (1 per connection & batch element). mselossesthistrial: BS. meanlosstrace: BS x (N.N). trialtypes: BS. bresps/bstates: BS x N 616 | # dw should have shape BS x N x N, i.e. one for each connection and batch element. Do not sum over the batch dimension! The batch is purely evolutionary ! 617 | 618 | # Compute and apply the plasticity, based on accumulated eligibility traces and the output of the neuromodulatory neurons 619 | if numstep > 0: 620 | modulsprev = moduls.clone() 621 | moduls = bresps[:, MODNEURONS[0]] - bresps[:, MODNEURONS[1]] 622 | # lifeactpens += torch.abs(moduls) 623 | if numstep > 0: 624 | lifeactpens += (modulsprev - moduls) ** 2 625 | 626 | 627 | # If we use only the first neuromodulatory neuron's (tanh) output as the actual neuromodulatory output: 628 | # dw = (ETA * eligtraces * bresps[:, MODNEURONS[0]][:, None, None] ).clamp(-MAXDW, MAXDW) 629 | 630 | dw = (ETA * eligtraces * moduls[:, None, None] ).clamp(-MAXDW, MAXDW) 631 | 632 | 633 | bpw += dw 634 | 635 | 636 | 637 | # Are we in the reward signal period? 638 | # Note: the actual neuromodulatory reward signal (which influences plasticity) is applied just once per trial, above. Here we provide a feedback signal to the network, 639 | # i.e. "how wrong was my response for this trial?" 640 | # We also provide a signal indicating which response it gave in this trial (in theory it should be able to calculate it itself if needed, but this may help). 641 | if numstep >= STARTREWARDTIME and numstep < ENDREWARDTIME: # Note that by this time, the loss has been computed and is fixed 642 | 643 | # # We provide a binary, "correct/incorrect" signal to the network 644 | # bresps[:,REWARDNEURONS[0]] = REWARDSIZE * blosses[:] # Reward input is also clamping 645 | # bresps[:,REWARDNEURONS[1]] = -REWARDSIZE * blosses[:] # Reward input is also clamping 646 | 647 | # Actually, we provide the same MSE loss that is used to guide evolution 648 | # bresps[:,REWARDNEURONS[0]] = REWARDSIZE * mselossesthistrial[:] # Reward input is also clamping 649 | # bresps[:,REWARDNEURONS[1]] = -REWARDSIZE * mselossesthistrial[:] # Reward input is also clamping 650 | 651 | # Actually^2, we duplicate the reward signal across many neurons to (maybe) increase its potential impact and exploitability (?...) 652 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 653 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE, which is always +ve. 654 | # bresps[:,REWARDNEURONS] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 655 | # bresps[:,REWARDNEURONS].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE, which is always +ve. 656 | 657 | 658 | 659 | 660 | # We provide the network with a signal indicating the actual response it chose for this trial. Not sure if needed.
661 | # bresps[:, RESPSIGNALNEURONS] = responses1hot.float() * RESPSIGNALSIZE 662 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = responses1hot.float() * RESPSIGNALSIZE 663 | 664 | 665 | 666 | else: 667 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = 0 668 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = 0 669 | # bresps[:,REWARDNEURONS] = 0 670 | # bresps[:, RESPSIGNALNEURONS] = 0 671 | 672 | 673 | 674 | # modouts.append(float(moduls[0])) 675 | # rewins.append(float(bresps[0, REWARDNEURONS[0]])) 676 | if COLLECTMODOUTSANDREWINS: 677 | stimz.append(bresps[0, STIMNEURONS[0]]) 678 | respz.append(bresps[0, RESPNEURONS[1]] - bresps[0, RESPNEURONS[0]]) 679 | if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 680 | modouts.append(moduls[0]) 681 | rewins.append(bresps[0, REWARDNEURONS[0]]) 682 | 683 | 684 | if EVALW: 685 | allresps.append(bresps.cpu().numpy().astype('float32')) 686 | # if EVALW and numtrial >= NBTRIALS - 50: 687 | # stimz.append(bresps[:, STIMNEURONS[0]]) 688 | # respz.append(bresps[:, RESPNEURONS[1]] - bresps[:, RESPNEURONS[0]]) 689 | # if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 690 | # modouts.append(moduls[:]) 691 | # rewins.append(bresps[:, REWARDNEURONS[0]]) 692 | 693 | 694 | 695 | # Now all steps done for this trial: 696 | 697 | if PRINTING: 698 | if np.random.rand() < .1: 699 | print("|", int(responses[0]), int(blosses[0]), end=' ') 700 | 701 | ml0s.append(float(mselossesthistrial[0])) 702 | bl0s.append(float(blosses[0])) 703 | bls.append(blosses.cpu().numpy()) 704 | 705 | 706 | # If this trial is part of the last NBTRIALSLOSS, we accumulate its trial loss into the agent's total loss for this task. 707 | if numtrial >= NBTRIALS - NBTRIALSLOSS: # Lifetime losses are only estimated over the last NBTRIALSLOSS trials 708 | # taskmselosses += 2 * mselossesthistrial / NBTRIALSLOSS # the 2* doesn't mean anything 709 | taskmselosses += mselossesthistrial / NBTRIALSLOSS 710 | taskblosses += blosses / NBTRIALSLOSS 711 | 712 | 713 | # Now all trials done for this task: 714 | if PRINTING: 715 | # print("Med task mseloss:", "{:.4f}".format(float(torch.median(taskmselosses)))) 716 | print("\nTASK BLOSS[0]:", "{:.4f}".format(float(taskblosses[0])), "Med task bloss:", "{:.4f}".format(float(torch.median(taskblosses))), 717 | "Med-abs totaldw[0]:", "{:.4f}".format(float(torch.median(torch.abs(bpw[0,:,:])))), 718 | "Max-abs totaldw[0]:", "{:.4f}".format(float(torch.max(torch.abs(bpw[0,:,:])))) 719 | ) 720 | 721 | 722 | 723 | 724 | if COLLECTMODOUTSANDREWINS: 725 | print("Saving Resps, Stims, RI, MO") 726 | 727 | np.savetxt('stims.txt', np.array([float(x) for x in stimz])) 728 | np.savetxt('resps.txt', np.array([float(x) for x in respz])) 729 | np.savetxt('modouts.txt', np.array([float(x) for x in modouts])) 730 | np.savetxt('rewins.txt', np.array([float(x) for x in rewins])) 731 | 732 | # print("") 733 | lifemselosses += taskmselosses / NBTASKSPERGEN 734 | lifeblosses += taskblosses / NBTASKSPERGEN 735 | 736 | if (TESTING or numgen == 0) and numtask == 0: 737 | # These files contain respectively the first and *latest* Testing block of the *current* run only. 
738 | FNAME = 'bl_1standLastBlock_gen0.txt' if numgen == 0 else 'bl_1standLastBlock_lastgen.txt' 739 | # np.savetxt(FNAME, np.array(bl0s)) 740 | np.savetxt(FNAME, np.vstack(bls)) 741 | 742 | 743 | 744 | # After all tasks done for this lifetime / generation: 745 | 746 | lifeactpens /= (NBTASKSPERGEN * NBTRIALS) 747 | # lifeactpens -= torch.mean(lifeactpens); lifeactpens /= torch.std(lifeactpens) 748 | # lifeactpens += torch.mean(lifemselosses); lifeactpens *= torch.std(lifemselosses) 749 | 750 | lifelosses = lifemselosses + ALPHAACTPEN * lifeactpens 751 | 752 | binarylosses.append(float(lifeblosses[0])) 753 | evolosses.append(float(lifemselosses[0])) 754 | 755 | 756 | if TESTING and not EVALW: 757 | np.savetxt('blosses_onerun.txt', np.array(binarylosses)) 758 | np.savetxt('mselosses_onerun.txt', np.array(evolosses)) 759 | ww = w.cpu().numpy() 760 | pw0 = bpw[0,:,:].cpu().numpy() 761 | aa = alpha.cpu().numpy() 762 | np.savetxt('w.txt', ww) 763 | np.savetxt('pw0.txt', pw0) 764 | np.savetxt('alpha.txt', aa) 765 | 766 | 767 | if EVALW and True: 768 | # Note: we use .npy format, because multi-dimensional. 769 | 770 | np.save('allstims.npy', np.stack(allstims, -1)) 771 | np.save('alltgts.npy', np.stack(alltgts, -1)) 772 | 773 | # print(len(allresps), len(allstims), len(alltgts)) 774 | assert len(allresps) == NBTRIALS * T 775 | # print(allresps[0].shape, allstims[0].shape, alltgts[0].shape) 776 | print("Rearranging saved responses into appropriate shape...") 777 | z1 = np.dstack(allresps) 778 | z2 = np.stack(np.split(z1, NBTRIALS, axis=2), axis=-1) 779 | print("Final shape of the saved responses:", z2.shape) 780 | assert(z2.shape == (BS, N, T, NBTRIALS)) 781 | np.save('allresps.npy', z2[:,:,:,[29,-1]]) # We only store response data for 29th (before plasticity starts) and last trial (to keep file size manageable) 782 | 783 | 784 | 785 | # Now we're ready to perform evolution (by computing gradients by hand, and then applying the optimizer with these gradients) 786 | optimizer.zero_grad() 787 | 788 | # Gradient is just loss x mutation (remember we use antithetic sampling) 789 | # gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) # / BS 790 | gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 791 | 792 | 793 | # gradient = gradient / 100 794 | 795 | 796 | wgradnorm = float(torch.norm(gradient)) 797 | wgradnorms.append(wgradnorm) 798 | if PRINTING: 799 | print("norm w:", "{:.4f}".format(float(torch.norm(w))), "norm gradient:", "{:.4f}".format(wgradnorm), 800 | "med-abs w:", "{:.4f}".format(float(torch.median(torch.abs(w)))), 801 | "max-abs w:", "{:.4f}".format(float(torch.max(torch.abs(w)))), 802 | "norm a:", "{:.4f}".format(float(torch.norm(alpha))), "mean a:", "{:.4f}".format(float(torch.mean(alpha)))) 803 | 804 | 805 | w.grad = gradient 806 | wprev = w.clone() 807 | 808 | # gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) # / BS 809 | gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 810 | 811 | 812 | # gradientalpha = gradientalpha / 100 813 | 814 | 815 | alpha.grad = gradientalpha 816 | alphaprev = alpha.clone() 817 | 818 | if numgen > 0 and not TESTING and not EVALW: 819 | optimizer.step() 820 | 821 | 822 | wdiff = w - wprev 823 | adiff = alpha - alphaprev 824 | if PRINTING: 825 | print("Norm w-wprev:", "{:.4f}".format(float(torch.norm(wdiff))), "Max abs w-wprev:", 
"{:.4f}".format(float(torch.max(torch.abs(wdiff)))), 826 | "Norm a-aprev:", "{:.4f}".format(float(torch.norm(adiff))), "Max abs a-aprev:", "{:.4f}".format(float(torch.max(torch.abs(adiff)))) ) 827 | 828 | 829 | 830 | if PRINTING: 831 | print("Med/min/max/Half-Nth/0th loss in batch:", float(torch.median(lifelosses)), float(torch.min(lifelosses)), float(torch.max(lifelosses)), 832 | float(lifelosses[BS//2]), float(lifelosses[0])) 833 | print("Med/min/max/Half-Nth/0th life mse loss in batch:", float(torch.median(lifemselosses)), float(torch.min(lifemselosses)), float(torch.max(lifemselosses)), 834 | float(lifemselosses[BS//2]), float(lifemselosses[0])) 835 | print("Med/min/max/Half-Nth/0th activity penalty in batch:", float(torch.median(lifeactpens)), float(torch.min(lifeactpens)), float(torch.max(lifeactpens)), 836 | float(lifeactpens[BS//2]), float(lifeactpens[0])) 837 | print("Gen", numgen, "done in", time.time()-tic) 838 | 839 | 840 | 841 | 842 | 843 | 844 | print("Time taken:", time.time()-ticstart) 845 | 846 | -------------------------------------------------------------------------------- /decoding/decoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np; import matplotlib.pyplot as plt 2 | import sklearn 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.linear_model import LinearRegression 5 | 6 | print("This shows clipped graphs, BUT the image files will be OK!") 7 | 8 | print('ATTENTION: we create graphs for decoding target, stimulus 1, and stimulus 2!') 9 | 10 | #print(r.shape, s.shape, t.shape) 11 | # (500, 70, 50, 2) (500, 2, 400) (500, 400) 12 | # r has only the first and last trial 13 | 14 | NBINPUTNEURONS = 7 15 | NBOUTPUTNEURONS = 6 16 | NBTRIALS = 400 17 | 18 | 19 | for numfig, figname in enumerate(['target', 'stim1', 'stim2']): 20 | fig = plt.figure(figsize=(5,5)); 21 | 22 | ff, axes = plt.subplots(2,2) 23 | ax = axes[0,0] 24 | 25 | print("Making figure", figname) 26 | 27 | for numgen in range(2): 28 | 29 | if numgen == 1: 30 | r = np.load('allresps.npy') ; s = np.load('allstims.npy') ; t = np.load('alltgts.npy') 31 | else: 32 | r = np.load('allresps.npy.0'); s = np.load('allstims.npy.0'); t = np.load('alltgts.npy.0') 33 | 34 | for numtrial in range(2): 35 | 36 | numplot = 1 + 2*numgen + numtrial 37 | print(numplot, "/", 2*2) 38 | plt.subplot(2,2, numplot) 39 | plt.gca().set_title('Gen '+str(numgen*1000)+' / Trial '+str(numtrial*NBTRIALS), fontsize=10) 40 | 41 | # Which trial are we looking at - first (well, actually 9th or 29th - last before onset of plasticity) or last? 
42 | if numtrial == 0: 43 | rt = 0; st = 29; tt = 29 # Older ones use 29 as the "first" trial 44 | #rt = 0; st = 9; tt = 9 45 | else: 46 | rt = 1; st = NBTRIALS - 1; tt = NBTRIALS - 1 47 | 48 | allvals = [] 49 | for timepoint_train in range(50): 50 | if timepoint_train % 10 == 9: 51 | print(timepoint_train+1, '/ 50') 52 | vals_thistrainpoint = [] 53 | 54 | #for timepoint_test in range(5): # faster, for debugging 55 | for timepoint_test in range(50): 56 | 57 | 58 | if numfig == 0: 59 | # predicting target 60 | y = (t[:, tt] - .5) * 2 61 | elif numfig == 1: 62 | # predicting first stimulus 63 | y = (s[:, 0, st] - .5) * 2 64 | elif numfig == 2: 65 | # predicting second stimulus 66 | y = (s[:, 1, st] - .5) * 2 67 | 68 | 69 | x_test = r[125:250, NBINPUTNEURONS:-NBOUTPUTNEURONS, timepoint_test, rt] 70 | y_test = y[125:250] 71 | x_test = x_test - np.mean(x_test, axis=0) 72 | x_test = x_test / (1e-8 + np.std(x_test, axis=0)) 73 | 74 | score = 0 75 | 76 | nbtrainsets = 3 # ideally but not necessarily a divisor of 125, smaller=faster 77 | for numtrain in range(nbtrainsets): 78 | 79 | setsize = 125 // nbtrainsets 80 | 81 | x_train = r[numtrain*setsize:(numtrain+1)*setsize, NBINPUTNEURONS:-NBOUTPUTNEURONS, timepoint_train, rt] 82 | y_train = y[numtrain*setsize:(numtrain+1)*setsize] 83 | 84 | # Normalizing data to allow sklearn fitting 85 | x_train = x_train - np.mean(x_train, axis=0) 86 | x_train = x_train / (1e-8 + np.std(x_train, axis=0)) 87 | 88 | traind1 = np.mean(x_train[y_train>0, :], axis=0) 89 | traind2 = np.mean(x_train[y_train<0, :], axis=0) 90 | 91 | 92 | cc1 = np.corrcoef(np.vstack((traind1, x_test)))[0, 1:] 93 | cc2 = np.corrcoef(np.vstack((traind2, x_test)))[0, 1:] 94 | choice = 2.0 * (cc1 > cc2) - 1.0 95 | 96 | score = score + np.mean(y_test == choice) 97 | score = score / nbtrainsets 98 | 99 | 100 | vals_thistrainpoint.append(score) 101 | allvals.append(vals_thistrainpoint) 102 | 103 | allvals = np.array(allvals) 104 | 105 | 106 | plt.imshow(allvals); plt.axhline(y=25,color='b', ls=":"); plt.axhline(y=35,color='b', ls=":"); plt.axvline(x=25,color='b', ls=":"); plt.axvline(x=35,color='b', ls=":") 107 | 108 | 109 | plt.xticks(np.arange(9,50,10), labels=[str(z) for z in 20*(1 + np.arange(9,50,10))] ) 110 | plt.yticks(np.arange(9,50,10), labels=[str(z) for z in 20*(1 + np.arange(9,50,10))] ) 111 | plt.clim(0, 1) 112 | if numgen == 1: 113 | plt.xlabel('Train time (ms)') 114 | if numtrial == 0: 115 | plt.ylabel('Test time (ms)') 116 | if numtrial == 1: 117 | plt.colorbar(); 118 | 119 | #fig.suptitle('Changes due to lifetime learning') 120 | #fig.supylabel('Changes due to evolution') 121 | plt.tight_layout() 122 | if numfig == 0: 123 | ax.text(60, -20 ,'Changes due to lifetime learning', ha="center", va="center",size=10, color='b') 124 | ax.text(-38, 56 ,'Changes due to evolution', rotation=90, ha="center", va="center",size=10, color='b') 125 | ax.annotate('', xytext=(-.2, 1.3), xycoords='axes fraction', xy=(3, 1.3), 126 | arrowprops=dict(arrowstyle="->", color='b')) 127 | ax.annotate('', xytext=(-.6, 1), xycoords='axes fraction', xy=(-.6, -1.3), 128 | arrowprops=dict(arrowstyle="->", color='b')) 129 | plt.show() 130 | plt.savefig("image_"+figname+".png",bbox_inches='tight',dpi=200) 131 | 132 | 133 | --------------------------------------------------------------------------------
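(Editor's closing note on `decoding.py`: despite the `LogisticRegression`/`LinearRegression` imports, the decoder actually used in lines 88-97 above is a correlation-based nearest-class-mean classifier: each test trial is assigned the class whose mean training-set activity pattern it correlates with best. A self-contained sketch of that decoder on synthetic data follows; shapes and the injected class signal are illustrative only, so accuracy should come out well above the 0.5 chance level:)

```python
import numpy as np

# Sketch of the correlation / nearest-class-mean decoder used in decoding.py.
rng = np.random.default_rng(0)
n_train, n_test, n_units = 40, 125, 57

y_train = rng.choice([-1.0, 1.0], size=n_train)
y_test = rng.choice([-1.0, 1.0], size=n_test)
signal = 0.5                                       # class-dependent offset
x_train = rng.standard_normal((n_train, n_units)) + signal * y_train[:, None]
x_test = rng.standard_normal((n_test, n_units)) + signal * y_test[:, None]

mean_pos = x_train[y_train > 0].mean(axis=0)       # template for class +1
mean_neg = x_train[y_train < 0].mean(axis=0)       # template for class -1

# Row 0 of the stacked matrix is the template; entries [0, 1:] of the
# correlation matrix are its correlations with each test trial.
cc_pos = np.corrcoef(np.vstack((mean_pos, x_test)))[0, 1:]
cc_neg = np.corrcoef(np.vstack((mean_neg, x_test)))[0, 1:]
choice = 2.0 * (cc_pos > cc_neg) - 1.0             # pick the closer template

print("decoding accuracy:", np.mean(choice == y_test))
```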