├── .gitignore ├── README.md ├── code.py ├── curves.py └── decoding ├── HOWTOGENERATEFIGURES.txt ├── code.py └── decoding.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | This is the code for the ICML 2023 paper, [Learning to acquire novel cognitive tasks with evolution, plasticity and meta-meta-learning](https://arxiv.org/abs/2112.08588). 4 | 5 | We evolve a recurrent network, endowed with a reward-modulated Hebbian 6 | plasticity rule, that can automatically learn simple cognitive tasks from 7 | stimuli and rewards alone. The network is tested on a new task, never seen 8 | during evolution (delayed match-to-sample). 9 | 10 | ## How to use 11 | 12 | 1- Run `code.py`. A full run of 1000 generations will take about half a day on a machine with a standard GPU, but you can stop it before that. 13 | 14 | 2- This will generate several log files. The most important is 15 | `blosses_onerun.txt`, which records the main evaluation metric (mean success 16 | rate over the last 100 trials of a block) for the current candidate (i.e. 17 | batch element 0, the unmutated genome). Every 10th value in this file is obtained on the withheld test task; the others are on various training-set tasks. It will also generate other files, including `w.txt` and `alpha.txt` (the evolved weights and plasticity coefficients). 18 | 19 | 3- Run `curves.py`, which will automatically generate curves for training and testing loss, as in Figure 2 of the paper. These curves will only include the one run you just ran, so there will only be one line for each curve with no error interval. 20 | 21 | 4- Repeat the same process as many times as you like, each time saving `blosses_onerun.txt` under a different name. Then uncomment and modify line 11 in `curves.py` (`bls = [lt('bl1.txt'), lt('bl2.txt'), ...]`) to include the names of all these files as a list. Run `curves.py` to generate the same plot as Figure 2 in the paper, with inter-quartile ranges. 22 | 23 | The `decoding` directory includes code and instructions to generate the decoding results in Figure 4 of the paper. 24 | -------------------------------------------------------------------------------- /code.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torch.backends.cudnn as cudnn 6 | import pdb 7 | 8 | import scipy 9 | from scipy import ndimage 10 | from scipy import linalg 11 | 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | 15 | import os 16 | import argparse 17 | import time 18 | 19 | import numpy as np 20 | from numpy import fft 21 | 22 | from scipy import io as spio 23 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 24 | 25 | torch.set_printoptions(precision=5) 26 | np.set_printoptions(precision=5) 27 | 28 | 29 | 30 | # Specify the test task (and its logical negation, which is also withheld from the training set) 31 | # TESTTASK = 'nand'; TESTTASKNEG = 'and' 32 | TESTTASK = 'dms'; TESTTASKNEG = 'dnms' 33 | 34 | 35 | 36 | LR = 1e-2 # Adam (evolutionary) LR. 
37 | WDECAY = 3e-4 # Evolutionary weight decay parameter (for the Adam optimizer) 38 | MUTATIONSIZE = 3 * .01 # Std dev of the Gaussian mutations of the evolutionary algorithm 39 | 40 | # ALPHAACTPEN = 3 * 3e-3 41 | ALPHAACTPEN = 3 * 3 * 10 * 3e-3 # When squaring 42 | 43 | NBGEN = 1000 # 1700 # 500 # Number of generations per run 44 | NUMGENCUTLR = 100000 # The generation at which we cut the learning rate. If >NBGEN, we don't. 45 | 46 | N = 70 # Number of neurons in the RNN. 47 | 48 | 49 | 50 | BS = 500 # 500 # 1000 # Batch size, i.e. population size for the evolutionary algorithm. 51 | assert BS % 2 == 0 # Should be even because of antithetic sampling. 52 | 53 | # Same parameters as GR Yang: 54 | TAU = 100 # Neuron membrane constant, in ms 55 | DT = 20 # Duration of a timestep, in ms 56 | 57 | 58 | # All the following times are in *timesteps*, not ms 59 | T = 50 # Number of *timesteps* per trial 60 | STIMTIME = 20 # Duration of stimulus input, total, *in timesteps* (not ms) 61 | REWARDTIME = 10 # Duration of reward signal period 62 | RESPONSETIME = 10 # Duration of response period 63 | STARTRESPONSETIME = 25 # Timestep at which response period starts 64 | ENDRESPONSETIME = STARTRESPONSETIME + RESPONSETIME 65 | STARTREWARDTIME = 36 # Timestep at which reward is delivered and reward signal starts 66 | ENDREWARDTIME = STARTREWARDTIME + REWARDTIME 67 | assert ENDREWARDTIME < T 68 | 69 | 70 | MODULTYPE = 'EXTERNAL' # 'INTERNAL' # EXTERNAL is node-perturbation. INTERNAL is network-controlled modulation (experimental and untested in this code) 71 | 72 | 73 | 74 | JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 75 | TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 76 | PROBAMODUL = .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 77 | ALPHAMODUL = .5 # Scale of the random perturbations 78 | ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 79 | MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for external neuromodulation 80 | MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 81 | INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 82 | 83 | 84 | 85 | # The names of all the tasks. 14 tasks in total, because "always respond 0" and "always respond 1" are not included. 86 | alltasks = ['and', 'nand' , '01', 'anti01' , '10', 'anti10', 'watchstim1', 'watchstim2' ,'dms', 'antiwatchstim2', 'antiwatchstim1', 'or', 'nor', 'dnms'] 87 | 88 | 89 | 90 | NBSTIMNEURONS = 2 # 2 Stimulus neurons. Stimuli are binary, so both neurons receive opposite-valued inputs (or 0) 91 | NBREWARDNEURONS = 2 # 6 # 2 # reward signal for this trial. A value is represented with 2 inputs, as it is for stimulus neurons. 92 | NBBIASNEURONS = 1 # Bias neurons. Activations clamped to BIASVALUE. 93 | NBINPUTNEURONS = NBSTIMNEURONS + NBREWARDNEURONS + NBBIASNEURONS # The first NBINPUTNEURONS neurons in the network are input neurons (includes the bias, noise and reward inputs) 94 | NBRESPNEURONS = 2 # Response neurons for 0 and 1. 95 | NBMODNEURONS = 2 # Neuromodulatory output neurons 96 | NBOUTPUTNEURONS = NBRESPNEURONS + NBMODNEURONS # The last NBOUTPUTNEURONS neurons in the network are output neurons. Response neurons + Modulatory neuron. 
97 | NBRESPSIGNALNEURONS = NBRESPNEURONS # Neurons that receive the response-given signal ("what response did I just give?") 98 | STIMNEURONS = np.arange(NBSTIMNEURONS) 99 | INPUTNEURONS = np.arange(NBINPUTNEURONS) 100 | OUTPUTNEURONS = np.arange(N-NBOUTPUTNEURONS, N) 101 | MODNEURONS = np.arange(N-NBOUTPUTNEURONS, N-NBOUTPUTNEURONS + NBMODNEURONS) 102 | # NUMMODNEURON = N - NBOUTPUTNEURONS # The modulatory neuron is the first output neuron 103 | RESPNEURONS = np.arange(N-NBOUTPUTNEURONS+NBMODNEURONS, N) # Then come the response neurons 104 | REWARDNEURONS = np.arange(NBSTIMNEURONS, NBSTIMNEURONS+NBREWARDNEURONS) # The neurons receiving (and broadcasting) the "reward for this trial" signal are the ones just after the stimulus inputs. 105 | BIASNEURONS = np.arange(NBSTIMNEURONS+NBREWARDNEURONS, NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS) 106 | FIRSTRESPSIGNALNEURON = NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS # The first neuron that receives the response-given signal. We'll need this later 107 | assert FIRSTRESPSIGNALNEURON == NBINPUTNEURONS 108 | assert len(RESPNEURONS) == NBRESPNEURONS 109 | RESPSIGNALNEURONS = np.arange(FIRSTRESPSIGNALNEURON, FIRSTRESPSIGNALNEURON +NBRESPSIGNALNEURONS) 110 | 111 | 112 | BIASVALUE = 1.0 113 | 114 | 115 | 116 | NBTASKSPERGEN = 1 # 2 # 2 task blocks per generation 117 | 118 | 119 | NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last NBTRIALSLOSS trials of each block 120 | NBTRIALS = 300 + NBTRIALSLOSS # Total number of trials per block 121 | 122 | 123 | 124 | REWARDSIZE = 3.0 # 3 * 3.0 # Size of the binary-reward signal (correct/incorrect) 125 | STIMSIZE = 3.0 # Size of the stimulus input 126 | RESPSIGNALSIZE = 3.0 # Size of the response-given signal 127 | 128 | 129 | totalnbtasks = 0 130 | ticstart = time.time() 131 | 132 | 133 | # EVALW is to assess the behavior of an evolved network. Run it on a single batch of all tasks, without any mutation 134 | EVALW = False 135 | if EVALW: 136 | NBGEN = 1 137 | NBTASKSPERGEN = 1 138 | BS = 500 139 | MUTATIONSIZE = 0 140 | allresps=[] 141 | allstims=[] 142 | alltgts=[] 143 | 144 | 145 | 146 | 147 | with torch.no_grad(): # We don't need PyTorch to keep track of gradients, since we're computing the gradient ourselves (through evolution). 148 | 149 | PRINTING = True # if numgen == 0 or np.random.rand() < .05 else False 150 | 151 | # Initialize innate weight values 152 | w = torch.randn(N,N) * JINIT / np.sqrt(N) 153 | w = w.to(device) 154 | 155 | # Initialize alpha values - the plasticity coefficients (capital-pi in the paper) 156 | alpha = INITALPHA * torch.ones_like(w).to(device) 157 | 158 | # We zero out input weights to input neurons, though it probably doesn't have any effect. 159 | w.data[:NBINPUTNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 160 | # We also zero out the weights to neuromodulatory neurons, which probably does have an effect! 161 | w.data[MODNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 162 | winit = w.clone() 163 | 164 | # We will be using the Adam optimizer to apply our (hand-computed) evolutionary gradients 165 | optimizer = optim.Adam([w, alpha], lr=LR, weight_decay=WDECAY) # Default betas=(0.9, 0.999) 166 | 167 | # Evolosses are real-valued losses used for evolution. Binarylosses are binary 'correct/wrong' signals, also used for logging. 
168 | evolosses = [] 169 | responses0 = [] 170 | binarylosses = [] 171 | wgradnorms = [] 172 | mytaskprev = mytaskprevprev = mytaskprevprevprev = -1 173 | 174 | 175 | if not EVALW: 176 | # We save the initial weights and plasticity coefficients 177 | ww = w.cpu().numpy() 178 | aa = alpha.cpu().numpy() 179 | np.savetxt('winit.txt', ww) 180 | np.savetxt('alphainit.txt', aa) 181 | 182 | if EVALW : 183 | # If in Evaluate-Weights mode, we load the weights and plasticity coefficients 184 | w = np.loadtxt('w.txt') 185 | w = torch.from_numpy(w).float().to(device) 186 | winit = w.clone() 187 | 188 | alpha = np.loadtxt('alpha.txt') 189 | alpha = torch.from_numpy(alpha).float().to(device) 190 | 191 | 192 | print("MODULTYPE is:", MODULTYPE) 193 | assert MODULTYPE == 'EXTERNAL' or MODULTYPE == 'INTERNAL', "Modulation type must be 'INTERNAL' or 'EXTERNAL'" 194 | 195 | 196 | 197 | 198 | 199 | # Ready to start the evolutionary loop, iterating over generations (i.e. lifetimes). 200 | 201 | for numgen in range(NBGEN): 202 | 203 | 204 | 205 | if numgen == NUMGENCUTLR: 206 | # Optionally, cut the learning rate after a given number of generations. Note that this point will not be reached in the default version because NUMGENCUTLR > NBGEN. 207 | for param_group in optimizer.param_groups: 208 | param_group['lr'] /= 5.0 209 | 210 | 211 | 212 | # Every 10th generation is for testing on the withheld task (with no weight change) 213 | TESTING = False 214 | if numgen == 0 or numgen == NBGEN-1 or numgen % 10 == 0: 215 | TESTING = True 216 | if PRINTING: 217 | print("TESTING") 218 | if EVALW: 219 | TESTING = False 220 | 221 | 222 | tic = time.time() 223 | responses0thisgen = [] 224 | 225 | 226 | 227 | alpha.clip_(min=0) 228 | 229 | 230 | 231 | # Generating the population of mutated individuals: 232 | 233 | # First, batch the weights. 234 | bw = torch.dstack(BS*[w]).movedim(2,0).to(device) # batched weights 235 | balpha = torch.dstack(BS*[alpha]).movedim(2,0).to(device) # batched alphas 236 | # Generate the mutations, for both w and alpha 237 | # NOTE: batch element 0 (and BS/2, its antithetic pair) are NOT mutated, and represent the current unmutated candidate genotype. 238 | mutations_wandalpha = [] 239 | for n, x in enumerate( (bw, balpha) ): 240 | mutations = torch.randn_like(x, requires_grad=False).to(device) * MUTATIONSIZE 241 | mutations[0,:,:] = 0 # 1st item in batch = current candidate 242 | mutations[BS//2:, :, :] = -mutations[:BS//2, :, :] # Antithetic sampling for mutations ! Really helps. 243 | if TESTING or EVALW: 244 | mutations *= 0.0 # No mutation - results in batch score variance being caused only by randomness in trial order and (possibly) lifetime perturbations 245 | x += mutations 246 | mutations_wandalpha.append(mutations) 247 | 248 | 249 | 250 | bw.data[:, :NBINPUTNEURONS, :] = 0 # Input neurons receive 0 connections. Probably not necessary. 251 | bworig = bw.clone() # Storing the weights for comparison purposes at the gradient step (below). 252 | 253 | lifelosses = torch.zeros(BS, requires_grad=False).to(device) 254 | lifemselosses = torch.zeros(BS, requires_grad=False).to(device) 255 | lifeactpens = torch.zeros(BS, requires_grad=False).to(device) 256 | lifeblosses = torch.zeros(BS, requires_grad=False).to(device) 257 | 258 | 259 | 260 | 261 | # Lifetime loop, iterates over task-blocks: 262 | # In the present version NBTASKSPERGEN is always 1, so this loop is redundant. 
263 | for numtask in range(NBTASKSPERGEN): 264 | totalnbtasks += 1 265 | 266 | COLLECTMODOUTSANDREWINS = not EVALW and ( (numtask + numgen * 2) % 7 == 0 ) 267 | 268 | # bpw = batched plastic weights 269 | bpw = torch.zeros_like(bw).to(device) # For now, plastic weights are initialized to 0 at the beginning of each task. 270 | 271 | # Initialize neural states 272 | bstates = .1 * torch.ones(BS, N).to(device) # bstates (batched states) contains the neural activations (before nonlinearity). Dimensionality appropriate for batched matrix multiplication. 273 | bstates[:, INPUTNEURONS] = 0 274 | bresps = 1.0 * bstates # bresps is the actual neural responses, after nonlinearity, and also serves as the input for the next step. 275 | bresps[:, BIASNEURONS] = BIASVALUE 276 | 277 | meanlosstrace = torch.zeros(BS, 2 * 2).to(device) 278 | bls = [] # Will store binary losses of all batch elements, for each trial of this task 279 | bl0s = [] # Same but only for batch element 0 (i.e. the unmutated candidate genome) 280 | ml0s = [] # MSE loss (the one used for evolution) for element 0 (unmutated candidate), of all trials for this task 281 | 282 | 283 | 284 | # Choose the task ! If not testing, make sure it's different from recently chosen tasks. 285 | 286 | 287 | # if TESTING: 288 | # mytask = TESTTASK 289 | # mytasknum = alltasks.index(mytask) 290 | # else: 291 | # while True: 292 | # mytasknum = np.random.randint(len(alltasks)) 293 | 294 | # mytask = alltasks[mytasknum] 295 | 296 | # if ( (mytask!= TESTTASK) 297 | # and (mytask != TESTTASKNEG) # We withhold both the test task and its logical negation 298 | # and (mytask != mytaskprev) 299 | # and (mytask != mytaskprevprev) 300 | # ): 301 | 302 | # break 303 | 304 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 305 | 306 | 307 | # We pick the tasks for this generation, for the whole population (i.e. the whole batch) 308 | 309 | # # Only use AND and NAND as tasks 310 | # mytasknum = numtask % 4 311 | # mytask = alltasks[mytasknum] 312 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 313 | 314 | 315 | btasks = [] # Tasks for the whole batch 316 | for ii in range(BS//2): 317 | if TESTING: 318 | # On 'testing' generations, we only show the withheld test task to everyone (this will not result in any parameter change and is only used for tracking evolutionary progress) 319 | cand_task = TESTTASK 320 | cand_tasknum = alltasks.index(TESTTASK) 321 | else: 322 | while True: 323 | cand_tasknum = np.random.randint(len(alltasks)) 324 | cand_task = alltasks[cand_tasknum] 325 | if ( (cand_task!= TESTTASK) 326 | and (cand_task != TESTTASKNEG) # We withhold both the test task and its logical negation 327 | 328 | 329 | and (cand_tasknum % 2 == (numgen // 2) % 2) # Training on alternate halves of the training set at successive (pairs of) generations (not sure if this helps) 330 | 331 | 332 | ): 333 | break 334 | btasks.append(cand_task) 335 | 336 | btasks = btasks * 2 # Duplicating the list, so each antithetic pair (batch elements K and K + BS/2) has the same tasks. 
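# (Editor's aside, not part of the original code: a compact restatement of
# the antithetic-sampling scheme used throughout. Batch elements k and
# k + BS//2 receive opposite mutations +eps_k / -eps_k and identical tasks
# and stimuli, so the evolutionary gradient computed at the end of each
# generation,
#     gradient = sum_k lifelosses[k] * eps_k / (BS * MUTATIONSIZE**2),
# behaves like a symmetric finite-difference estimate: noise shared within
# each pair cancels, and only the loss difference between the +eps_k and
# -eps_k individuals contributes.)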
337 | 338 | 339 | 340 | if EVALW: 341 | btasks = [TESTTASK] * BS 342 | 343 | 344 | 345 | 346 | 347 | assert(len(btasks) == BS) 348 | 349 | 350 | # Cumulative MSE and binary losses for this task, over the last NBTRIALSLOSS trials of the block: 351 | taskmselosses = torch.zeros_like(lifemselosses).to(device) 352 | taskblosses = torch.zeros_like(lifemselosses).to(device) 353 | 354 | respz = [] # Response neuron outputs 355 | stimz = [] # Stimulus neurons outputs 356 | modouts = [] # Neuromodulatory output - not used here, because we use node-perturbation (i.e. modulation is EXTERNAL) 357 | rewins = [] # Received rewards (reward neuron outputs) 358 | 359 | 360 | if PRINTING: 361 | print("task[0]:", btasks[0], "task[1]:", btasks[1]) 362 | 363 | 364 | 365 | # OK, ready to start the task. 366 | 367 | 368 | eligtraces = torch.zeros_like(bw, requires_grad=False).to(device) # Initialize the eligibility traces at the start of each block/task. 369 | 370 | 371 | # Task loop, iterating over trials 372 | # You do NOT erase memory (neural activations or plastic weights) between successive trials ! 373 | for numtrial in range(NBTRIALS): 374 | 375 | # First, some preparation for the trial to come. 376 | 377 | # Initializations 378 | mselossesthistrial = torch.zeros(BS, requires_grad=False).to(device) # MSE losses for this trial 379 | totalresps = torch.zeros(BS, NBRESPNEURONS, requires_grad=False).to(device) # Will accumulate the total outputs of each network over the trial, so we can compute the network's response for this trial. 380 | 381 | 382 | # Before we start the trial, we need to generate the inputs and targets for this trial, for the whole population (i.e. the whole batch): 383 | 384 | # Pick stimulus 1 and stimulus 2 for this trial (and for each batch member): 385 | stims1 = (torch.rand(BS, 1) > .5).float() 386 | stims2 = (torch.rand(BS, 1) > .5).float() 387 | 388 | 389 | # Antithetic pairs share the exact same stimuli 390 | stims1[BS//2:, :] = stims1[:BS//2, :] 391 | stims2[BS//2:, :] = stims2[:BS//2, :] 392 | 393 | 394 | 395 | # Actual temporal inputs: 396 | inpts = np.zeros((BS, NBSTIMNEURONS, STIMTIME)) 397 | StimDur = STIMTIME 398 | StartStim = 0 399 | # The two stimuli are presented in succession, with both input neurons locked in opposite values to each other: 400 | inpts[:, 0, StartStim:StartStim+StimDur//2 - 2] = 2.0 * stims1 - 1.0 401 | inpts[:, 0, StartStim+StimDur//2:StartStim+StimDur - 2] = 2.0 * stims2 - 1.0 402 | inpts[:, 1, StartStim:StartStim+StimDur] = -inpts[:, 0, StartStim:StartStim+StimDur] 403 | 404 | inputs = torch.from_numpy(inpts).float().to(device) 405 | 406 | 407 | 408 | # Now we compute the targets for this trial, that is, the expected values of the output neurons, depending on inputs and tasks 409 | tgts = -100 * np.ones((BS, NBRESPNEURONS, RESPONSETIME)) 410 | 411 | for ii in range(BS): 412 | # First we generate the expected output for the non-null response neuron, based on inputs and task: 413 | if btasks[ii] == 'watchstim1': 414 | tgts[ii, 1, :] = stims1[ii, 0] 415 | elif btasks[ii] == 'watchstim2': 416 | tgts[ii, 1, :] = stims2[ii, 0] 417 | elif btasks[ii] == 'antiwatchstim1': 418 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] 419 | elif btasks[ii] == 'antiwatchstim2': 420 | tgts[ii, 1, :] = 1.0 - stims2[ii, 0] 421 | elif btasks[ii] == 'and': 422 | tgts[ii, 1, :] = (stims1[ii, 0] * stims2[ii, 0]) 423 | elif btasks[ii] == 'nand': 424 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] * stims2[ii, 0]) 425 | # These two lines add 25% running time to the entire program! 
looks like np.clip is *slow*. 426 | # elif btasks[ii] == 'or': 427 | # tgts[ii, 1, :] = np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 428 | # elif btasks[ii] == 'nor': 429 | # tgts[ii, 1, :] = 1.0 - np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 430 | # Instead, we will clip after the full array is done. This should still work out the same. 431 | elif btasks[ii] == 'or': 432 | tgts[ii, 1, :] = stims1[ii, 0] + stims2[ii, 0] 433 | elif btasks[ii] == 'nor': 434 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] + stims2[ii, 0]) 435 | elif btasks[ii] == '10': 436 | tgts[ii, 1, :] = stims1[ii, 0] * (1.0 - stims2[ii, 0]) 437 | elif btasks[ii] == 'anti10': 438 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] * (1.0 - stims2[ii, 0]) 439 | elif btasks[ii] == '01': 440 | tgts[ii, 1, :] = (1.0 - stims1[ii, 0]) * stims2[ii, 0] 441 | elif btasks[ii] == 'anti01': 442 | tgts[ii, 1, :] = 1.0 - (1.0 - stims1[ii, 0]) * stims2[ii, 0] 443 | elif btasks[ii] == 'dms': 444 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 445 | elif btasks[ii] == 'dnms': 446 | tgts[ii, 1, :] = (stims1[ii, 0] != stims2[ii, 0]) 447 | else: 448 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 449 | 450 | tgts[:, 1, :] = np.clip(tgts[:, 1, :], 0.0, 1.0) 451 | 452 | 453 | 454 | # The target responses of the two output neurons are mirror images of each other (network response is binary) 455 | tgts[:, 0, :] = 1.0 - tgts[:, 1, :] 456 | 457 | assert np.all(np.logical_or(tgts == 0.0 , tgts == 1.0)) 458 | 459 | if EVALW: 460 | alltgts.append(tgts[:,1, 0]) 461 | allstims.append(np.hstack((stims1, stims2))) 462 | 463 | 464 | 465 | 466 | targets = torch.from_numpy(tgts).float().to(device) 467 | 468 | # In practice, we clip targets to 0.1/0.9 instead of actually 0.0/1.0. This may or may not help. 469 | targets.clip_(min=0.1, max=0.9) 470 | 471 | 472 | 473 | 474 | # Now that the inputs and targets are prepared, we are ready to actually start the trial! 475 | 476 | 477 | # Run the network. Trial loop, iterating over timesteps 478 | for numstep in range(T): 479 | 480 | # Update neural activations, using previous-step bresps (actual neural outputs) as input. 481 | # 'bstates' are the neural activations before nonlinearity 482 | # 'bresps' are the actual firing rates, i.e. bstates after nonlinearity (or clamped values for input neurons) 483 | # bresps is the lateral input to bstates, which is then used to compute bresps for the next step 484 | 485 | # This implements the equation dx = dt/tau * (-x(t) + (W + PI .* P(t)) @ y(t) ) - standard continuous-time RNN, with plastic weights. 'alpha' is PI in the ICML paper. 486 | bstates += (DT / TAU) * (-bstates + torch.bmm((bw + balpha * bpw), bresps[:, :, None])[:,:,0] ) 487 | 488 | 489 | # Applying the random perturbations on neural activations, both for noise and for the lifetime plasticity algorithm (node-perturbation) 490 | # And also updating the eligibility trace appropriately 491 | # This is a very non-optimal implementation! 492 | if numstep > 1 : 493 | perturbindices = (torch.rand(1, N) < PROBAMODUL).int() # Which neurons get perturbed? 494 | 495 | # perturbindices[0, MODNEURONS] = 0 # We disable perturbations on neuromodulatory neurons for debugging... 496 | 497 | 498 | perturbations = (ALPHAMODUL * perturbindices * (2 * torch.rand(1, N) - 1.0)).to(device) # Note the dimensions: the same noise vector is applied to all elements in the batch (to save time!) 
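# (Editor's note, not part of the original code: with PROBAMODUL = .1 and
# N = 70, about 7 neurons receive a perturbation at each timestep, each by
# an amount drawn uniformly from [-ALPHAMODUL, ALPHAMODUL] = [-0.5, 0.5];
# the same perturbation vector is shared by all BS batch elements, as the
# comment above notes.)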
499 | 500 | 501 | 502 | 503 | if numtrial > NBTRIALS - 20: 504 | perturbations.fill_(0) # Again, not sure if that helps 505 | 506 | 507 | 508 | bstates += perturbations 509 | 510 | # Node-perturbation: Hebbian eligibility trace = product between inputs (bresps from previous time step) and *perturbations* in outputs. dH = X * deltaY 511 | # We do this with a (batched) outer product between the (column) vector of perturbations (1 per neuron) and the (row) vector of inputs 512 | # Note that here, since we have an RNN, the input is bresps - the network's responses from the previous time step 513 | if torch.sum(perturbindices) > 0: 514 | eligtraces += torch.bmm( perturbations.expand(BS, -1)[:, :, None], bresps[:, None, :] ) 515 | 516 | # Eligibility traces, unlike actual plastic weights, are decaying 517 | eligtraces -= (DT / TAU_ET) * eligtraces 518 | 519 | 520 | # We can now compute the actual neural responses (firing rates) for this time step, applying the appropriate nonlinearity to each neuron 521 | bresps = bstates.clone() # F.leaky_relu(bstates) 522 | # The following assumes that response neurons are the last neurons of the network ! 523 | bresps[:,N-NBRESPNEURONS:].sigmoid_() # The response neurons (NOT output neurons - modulatory neuron not included!) are sigmoids, all others are tanh. An arbitrary design choice. 524 | bresps[:,:N-NBRESPNEURONS].tanh_() 525 | 526 | 527 | # Are we in the input presentation period? Then apply the inputs. 528 | # Inputs are clamping, fixing the response of the input neurons. 529 | if numstep < STIMTIME: 530 | # bresps[:, STIMNEURONS] = STIMSIZE * inputs[:, :, numstep] 531 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = STIMSIZE * inputs[:, :, numstep] 532 | else: 533 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = 0 534 | # bresps[:, STIMNEURONS] = 0 535 | 536 | # Bias input is always-on, always clamping. 537 | # bresps[:, BIASNEURONS] = BIASVALUE 538 | bresps[:, BIASNEURONS[0]] = BIASVALUE 539 | 540 | # All the responses have now been computed for this step 541 | 542 | # Are we in the response period? Then collect network response. 543 | if numstep >= STARTRESPONSETIME and numstep < ENDRESPONSETIME: 544 | 545 | assert numstep < STARTREWARDTIME 546 | # Accumulate the total activation of each output neuron, so that we can compute the network's actual response at the end of response period: 547 | # totalresps += bresps[:, RESPNEURONS] 548 | totalresps += bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] 549 | # Accumulate the MSE error between actual and expected outputs: 550 | # mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 551 | mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 552 | 553 | else: 554 | bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] = 0.0 555 | # bresps[:, RESPNEURONS] = 0.0 556 | 557 | 558 | # Is the response period for this trial finished, or equivalently, are we at the first step of the reward / feedback period? 559 | # If so, compute the network's response (i.e. 
which neuron fired most) 560 | # Also, if using external neuromodulation, we compute the neuromodulation (based on baselined rewards for this trial) and apply plasticity 561 | if numstep == STARTREWARDTIME: 562 | # The network's response for this trial (0 or 1) is the index of the output neuron that had the highest cumulative output over the response period 563 | responses = torch.argmax(totalresps, dim=1) # responses is a 1D, integer-valued array of size BS. totalresps is a 2D real-valued array of size (BS, NBRESPNEURONS) 564 | 565 | # blosses (binary losses) is a 1/-1 "correct/wrong" signal for each batch element for this trial. 566 | blosses = 2.0 * (responses == torch.argmax(targets[:, :, 0], dim=1)).float() - 1.0 567 | responses0thisgen.append(float(responses[0])) 568 | 569 | # We also want the 1-hot version of the response for each neuron. This will be used as the response signal below. 570 | if numtrial > 0: 571 | responses1hot_prev = responses1hot.clone() 572 | responses1hot = F.one_hot(responses, 2) 573 | 574 | # Now we apply lifetime plasticity, with node-perturbation, based on eligibility trace and suitably baselined reward/loss 575 | 576 | 577 | # Baseline computation - only used for node-perturbation 578 | # We compute separate baseline (running average) losses for different types of trials, as defined by their inputs (as in Miconi, eLife 2017). 579 | # So we need to find out the trial type for each element in batch. 580 | # input1 = inputs[:, 0, 0]; input2 = inputs[:, 1, 0] # Uh, what was that? 581 | input1 = stims1[:, 0]; input2 = stims2[:, 0] 582 | trialtypes = (input1>0).long() * 2 + (input2>0).long() 583 | 584 | if MODULTYPE == 'EXTERNAL' and numtrial > 30: # + (300 if EVALW else 0): 585 | dw = - (ETA * eligtraces * ( meanlosstrace[np.arange(BS), trialtypes] * (mselossesthistrial - meanlosstrace[np.arange(BS), trialtypes]) )[:, None, None]).clamp(-MAXDW, MAXDW) 586 | bpw += dw 587 | 588 | 589 | 590 | # Updating the baseline - running average of losses, for each batch element, for the trial type just seen 591 | meanlosstrace[torch.arange(BS).long(), trialtypes] *= MULOSSTRACE 592 | meanlosstrace[torch.arange(BS).long(), trialtypes] += (1.0 - MULOSSTRACE) * mselossesthistrial 593 | 594 | 595 | 596 | 597 | # Plasticity computation for internal (network-controlled) neuromodulation (not used in node-perturbation experiments - highly experimental, do not trust). 598 | # Note that it is applied at every time step, unlike external neuromodulation experiments which only apply plasticity once per trial, at the beginning of the reward period (see above). 599 | if numtrial > 10 and MODULTYPE == 'INTERNAL': # Lifetime plasticity is only applied after a few burn-in trials. 600 | # eligtraces: BS x N x N (1 per connection & batch element) mselossesthistrial: BS. meanlosstrace: BS x 4 (one per trial type). trialtypes: BS bresps/bstates: BS x N 601 | # dw should have shape BS x N x N, i.e. one for each connection and batch element. Do not sum over batch dimension! The batch is purely evolutionary ! 
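# (Editor's aside, not part of the original code: for comparison, the
# EXTERNAL node-perturbation update applied once per trial above is, for
# each connection with eligibility trace e, trial MSE loss L, and running
# baseline Lb for the current trial type,
#     dw = -ETA * e * Lb * (L - Lb), clipped to [-MAXDW, MAXDW],
# so plastic weights move along the perturbation-correlated trace when the
# loss beats its baseline, and against it otherwise.)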
602 | 603 | # Compute and apply the plasticity, based on accumulated eligibility traces and output of a certain neuron 604 | if numstep > 0: 605 | modulsprev = moduls.clone() 606 | moduls = bresps[:, MODNEURONS[0]] - bresps[:, MODNEURONS[1]] 607 | # lifeactpens += torch.abs(moduls) 608 | if numstep > 0 : 609 | lifeactpens += (modulsprev - moduls) ** 2 610 | 611 | dw = (ETA * eligtraces * moduls[:, None, None] ).clamp(-MAXDW, MAXDW) 612 | 613 | 614 | bpw += dw 615 | 616 | 617 | 618 | # Are we in the reward signal period? 619 | # This is just to inform the network of its own performance. The actual lifetime plasticity and neuromodulation is computed above. 620 | if numstep >= STARTREWARDTIME and numstep < ENDREWARDTIME: # Note that by this time, the loss has been computed and is fixed 621 | 622 | # We duplicate the reward signal across many neurons to (maybe) increase its potential impact and exploitability (?...) 623 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 624 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE which is always +ve. 625 | 626 | 627 | # We provide the network with a signal indicating the actual response it chose for this trial. Not sure if needed. 628 | # bresps[:, RESPSIGNALNEURONS] = responses1hot.float() * RESPSIGNALSIZE 629 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = responses1hot.float() * RESPSIGNALSIZE 630 | 631 | 632 | 633 | else: 634 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = 0 635 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = 0 636 | 637 | 638 | 639 | if COLLECTMODOUTSANDREWINS: 640 | stimz.append(bresps[0, STIMNEURONS[0]]) 641 | respz.append(bresps[0, RESPNEURONS[1]] - bresps[0, RESPNEURONS[0]]) 642 | if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 643 | modouts.append(moduls[0]) 644 | rewins.append(bresps[0, REWARDNEURONS[0]]) 645 | 646 | 647 | if EVALW: 648 | allresps.append(bresps.cpu().numpy().astype('float32')) 649 | 650 | 651 | 652 | 653 | # Now all steps done for this trial: 654 | 655 | if PRINTING: 656 | if np.random.rand() < .1: 657 | print("|", int(responses[0]), int(blosses[0]), end=' ') 658 | 659 | ml0s.append(float(mselossesthistrial[0])) 660 | bl0s.append(float(blosses[0])) 661 | bls.append(blosses.cpu().numpy()) 662 | 663 | 664 | # If this trial is part of the last NBTRIALSLOSS trials, we accumulate its trial loss into the agent's total loss for this task. 
665 | if numtrial >= NBTRIALS - NBTRIALSLOSS: # Lifetime losses are only estimated over the last NBTRIALSLOSS trials 666 | # taskmselosses += 2 * mselossesthistrial / NBTRIALSLOSS # the 2* doesn't mean anything 667 | taskmselosses += mselossesthistrial / NBTRIALSLOSS 668 | taskblosses += blosses / NBTRIALSLOSS 669 | 670 | 671 | # Now all trials done for this task: 672 | if PRINTING: 673 | # print("Med task mseloss:", "{:.4f}".format(float(torch.median(taskmselosses)))) 674 | print("\nTASK BLOSS[0]:", "{:.4f}".format(float(taskblosses[0])), "Med task bloss:", "{:.4f}".format(float(torch.median(taskblosses))), 675 | "Med-abs totaldw[0]:", "{:.4f}".format(float(torch.median(torch.abs(bpw[0,:,:])))), 676 | "Max-abs totaldw[0]:", "{:.4f}".format(float(torch.max(torch.abs(bpw[0,:,:])))) 677 | ) 678 | 679 | 680 | 681 | 682 | if COLLECTMODOUTSANDREWINS: 683 | print("Saving Resps, Stims, RI, MO") 684 | 685 | np.savetxt('stims.txt', np.array([float(x) for x in stimz])) 686 | np.savetxt('resps.txt', np.array([float(x) for x in respz])) 687 | np.savetxt('modouts.txt', np.array([float(x) for x in modouts])) 688 | np.savetxt('rewins.txt', np.array([float(x) for x in rewins])) 689 | 690 | # print("") 691 | lifemselosses += taskmselosses / NBTASKSPERGEN 692 | lifeblosses += taskblosses / NBTASKSPERGEN 693 | 694 | if (TESTING or numgen == 0) and numtask == 0: 695 | # These files contain respectively the first and *latest* Testing block of the *current* run only. 696 | FNAME = 'bl_1standLastBlock_gen0.txt' if numgen == 0 else 'bl_1standLastBlock_lastgen.txt' 697 | np.savetxt(FNAME, np.vstack(bls)) 698 | 699 | 700 | 701 | # After all tasks done for this lifetime / generation: 702 | 703 | lifeactpens /= (NBTASKSPERGEN * NBTRIALS) 704 | # lifeactpens -= torch.mean(lifeactpens); lifeactpens /= torch.std(lifeactpens) 705 | # lifeactpens += torch.mean(lifemselosses); lifeactpens *= torch.std(lifemselosses) 706 | 707 | lifelosses = lifemselosses + ALPHAACTPEN * lifeactpens 708 | 709 | binarylosses.append(float(lifeblosses[0])) 710 | evolosses.append(float(lifemselosses[0])) 711 | 712 | 713 | if TESTING and not EVALW: 714 | np.savetxt('blosses_onerun.txt', np.array(binarylosses)) # This is the main evaluation metric: the mean success rate over the last NBTRIALSLOSS trials of each testing generation, for batch element 0 (the unmutated candidate genome) 715 | np.savetxt('mselosses_onerun.txt', np.array(evolosses)) 716 | ww = w.cpu().numpy() 717 | pw0 = bpw[0,:,:].cpu().numpy() 718 | aa = alpha.cpu().numpy() 719 | np.savetxt('w.txt', ww) 720 | np.savetxt('pw0.txt', pw0) 721 | np.savetxt('alpha.txt', aa) 722 | 723 | 724 | if EVALW and True: 725 | # Note: we use .npy format, because multi-dimensional. 
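# (Editor's sketch, not part of the original code: the arrays saved below
# can be reloaded for analysis with np.load, e.g.
#     allresps = np.load('allresps.npy')   # shape (BS, N, T, 2)
# where the last axis holds trial 29, before plasticity starts, and the
# final trial, as selected below.)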
726 | 727 | np.save('allstims.npy', np.stack(allstims, -1)) 728 | np.save('alltgts.npy', np.stack(alltgts, -1)) 729 | 730 | # print(len(allresps), len(allstims), len(alltgts)) 731 | assert len(allresps) == NBTRIALS * T 732 | # print(allresps[0].shape, allstims[0].shape, alltgts[0].shape) 733 | print("Rearranging saved responses into appropriate shape...") 734 | z1 = np.dstack(allresps) 735 | z2 = np.stack(np.split(z1, NBTRIALS, axis=2), axis=-1) 736 | print("Final shape of the saved responses:", z2.shape) 737 | assert(z2.shape == (BS, N, T, NBTRIALS)) 738 | np.save('allresps.npy', z2[:,:,:,[29,-1]]) # We only store response data for 29th (before plasticity starts) and last trial (to keep file size manageable) 739 | 740 | 741 | 742 | # Now we're ready to perform evolution (by computing gradients by hand, and then applying the optimizer with these gradients) 743 | optimizer.zero_grad() 744 | 745 | # Gradient is just loss x mutation (remember we use antithetic sampling) 746 | # gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) # / BS 747 | gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 748 | 749 | 750 | # gradient = gradient / 100 751 | 752 | 753 | wgradnorm = float(torch.norm(gradient)) 754 | wgradnorms.append(wgradnorm) 755 | if PRINTING: 756 | print("norm w:", "{:.4f}".format(float(torch.norm(w))), "norm gradient:", "{:.4f}".format(wgradnorm), 757 | "med-abs w:", "{:.4f}".format(float(torch.median(torch.abs(w)))), 758 | "max-abs w:", "{:.4f}".format(float(torch.max(torch.abs(w)))), 759 | "norm a:", "{:.4f}".format(float(torch.norm(alpha))), "mean a:", "{:.4f}".format(float(torch.mean(alpha)))) 760 | 761 | 762 | w.grad = gradient 763 | wprev = w.clone() 764 | 765 | # gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) # / BS 766 | gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 767 | 768 | 769 | # gradientalpha = gradientalpha / 100 770 | 771 | 772 | alpha.grad = gradientalpha 773 | alphaprev = alpha.clone() 774 | 775 | if numgen > 0 and not TESTING and not EVALW: 776 | optimizer.step() 777 | 778 | 779 | wdiff = w - wprev 780 | adiff = alpha - alphaprev 781 | if PRINTING: 782 | print("Norm w-wprev:", "{:.4f}".format(float(torch.norm(wdiff))), "Max abs w-wprev:", "{:.4f}".format(float(torch.max(torch.abs(wdiff)))), 783 | "Norm a-aprev:", "{:.4f}".format(float(torch.norm(adiff))), "Max abs a-aprev:", "{:.4f}".format(float(torch.max(torch.abs(adiff)))) ) 784 | 785 | 786 | 787 | if PRINTING: 788 | print("Med/min/max/Half-Nth/0th loss in batch:", float(torch.median(lifelosses)), float(torch.min(lifelosses)), float(torch.max(lifelosses)), 789 | float(lifelosses[BS//2]), float(lifelosses[0])) 790 | print("Med/min/max/Half-Nth/0th life mse loss in batch:", float(torch.median(lifemselosses)), float(torch.min(lifemselosses)), float(torch.max(lifemselosses)), 791 | float(lifemselosses[BS//2]), float(lifemselosses[0])) 792 | print("Med/min/max/Half-Nth/0th activity penalty in batch:", float(torch.median(lifeactpens)), float(torch.min(lifeactpens)), float(torch.max(lifeactpens)), 793 | float(lifeactpens[BS//2]), float(lifeactpens[0])) 794 | print("Gen", numgen, "done in", time.time()-tic) 795 | 796 | 797 | 798 | 799 | 800 | 801 | print("Time taken:", time.time()-ticstart) 802 | 803 | 804 | -------------------------------------------------------------------------------- /curves.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | 6 | TESTTASK = 'DMS' 7 | 8 | lt = np.loadtxt 9 | 10 | bls = [lt('blosses_onerun.txt')] 11 | #bls = [lt('bl1.txt'), lt('bl2.txt'), lt('bl3.txt'), lt('bl4.txt'), lt('bl5.txt'), lt('bl6.txt')] 12 | 13 | 14 | LEN = np.min([x.size for x in bls]) 15 | bl = np.vstack( [x[:LEN] for x in bls] ) 16 | print(LEN) 17 | 18 | 19 | bl = .5 + .5 * bl 20 | 21 | #bl = .5 + .5 * np.loadtxt('blosses_onerun.txt') 22 | 23 | if(len(bl.shape)<2): # If there is only a single run, add a singleton dimension 24 | bl = bl[None, :] 25 | print(bl.shape) 26 | ss = bl.shape[1] # Number of generations 27 | 28 | plt.figure(figsize=(4,4)) 29 | 30 | xr = np.arange(len(bl[0,:])) 31 | plt.fill_between(xr[xr%10 != 0], np.quantile(bl, .25, axis=0).T[xr % 10 != 0], np.quantile(bl, .75, axis=0).T[xr % 10 != 0], color='b', alpha=.3) 32 | plt.plot(xr[xr % 10 != 0], np.quantile(bl, .5, axis=0).T[xr % 10 != 0], 'b', label='Training tasks'); 33 | plt.fill_between(xr[::10], np.quantile(bl, .25, axis=0).T[0::10], np.quantile(bl, .75, axis=0).T[0::10], color='r', alpha=.3) 34 | plt.plot(xr[::10], np.quantile(bl, .5, axis=0).T[0::10], 'r', label='Test task') 35 | 36 | 37 | plt.xlabel('Generations') 38 | plt.ylabel('% correct over last 100 trials') 39 | plt.legend(loc='lower right') 40 | 41 | plt.title('Test task: '+str(TESTTASK).upper()) 42 | 43 | plt.tight_layout() 44 | plt.show() 45 | -------------------------------------------------------------------------------- /decoding/HOWTOGENERATEFIGURES.txt: -------------------------------------------------------------------------------- 1 | - Upload w.txt and alpha.txt from a completed run. 2 | 3 | - Run code.py with EVALW=True (line 144), twice: once with line 192 set to "EVALW and False" (to run it with initialized, random weights/alpha), and once with line 192 set to "EVALW and True" (to run it with the actual uploaded weights and alpha). 4 | 5 | - On the first run (with randomly initialized weights), download allstims.npy, allresps.npy, alltgts.npy, and rename them with the same filenames but with ".0" suffix (allstims.npy.0, allresps.npy.0, alltgts.npy.0). 6 | 7 | - On the second run (with actual uploaded w and alpha), just keep the output files under their unmodified name. 8 | 9 | - Just run decoding.py. It should generate all decoding figures on its own, as PNG files. 10 | -------------------------------------------------------------------------------- /decoding/code.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torch.backends.cudnn as cudnn 6 | import pdb 7 | 8 | import scipy 9 | from scipy import ndimage 10 | from scipy import linalg 11 | 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | 15 | import os 16 | import argparse 17 | import time 18 | 19 | import numpy as np 20 | from numpy import fft 21 | 22 | from scipy import io as spio 23 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 24 | 25 | torch.set_printoptions(precision=5) 26 | np.set_printoptions(precision=5) 27 | 28 | 29 | 30 | # Specify the test task (and its logical negation, which is also withheld from the training set) 31 | # TESTTASK = 'nand'; TESTTASKNEG = 'and' 32 | TESTTASK = 'dms'; TESTTASKNEG = 'dnms' 33 | 34 | 35 | 36 | LR = 1e-2 # Adam (evolutionary) LR. 
37 | WDECAY = 3e-4 # Evolutionary weight decay parameter (for the Adam optimizer) 38 | MUTATIONSIZE = 3 * .01 # Std dev of the Gaussian mutations of the evolutionary algorithm 39 | 40 | # ALPHAACTPEN = 3 * 3e-3 41 | ALPHAACTPEN = 3 * 3 * 10 * 3e-3 # When squaring 42 | 43 | NBGEN = 5000 # 1700 # 500 # Number of generations per run 44 | NUMGENCUTLR = 100000 45 | 46 | N = 70 # Number of neurons in the RNN. 47 | 48 | 49 | 50 | BS = 500 # 500 # 1000 # Batch size, i.e. population size for the evolutionary algorithm. 51 | assert BS % 2 == 0 # Should be even because of antithetic sampling. 52 | 53 | # Same parameters as GR Yang: 54 | TAU = 100 # Neuron membrane constant, in ms 55 | DT = 20 # Duration of a timestep, in ms 56 | 57 | 58 | # All the following times are in *timesteps*, not ms 59 | T = 50 # Number of *timesteps* per trial 60 | STIMTIME = 20 # Duration of stimulus input, total, *in timesteps* (not ms) 61 | REWARDTIME = 10 # Duration of reward signal period 62 | RESPONSETIME = 10 # Duration of response period 63 | STARTRESPONSETIME = 25 # Timestep at which response period starts 64 | ENDRESPONSETIME = STARTRESPONSETIME + RESPONSETIME 65 | STARTREWARDTIME = 36 # Timestep at which reward is delivered and reward signal starts 66 | ENDREWARDTIME = STARTREWARDTIME + REWARDTIME 67 | assert ENDREWARDTIME < T 68 | 69 | 70 | MODULTYPE = 'EXTERNAL' # 'INTERNAL' 71 | 72 | # JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 73 | # TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 74 | # PROBAMODUL = .03 # .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 75 | # ALPHAMODUL = 1.0 # .5 # Scale of the random perturbations 76 | # ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 77 | # MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for neuromodulation 78 | # MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 79 | # INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 80 | 81 | 82 | JINIT = 1.5 # Scale constant of initial network weights. See Section 2.7 in the MML paper. 83 | TAU_ET = 1000.0 # Time constant of the eligibility trace (in ms) 84 | PROBAMODUL = .1 # Probability of receiving a random perturbation, for each neuron, at each timestep. 85 | ALPHAMODUL = .5 # Scale of the random perturbations 86 | ETA = .1 * .1 * .03 if MODULTYPE == 'INTERNAL' else .03 # Learning rate for lifetime plasticity 87 | MULOSSTRACE = .9 # Time constant for the trace of previous losses that serves as a baseline for external neuromodulation 88 | MAXDW = 1e-2 # Maximum delta-weight permissible (per time step) for lifetime plasticity 89 | INITALPHA = .5 # 0.0 # .5 # Initial alpha (plasticity parameter) value 90 | 91 | 92 | 93 | # The names of all the tasks. 14 tasks in total, because "always respond 0" and "always respond 1" are not included. 94 | alltasks = ['and', 'nand' , '01', 'anti01' , '10', 'anti10', 'watchstim1', 'watchstim2' ,'dms', 'antiwatchstim2', 'antiwatchstim1', 'or', 'nor', 'dnms'] 95 | 96 | 97 | 98 | NBSTIMNEURONS = 2 # 2 Stimulus neurons. Stimuli are binary, so both neurons receive opposite-valued inputs (or 0) 99 | NBREWARDNEURONS = 2 # 6 # 2 # reward signal for this trial. A value is represented with 2 inputs, as it is for stimulus neurons. 100 | NBBIASNEURONS = 1 # Bias neurons. Activations clamped to BIASVALUE. 
101 | NBINPUTNEURONS = NBSTIMNEURONS + NBREWARDNEURONS + NBBIASNEURONS # The first NBINPUTNEURONS neurons in the network are input neurons (includes the bias, noise and reward inputs) 102 | NBRESPNEURONS = 2 # Response neurons for 0 and 1. 103 | NBMODNEURONS = 2 # Neuromodulatory output neurons 104 | NBOUTPUTNEURONS = NBRESPNEURONS + NBMODNEURONS # The last NBOUTPUTNEURONS neurons in the network are output neurons. Response neurons + Modulatory neuron. 105 | NBRESPSIGNALNEURONS = NBRESPNEURONS # Neurons that receive the response-given signal ("what response did I just give?") 106 | STIMNEURONS = np.arange(NBSTIMNEURONS) 107 | INPUTNEURONS = np.arange(NBINPUTNEURONS) 108 | OUTPUTNEURONS = np.arange(N-NBOUTPUTNEURONS, N) 109 | MODNEURONS = np.arange(N-NBOUTPUTNEURONS, N-NBOUTPUTNEURONS + NBMODNEURONS) 110 | # NUMMODNEURON = N - NBOUTPUTNEURONS # The modulatory neuron is the first output neuron 111 | RESPNEURONS = np.arange(N-NBOUTPUTNEURONS+NBMODNEURONS, N) # Then come the response neurons 112 | REWARDNEURONS = np.arange(NBSTIMNEURONS, NBSTIMNEURONS+NBREWARDNEURONS) # The neurons receiving (and broadcasting) the "reward for this trial" signal are the ones just after the stimulus inputs. 113 | BIASNEURONS = np.arange(NBSTIMNEURONS+NBREWARDNEURONS, NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS) 114 | FIRSTRESPSIGNALNEURON = NBSTIMNEURONS+NBREWARDNEURONS+NBBIASNEURONS # The first neuron that receives the response-given signal. We'll need this later 115 | assert FIRSTRESPSIGNALNEURON == NBINPUTNEURONS 116 | assert len(RESPNEURONS) == NBRESPNEURONS 117 | RESPSIGNALNEURONS = np.arange(FIRSTRESPSIGNALNEURON, FIRSTRESPSIGNALNEURON +NBRESPSIGNALNEURONS) 118 | 119 | 120 | BIASVALUE = 1.0 121 | 122 | 123 | 124 | NBTASKSPERGEN = 1 # 2 # 2 task blocks per generation 125 | 126 | 127 | NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last 100 trials of each block 128 | NBTRIALS = 300 + NBTRIALSLOSS # Total number of trials per block 129 | # NBTRIALSLOSS = 100 # Evolutionary loss is evaluated over the last 100 trials of each block 130 | # NBTRIALS = 150 + NBTRIALSLOSS # Total number of trials per block 131 | 132 | 133 | 134 | REWARDSIZE = 3.0 # 3 * 3.0 # Size of the binary-reward signal (correct/incorrect) 135 | STIMSIZE = 3.0 # Size of the stimulus input 136 | RESPSIGNALSIZE = 3.0 # Size of the response-given signal 137 | 138 | 139 | totalnbtasks = 0 140 | ticstart = time.time() 141 | 142 | 143 | # EVALW is to assess the behavior of an evolved network. Run it on a single batch of all tasks, without any mutation 144 | EVALW = True 145 | if EVALW: 146 | # NBTRIALS = NBTRIALS # more "burn-in" trials? doesn't seem to change anything 147 | NBGEN = 1 148 | NBTASKSPERGEN = 1 149 | BS = 500 150 | MUTATIONSIZE = 0 151 | allresps=[] 152 | allstims=[] 153 | alltgts=[] 154 | 155 | 156 | 157 | 158 | with torch.no_grad(): # We don't need PyTorch to keep track of gradients, since we're computing the gradient ourselves (through evolution). 159 | 160 | PRINTING = True # if numgen == 0 or np.random.rand() < .05 else False 161 | 162 | # Initialize innate weight values 163 | w = torch.randn(N,N) * JINIT / np.sqrt(N) 164 | w = w.to(device) 165 | 166 | # Initialize alpha values - the plasticity parameters (capital-pi in the paper) 167 | alpha = INITALPHA * torch.ones_like(w).to(device) 168 | 169 | # We zero out input weights to input neurons, though it probably doesn't have any effect. 170 | w.data[:NBINPUTNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 
171 | # We also zero out the weights to neuromodulatory neurons, which probably does have an effect! 172 | w.data[MODNEURONS, :] = 0 # Each *row* of w contains the weights to a single neuron. 173 | winit = w.clone() 174 | 175 | # We will be using the Adam optimizer to apply our (hand-computed) evolutionary gradients 176 | optimizer = optim.Adam([w, alpha], lr=LR, weight_decay=WDECAY) # Default betas=(0.9, 0.999) 177 | 178 | # Evolosses are real-valued losses used for evolution. Binarylosses are binary 'correct/wrong' signals, also used for logging. 179 | evolosses = [] 180 | responses0 = [] 181 | binarylosses = [] 182 | wgradnorms = [] 183 | mytaskprev = mytaskprevprev = mytaskprevprevprev = -1 184 | 185 | 186 | if not EVALW: 187 | ww = w.cpu().numpy() 188 | aa = alpha.cpu().numpy() 189 | np.savetxt('winit.txt', ww) 190 | np.savetxt('alphainit.txt', aa) 191 | 192 | if EVALW and True: 193 | w = np.loadtxt('w.txt') 194 | w = torch.from_numpy(w).float().to(device) 195 | winit = w.clone() 196 | 197 | alpha = np.loadtxt('alpha.txt') 198 | alpha = torch.from_numpy(alpha).float().to(device) 199 | # alpha.fill_(torch.mean(alpha)) 200 | 201 | 202 | print("MODULTYPE is:", MODULTYPE) 203 | assert MODULTYPE == 'EXTERNAL' or MODULTYPE == 'INTERNAL', "Modulation type must be 'INTERNAL' or 'EXTERNAL'" 204 | 205 | 206 | 207 | # Ready to start the evolutionary loop, iterating over generations (i.e. lifetimes). 208 | 209 | for numgen in range(NBGEN): 210 | 211 | 212 | 213 | if numgen == NUMGENCUTLR: 214 | for param_group in optimizer.param_groups: 215 | param_group['lr'] /= 5.0 216 | 217 | 218 | 219 | # Every 10th generation is for testing on the withheld task (with no weight change) 220 | TESTING = False 221 | if numgen == 0 or numgen == NBGEN-1 or numgen % 10 == 0: 222 | TESTING = True 223 | if PRINTING: 224 | print("TESTING") 225 | if EVALW: 226 | TESTING = False 227 | 228 | 229 | tic = time.time() 230 | responses0thisgen = [] 231 | 232 | 233 | 234 | alpha.clip_(min=0) 235 | 236 | 237 | 238 | # Generating the population of mutated individuals: 239 | 240 | # First, batch the weights. 241 | bw = torch.dstack(BS*[w]).movedim(2,0).to(device) # batched weights 242 | balpha = torch.dstack(BS*[alpha]).movedim(2,0).to(device) # batched alphas 243 | # Generate the mutations, for both w and alpha 244 | # NOTE: batch element 0 (and BS/2, its antithetic pair) are NOT mutated, and represent the current unmutated candidate genotype. 245 | mutations_wandalpha = [] 246 | for n, x in enumerate( (bw, balpha) ): 247 | mutations = torch.randn_like(x, requires_grad=False).to(device) * MUTATIONSIZE 248 | mutations[0,:,:] = 0 # 1st item in batch = current candidate 249 | mutations[BS//2:, :, :] = -mutations[:BS//2, :, :] # Antithetic sampling for mutations ! Really helps. 250 | if TESTING or EVALW: 251 | mutations *= 0.0 # No mutation - results in batch score variance being caused only by randomness in trial order and (possibly) lifetime perturbations 252 | x += mutations 253 | mutations_wandalpha.append(mutations) 254 | 255 | 256 | 257 | bw.data[:, :NBINPUTNEURONS, :] = 0 # Input neurons receive 0 connections. Probably not necessary. 258 | bworig = bw.clone() # Storing the weights for comparison purposes at the gradient step (below). 
259 | 260 | lifelosses = torch.zeros(BS, requires_grad=False).to(device) 261 | lifemselosses = torch.zeros(BS, requires_grad=False).to(device) 262 | lifeactpens = torch.zeros(BS, requires_grad=False).to(device) 263 | lifeblosses = torch.zeros(BS, requires_grad=False).to(device) 264 | 265 | 266 | 267 | 268 | # Lifetime loop, iterates over task-blocks: 269 | for numtask in range(NBTASKSPERGEN): 270 | totalnbtasks += 1 271 | 272 | COLLECTMODOUTSANDREWINS = not EVALW and ( (numtask + numgen * 2) % 7 == 0 ) 273 | 274 | # bpw = batched plastic weights 275 | bpw = torch.zeros_like(bw).to(device) # For now, plastic weights are initialized to 0 at the beginning of each task. 276 | 277 | # Initialize neural states 278 | bstates = .1 * torch.ones(BS, N).to(device) # bstates (batched states) contains the neural activations (before nonlinearity). Dimensionality appropriate for batched matrix multiplication. 279 | bstates[:, INPUTNEURONS] = 0 280 | bresps = 1.0 * bstates # bresps is the actual neural responses, after nonlinearity, and also serves as the input for the next step. 281 | bresps[:, BIASNEURONS] = BIASVALUE 282 | 283 | meanlosstrace = torch.zeros(BS, 2 * 2).to(device) 284 | bls = [] # Will store binary losses of all batch elements, for each trial of this task 285 | bl0s = [] # Same but only for batch element 0 (i.e. the unmutated candidate genome) 286 | ml0s = [] # MSE loss (the one used for evolution) for element 0 (unmutated candidate), of all trials for this task 287 | 288 | 289 | 290 | # Choose the task ! If not testing, make sure it's different from recently chosen tasks. 291 | 292 | 293 | if TESTING: 294 | mytask = TESTTASK 295 | mytasknum = alltasks.index(mytask) 296 | else: 297 | while True: 298 | mytasknum = np.random.randint(len(alltasks)) 299 | 300 | mytask = alltasks[mytasknum] 301 | 302 | if ( (mytask!= TESTTASK) 303 | and (mytask != TESTTASKNEG) # We withhold both the test task and its logical negation 304 | and (mytask != mytaskprev) 305 | and (mytask != mytaskprevprev) 306 | ): 307 | 308 | break 309 | 310 | mytaskprevprev = mytaskprev; mytaskprev= mytask 311 | 312 | 313 | 314 | # # Only use AND and NAND as tasks 315 | # mytasknum = numtask % 4 316 | # mytask = alltasks[mytasknum] 317 | # mytaskprevprev = mytaskprev; mytaskprev= mytask 318 | 319 | 320 | btasks = [] # Tasks for the whole batch 321 | for ii in range(BS//2): 322 | if TESTING: 323 | cand_task = TESTTASK 324 | cand_tasknum = alltasks.index(TESTTASK) 325 | else: 326 | while True: 327 | cand_tasknum = np.random.randint(len(alltasks)) 328 | cand_task = alltasks[cand_tasknum] 329 | if ( (cand_task!= TESTTASK) 330 | and (cand_task != TESTTASKNEG) # We withhold both the test task and its logical negation 331 | 332 | 333 | and (cand_tasknum % 2 == (numgen // 2) % 2) # Training on alternate halves of the training set at successive (pairs of) generations 334 | # and (cand_tasknum % 4 == numgen % 4) # Training on alternate quarters of the training set at successive generations 335 | 336 | 337 | ): 338 | break 339 | btasks.append(cand_task) 340 | 341 | btasks = btasks * 2 # Duplicating the list, so each antithetic pair has the same tasks. 
342 | 343 | 344 | 345 | if EVALW: 346 | btasks = [TESTTASK] * BS 347 | # btasks = alltasks * (BS // len(alltasks) + 1) 348 | # btasks = btasks[:BS] 349 | # with open('btasks.txt', 'w') as f: 350 | # for item in btasks: 351 | # f.write("%s\n" % item) 352 | 353 | 354 | # btasks = [mytask] * BS 355 | 356 | 357 | assert(len(btasks) == BS) 358 | 359 | 360 | # Cumulative MSE and binary losses for this task, over the last NBTRIALSLOSS trials of the block: 361 | taskmselosses = torch.zeros_like(lifemselosses).to(device) 362 | taskblosses = torch.zeros_like(lifemselosses).to(device) 363 | 364 | respz = [] # Response neuron outputs 365 | stimz = [] # Stimulus neurons outputs 366 | modouts = [] # Neuromodulatory output 367 | rewins = [] # Received rewards (reward neuron outputs) 368 | 369 | 370 | if PRINTING: 371 | print("task[0]:", btasks[0], "task[1]:", btasks[1]) 372 | 373 | # OK, ready to start the task. 374 | 375 | # Generate the task data (inputs and targets) for all trials: 376 | # taskdata = generateInputsAndTargetsForTask(mytask=mytask) 377 | 378 | eligtraces = torch.zeros_like(bw, requires_grad=False).to(device) # Initialize the eligibility traces at the start of each block/task. 379 | 380 | 381 | # Task loop, iterating over trials 382 | # You do NOT erase memory (neural activations or plastic weights) between successive trials ! 383 | for numtrial in range(NBTRIALS): 384 | 385 | 386 | # # Actually do initialize network activations for each trial - THIS IS ONLY FOR DEBUGGING / SIMPLER TEST TASK! 387 | # bresps.fill_(0) 388 | # bstates.fill_(0) 389 | 390 | # # We reinitialize only modulatory neuron activations for each trial - THIS IS ONLY FOR DEBUGGING / SIMPLER TEST TASK! 391 | # bresps[:, MODNEURONS] = 0 392 | # bstates[:, MODNEURONS] = 0 393 | 394 | 395 | 396 | # Initializations 397 | mselossesthistrial = torch.zeros(BS, requires_grad=False).to(device) # MSE losses for this trial 398 | totalresps = torch.zeros(BS, NBRESPNEURONS, requires_grad=False).to(device) # Will accumulate the total outputs of each network over the trial, so we can compute the network's response for this trial. 
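# (Editor's note, not part of the original code: totalresps is later read
# out to produce the network's trial response; in the top-level code.py
# this is done with
#     responses = torch.argmax(totalresps, dim=1)
# i.e. the response neuron with the highest cumulative output over the
# response period determines the binary response.)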
399 | 400 | # Generate the inputs and targets for this trial: 401 | 402 | # Pick stimulus 1 and stimulus 2 for this trial (and for each batch member): 403 | stims1 = (torch.rand(BS, 1) > .5).float() 404 | stims2 = (torch.rand(BS, 1) > .5).float() 405 | 406 | 407 | 408 | 409 | # Antithetic pairs share the exact same stimuli 410 | stims1[BS//2:, :] = stims1[:BS//2, :] 411 | stims2[BS//2:, :] = stims2[:BS//2, :] 412 | 413 | 414 | 415 | # Actual temporal inputs: 416 | inpts = np.zeros((BS, NBSTIMNEURONS, STIMTIME)) 417 | StimDur = STIMTIME 418 | StartStim = 0 419 | # The two stimuli are presented in succession, with both input neurons locked in opposite values to each other: 420 | inpts[:, 0, StartStim:StartStim+StimDur//2 - 2] = 2.0 * stims1 - 1.0 421 | inpts[:, 0, StartStim+StimDur//2:StartStim+StimDur - 2] = 2.0 * stims2 - 1.0 422 | inpts[:, 1, StartStim:StartStim+StimDur] = -inpts[:, 0, StartStim:StartStim+StimDur] 423 | 424 | inputs = torch.from_numpy(inpts).float().to(device) 425 | 426 | 427 | 428 | # Now we compute the targets, that is, the expected values of the output neurons, depending on inputs and tasks 429 | tgts = -100 * np.ones((BS, NBRESPNEURONS, RESPONSETIME)) 430 | 431 | for ii in range(BS): 432 | # First we generate the expected output for the non-null response neuron, based on inputs and task: 433 | if btasks[ii] == 'watchstim1': 434 | tgts[ii, 1, :] = stims1[ii, 0] 435 | elif btasks[ii] == 'watchstim2': 436 | tgts[ii, 1, :] = stims2[ii, 0] 437 | elif btasks[ii] == 'antiwatchstim1': 438 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] 439 | elif btasks[ii] == 'antiwatchstim2': 440 | tgts[ii, 1, :] = 1.0 - stims2[ii, 0] 441 | elif btasks[ii] == 'and': 442 | tgts[ii, 1, :] = (stims1[ii, 0] * stims2[ii, 0]) 443 | elif btasks[ii] == 'nand': 444 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] * stims2[ii, 0]) 445 | # These two lines add 25% running time to the entire program! Looks like np.clip is *slow*. 446 | # elif btasks[ii] == 'or': 447 | # tgts[ii, 1, :] = np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 448 | # elif btasks[ii] == 'nor': 449 | # tgts[ii, 1, :] = 1.0 - np.clip(stims1[ii, 0] + stims2[ii, 0], 0.0, 1.0) 450 | # Instead, we will clip after the full array is done. This should still work out the same. 451 | elif btasks[ii] == 'or': 452 | tgts[ii, 1, :] = stims1[ii, 0] + stims2[ii, 0] 453 | elif btasks[ii] == 'nor': 454 | tgts[ii, 1, :] = 1.0 - (stims1[ii, 0] + stims2[ii, 0]) 455 | elif btasks[ii] == '10': 456 | tgts[ii, 1, :] = stims1[ii, 0] * (1.0 - stims2[ii, 0]) 457 | elif btasks[ii] == 'anti10': 458 | tgts[ii, 1, :] = 1.0 - stims1[ii, 0] * (1.0 - stims2[ii, 0]) 459 | elif btasks[ii] == '01': 460 | tgts[ii, 1, :] = (1.0 - stims1[ii, 0]) * stims2[ii, 0] 461 | elif btasks[ii] == 'anti01': 462 | tgts[ii, 1, :] = 1.0 - (1.0 - stims1[ii, 0]) * stims2[ii, 0] 463 | elif btasks[ii] == 'dms': 464 | tgts[ii, 1, :] = (stims1[ii, 0] == stims2[ii, 0]) 465 | elif btasks[ii] == 'dnms': 466 | tgts[ii, 1, :] = (stims1[ii, 0] != stims2[ii, 0]) 467 | else: 468 | raise ValueError("Unknown task: " + btasks[ii]) 469 | 470 | tgts[:, 1, :] = np.clip(tgts[:, 1, :], 0.0, 1.0) # Handles 'or' (which can reach 2 before clipping) and 'nor' (which can reach -1) 471 | 472 | 473 | # tgts[:, 1, :] = 1.0 474 | 
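(Editor's note: every task above reduces to a binary function of (stim1, stim2). A compact way to sanity-check the branch logic is a lookup table of truth functions; the hypothetical `TASK_FN` table below is illustrative only and is not part of the repository, which uses the explicit if/elif chain above:)

```python
import numpy as np

# Hypothetical task -> truth-function table for sanity-checking the targets.
TASK_FN = {
    'watchstim1':     lambda s1, s2: s1,
    'watchstim2':     lambda s1, s2: s2,
    'antiwatchstim1': lambda s1, s2: 1 - s1,
    'antiwatchstim2': lambda s1, s2: 1 - s2,
    'and':    lambda s1, s2: s1 & s2,
    'nand':   lambda s1, s2: 1 - (s1 & s2),
    'or':     lambda s1, s2: s1 | s2,
    'nor':    lambda s1, s2: 1 - (s1 | s2),
    '10':     lambda s1, s2: s1 & (1 - s2),
    'anti10': lambda s1, s2: 1 - (s1 & (1 - s2)),
    '01':     lambda s1, s2: (1 - s1) & s2,
    'anti01': lambda s1, s2: 1 - ((1 - s1) & s2),
    'dms':    lambda s1, s2: (s1 == s2).astype(int),   # delayed match-to-sample
    'dnms':   lambda s1, s2: (s1 != s2).astype(int),
}

s1 = np.array([0, 0, 1, 1]); s2 = np.array([0, 1, 0, 1])
for name, fn in TASK_FN.items():
    print(name.rjust(14), fn(s1, s2))   # one truth table per task
```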
475 | # The null-response neuron's expected output is just the opposite of the non-null response neuron's output (the response is either 0 or 1). 476 | tgts[:, 0, :] = 1.0 - tgts[:, 1, :] 477 | 478 | assert np.all(np.logical_or(tgts == 0.0 , tgts == 1.0)) 479 | 480 | if EVALW: 481 | alltgts.append(tgts[:,1, 0]) 482 | allstims.append(np.hstack((stims1, stims2))) 483 | 484 | # assert numgen < 2 or numtrial < 15 485 | 486 | 487 | 488 | targets = torch.from_numpy(tgts).float().to(device) 489 | 490 | # In practice, we clip targets to 0.1/0.9 instead of actually 0.0/1.0. This may or may not help. 491 | targets.clip_(min=0.1, max=0.9) 492 | 493 | 494 | 495 | # raise ValueError 496 | 497 | 498 | # Run the network. Trial loop, iterating over timesteps 499 | for numstep in range(T): 500 | 501 | # Update neural activations, using previous-step bresps (actual neural outputs) as input: 502 | bstates += (DT / TAU) * (-bstates + torch.bmm((bw + balpha * bpw), bresps[:, :, None])[:,:,0] ) 503 | 504 | 505 | # Applying the random perturbations on neural activations, both for noise and for the lifetime plasticity algorithm (node-perturbation) 506 | # And also updating the eligibility trace appropriately 507 | if numstep > 1: 508 | perturbindices = (torch.rand(1, N) < PROBAMODUL).int() # Which neurons get perturbed? 509 | 510 | # perturbindices[0, MODNEURONS] = 0 # We disable perturbations on neuromodulatory neurons for debugging... 511 | 512 | 513 | perturbations = (ALPHAMODUL * perturbindices * (2 * torch.rand(1, N) - 1.0)).to(device) # Note the dimensions: the same noise vector is applied to all elements in the batch (to save time!) 514 | 515 | 516 | 517 | 518 | if numtrial > NBTRIALS - 20: # No perturbations during the final trials of the block - presumably so the trials used for the loss estimate (see below) are evaluated without noise 519 | perturbations.fill_(0) 520 | 521 | 522 | 523 | bstates += perturbations 524 | 525 | # Node-perturbation: Hebbian eligibility trace = product between inputs (bresps from previous time step) and *perturbations* in outputs. dH = X * deltaY 526 | # We do this with a (batched) outer product between the (column) vector of perturbations (1 per neuron) and the (row) vector of inputs 527 | # Note that here, since we have an RNN, the input is bresps - the network's responses from the previous time step 528 | if torch.sum(perturbindices) > 0: 529 | eligtraces += torch.bmm( perturbations.expand(BS, -1)[:, :, None], bresps[:, None, :] ) 530 | 531 | # Eligibility traces, unlike actual plastic weights, are decaying 532 | eligtraces -= (DT / TAU_ET) * eligtraces 533 | 534 | 535 | # We can now compute the actual neural responses for this time step, applying the appropriate nonlinearity to each neuron 536 | bresps = bstates.clone() # F.leaky_relu(bstates) 537 | # The following assumes that response neurons are the last neurons of the network ! 538 | bresps[:,N-NBRESPNEURONS:].sigmoid_() # The response neurons (NOT all output neurons - modulatory neurons not included!) are sigmoids, all others are tanh. An arbitrary design choice. 539 | bresps[:,:N-NBRESPNEURONS].tanh_() 540 | 541 | 542 | # Are we in the input presentation period? Then apply the inputs. 543 | # Inputs are clamping, fixing the response of the input neurons. 544 | if numstep < STIMTIME: 545 | # bresps[:, STIMNEURONS] = STIMSIZE * inputs[:, :, numstep] 546 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = STIMSIZE * inputs[:, :, numstep] 547 | else: 548 | bresps[:, STIMNEURONS[0]:STIMNEURONS[-1]+1] = 0 549 | # bresps[:, STIMNEURONS] = 0 550 | 551 | # Bias input is always-on, always clamping. 552 | # bresps[:, BIASNEURONS] = BIASVALUE 553 | bresps[:, BIASNEURONS[0]] = BIASVALUE 554 | 555 | # All the responses have now been computed for this step 556 | 
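(Editor's note: two mechanics above are worth seeing in isolation: the leaky, Euler-discretized RNN update `x += (DT/TAU)(-x + W·r)`, and the node-perturbation eligibility trace, which accumulates outer products of output perturbations with the previous step's responses and decays exponentially. A minimal single-network sketch, with illustrative sizes and a plain tanh everywhere, no batching, plasticity, or clamped inputs:)

```python
import torch

# Minimal sketch of the recurrent update and the eligibility trace.
N, DT, TAU, TAU_ET = 70, 20.0, 100.0, 1000.0
PROBAMODUL, ALPHAMODUL = 0.1, 0.5

W = torch.randn(N, N) / (N ** 0.5)          # arbitrary fixed weights for the sketch
x = torch.zeros(N)                          # activations (pre-nonlinearity)
r = torch.zeros(N)                          # responses (post-nonlinearity)
elig = torch.zeros(N, N)                    # one eligibility trace per synapse

for step in range(50):
    x = x + (DT / TAU) * (-x + W @ r)       # leaky Euler-discretized update

    # Sparse random perturbations of the activations (node-perturbation)
    mask = (torch.rand(N) < PROBAMODUL).float()
    pert = ALPHAMODUL * mask * (2 * torch.rand(N) - 1.0)
    x = x + pert

    # Eligibility: outer product of output perturbations x previous responses,
    # then exponential decay with time constant TAU_ET
    elig = elig + torch.outer(pert, r)
    elig = elig - (DT / TAU_ET) * elig

    r = torch.tanh(x)                       # responses fed back at the next step
```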
557 | # Are we in the response period? Then collect the network's response. 558 | if numstep >= STARTRESPONSETIME and numstep < ENDRESPONSETIME: 559 | 560 | assert numstep < STARTREWARDTIME 561 | # Accumulate the total activation of each response neuron, so that we can compute the network's actual response at the end of the response period: 562 | # totalresps += bresps[:, RESPNEURONS] 563 | totalresps += bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] 564 | # Accumulate the MSE error between actual and expected outputs: 565 | # mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 566 | mselossesthistrial += torch.sum( (bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] - targets[:, :, numstep - STARTRESPONSETIME]) ** 2, axis=1 ) / RESPONSETIME 567 | 568 | else: 569 | bresps[:, RESPNEURONS[0]:RESPNEURONS[-1]+1] = 0.0 570 | # bresps[:, RESPNEURONS] = 0.0 571 | 572 | 573 | # Is the response period for this trial finished, or equivalently, are we at the first step of the reward / feedback period? 574 | # If so, compute the network's response (i.e. which neuron fired most) 575 | # Also, if using external neuromodulation, we compute the neuromodulation (based on baselined rewards for this trial) and apply plasticity 576 | if numstep == STARTREWARDTIME: 577 | # The network's response for this trial (0 or 1) is the index of the response neuron that had the highest cumulative output over the response period 578 | responses = torch.argmax(totalresps, dim=1) # responses is a 1D, integer-valued array of size BS. totalresps is a 2D, real-valued array of size BS x NBRESPNEURONS 579 | 580 | # blosses (binary losses) is a 1/-1 "correct/wrong" signal for each batch element for this trial. 581 | blosses = 2.0 * (responses == torch.argmax(targets[:, :, 0], dim=1)).float() - 1.0 582 | responses0thisgen.append(float(responses[0])) 583 | 584 | # We also want the 1-hot version of the response for each batch element. This will be used as the response signal below. 585 | if numtrial > 0: 586 | responses1hot_prev = responses1hot.clone() 587 | responses1hot = F.one_hot(responses, 2) 588 | 589 | # Now we apply lifetime plasticity, with node-perturbation, based on the eligibility trace and a suitably baselined reward/loss 590 | 591 | 592 | # Baseline computation - only used for external neuromodulation experiments 593 | # We compute separate baseline (running average) losses for different types of trials, as defined by their inputs (as in Miconi, eLife 2017). 594 | # So we need to find out the trial type for each element in the batch. 595 | # input1 = inputs[:, 0, 0]; input2 = inputs[:, 1, 0] # Uh, what was that? 596 | input1 = stims1[:, 0]; input2 = stims2[:, 0] 597 | trialtypes = (input1>0).long() * 2 + (input2>0).long() 598 | 599 | if MODULTYPE == 'EXTERNAL' and numtrial > 30: # + (300 if EVALW else 0): 600 | dw = - (ETA * eligtraces * ( meanlosstrace[torch.arange(BS), trialtypes] * (mselossesthistrial - meanlosstrace[torch.arange(BS), trialtypes]) )[:, None, None]).clamp(-MAXDW, MAXDW) 601 | bpw += dw 602 | 603 | 604 | 605 | # Updating the baseline - running average of losses, for each batch element, for the trial type just seen 606 | meanlosstrace[torch.arange(BS).long(), trialtypes] *= MULOSSTRACE 607 | meanlosstrace[torch.arange(BS).long(), trialtypes] += (1.0 - MULOSSTRACE) * mselossesthistrial 608 | 609 | 610 | 611 | 
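(Editor's note: the `dw` expression above is the heart of the external node-perturbation rule: the per-synapse eligibility trace is scaled by the deviation of this trial's loss from a per-trial-type running-average baseline (with an additional factor of the baseline itself, as written in the code), negated so that worse-than-average trials push against the perturbed direction, and clamped. Isolated, with hypothetical sizes and random stand-in data:)

```python
import torch

# Sketch of one baselined node-perturbation weight update.
BS, N, ETA, MAXDW, MULOSSTRACE = 4, 70, 0.03, 1e-2, 0.9

eligtraces = torch.randn(BS, N, N)    # accumulated (perturbation x input) products
loss = torch.rand(BS)                 # this trial's MSE loss per batch element
baseline = torch.rand(BS)             # running-average loss for this trial type

# Better-than-baseline trials (loss < baseline) move weights along the
# eligibility trace; worse-than-baseline trials move them the opposite way.
dw = -(ETA * eligtraces * (baseline * (loss - baseline))[:, None, None]).clamp(-MAXDW, MAXDW)
bpw = torch.zeros(BS, N, N) + dw      # plastic weights accumulate dw trial by trial

# The baseline itself is an exponential running average of past losses:
baseline = MULOSSTRACE * baseline + (1.0 - MULOSSTRACE) * loss
```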
612 | # Plasticity computation for internal (network-controlled) neuromodulation. 613 | # Note that it is applied at every time step, unlike the external neuromodulation experiments, which only apply plasticity once per trial, at the beginning of the reward period (see above). 614 | if numtrial > 10 and MODULTYPE == 'INTERNAL': # Lifetime plasticity is only applied after a few burn-in trials. 615 | # eligtraces: BS x N x N (1 per connection & batch element). mselossesthistrial: BS. meanlosstrace: BS x (N.N). trialtypes: BS. bresps/bstates: BS x N 616 | # dw should have shape BS x N x N, i.e. one for each connection and batch element. Do not sum over the batch dimension! The batch is purely evolutionary ! 617 | 618 | # Compute and apply the plasticity, based on accumulated eligibility traces and the output of the neuromodulatory neurons 619 | if numstep > 0: 620 | modulsprev = moduls.clone() 621 | moduls = bresps[:, MODNEURONS[0]] - bresps[:, MODNEURONS[1]] 622 | # lifeactpens += torch.abs(moduls) 623 | if numstep > 0: 624 | lifeactpens += (modulsprev - moduls) ** 2 625 | 626 | 627 | # If we use only the first neuromodulatory neuron's (tanh) output as the actual neuromodulatory output: 628 | # dw = (ETA * eligtraces * bresps[:, MODNEURONS[0]][:, None, None] ).clamp(-MAXDW, MAXDW) 629 | 630 | dw = (ETA * eligtraces * moduls[:, None, None] ).clamp(-MAXDW, MAXDW) 631 | 632 | 633 | bpw += dw 634 | 635 | 636 | 637 | # Are we in the reward signal period? 638 | # Note: the actual neuromodulatory reward signal (which influences plasticity) is applied just once per trial, above. Here we provide a feedback signal to the network, 639 | # i.e. "how wrong was my response for this trial?" 640 | # We also provide a signal indicating which response it gave in this trial (in theory it should be able to calculate it itself if needed, but this may help). 641 | if numstep >= STARTREWARDTIME and numstep < ENDREWARDTIME: # Note that by this time, the loss has been computed and is fixed 642 | 643 | # # We provide a binary, "correct/incorrect" signal to the network 644 | # bresps[:,REWARDNEURONS[0]] = REWARDSIZE * blosses[:] # Reward input is also clamping 645 | # bresps[:,REWARDNEURONS[1]] = -REWARDSIZE * blosses[:] # Reward input is also clamping 646 | 647 | # Actually, we provide the same MSE loss that is used to guide evolution 648 | # bresps[:,REWARDNEURONS[0]] = REWARDSIZE * mselossesthistrial[:] # Reward input is also clamping 649 | # bresps[:,REWARDNEURONS[1]] = -REWARDSIZE * mselossesthistrial[:] # Reward input is also clamping 650 | 651 | # Actually^2, we duplicate the reward signal across many neurons to (maybe) increase its potential impact and exploitability (?...) 652 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 653 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE, which is always +ve. 654 | # bresps[:,REWARDNEURONS] = REWARDSIZE * mselossesthistrial[:, None] # Reward input is also clamping 655 | # bresps[:,REWARDNEURONS].clip_(min=0) # Not sure if this helps. Well, obviously not if using plain MSE, which is always +ve. 656 | 657 | 658 | 659 | 660 | # We provide the network with a signal indicating the actual response it chose for this trial. Not sure if needed.
661 | # bresps[:, RESPSIGNALNEURONS] = responses1hot.float() * RESPSIGNALSIZE 662 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = responses1hot.float() * RESPSIGNALSIZE 663 | 664 | 665 | 666 | else: 667 | bresps[:,REWARDNEURONS[0]:REWARDNEURONS[-1]+1] = 0 668 | bresps[:, RESPSIGNALNEURONS[0]:RESPSIGNALNEURONS[-1]+1] = 0 669 | # bresps[:,REWARDNEURONS] = 0 670 | # bresps[:, RESPSIGNALNEURONS] = 0 671 | 672 | 673 | 674 | # modouts.append(float(moduls[0])) 675 | # rewins.append(float(bresps[0, REWARDNEURONS[0]])) 676 | if COLLECTMODOUTSANDREWINS: 677 | stimz.append(bresps[0, STIMNEURONS[0]]) 678 | respz.append(bresps[0, RESPNEURONS[1]] - bresps[0, RESPNEURONS[0]]) 679 | if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 680 | modouts.append(moduls[0]) 681 | rewins.append(bresps[0, REWARDNEURONS[0]]) 682 | 683 | 684 | if EVALW: 685 | allresps.append(bresps.cpu().numpy().astype('float32')) 686 | # if EVALW and numtrial >= NBTRIALS - 50: 687 | # stimz.append(bresps[:, STIMNEURONS[0]]) 688 | # respz.append(bresps[:, RESPNEURONS[1]] - bresps[:, RESPNEURONS[0]]) 689 | # if MODULTYPE == 'INTERNAL': # Doesn't make sense for external modulation 690 | # modouts.append(moduls[:]) 691 | # rewins.append(bresps[:, REWARDNEURONS[0]]) 692 | 693 | 694 | 695 | # Now all steps done for this trial: 696 | 697 | if PRINTING: 698 | if np.random.rand() < .1: 699 | print("|", int(responses[0]), int(blosses[0]), end=' ') 700 | 701 | ml0s.append(float(mselossesthistrial[0])) 702 | bl0s.append(float(blosses[0])) 703 | bls.append(blosses.cpu().numpy()) 704 | 705 | 706 | # If this trial is part of the last NBTRIALSLOSS, we accumulate its trial loss into the agent's total loss for this task. 707 | if numtrial >= NBTRIALS - NBTRIALSLOSS: # Lifetime losses are only estimated over the last NBTRIALSLOSS trials 708 | # taskmselosses += 2 * mselossesthistrial / NBTRIALSLOSS # the 2* doesn't mean anything 709 | taskmselosses += mselossesthistrial / NBTRIALSLOSS 710 | taskblosses += blosses / NBTRIALSLOSS 711 | 712 | 713 | # Now all trials done for this task: 714 | if PRINTING: 715 | # print("Med task mseloss:", "{:.4f}".format(float(torch.median(taskmselosses)))) 716 | print("\nTASK BLOSS[0]:", "{:.4f}".format(float(taskblosses[0])), "Med task bloss:", "{:.4f}".format(float(torch.median(taskblosses))), 717 | "Med-abs totaldw[0]:", "{:.4f}".format(float(torch.median(torch.abs(bpw[0,:,:])))), 718 | "Max-abs totaldw[0]:", "{:.4f}".format(float(torch.max(torch.abs(bpw[0,:,:])))) 719 | ) 720 | 721 | 722 | 723 | 724 | if COLLECTMODOUTSANDREWINS: 725 | print("Saving Resps, Stims, RI, MO") 726 | 727 | np.savetxt('stims.txt', np.array([float(x) for x in stimz])) 728 | np.savetxt('resps.txt', np.array([float(x) for x in respz])) 729 | np.savetxt('modouts.txt', np.array([float(x) for x in modouts])) 730 | np.savetxt('rewins.txt', np.array([float(x) for x in rewins])) 731 | 732 | # print("") 733 | lifemselosses += taskmselosses / NBTASKSPERGEN 734 | lifeblosses += taskblosses / NBTASKSPERGEN 735 | 736 | if (TESTING or numgen == 0) and numtask == 0: 737 | # These files contain respectively the first and *latest* Testing block of the *current* run only. 
738 | FNAME = 'bl_1standLastBlock_gen0.txt' if numgen == 0 else 'bl_1standLastBlock_lastgen.txt' 739 | # np.savetxt(FNAME, np.array(bl0s)) 740 | np.savetxt(FNAME, np.vstack(bls)) 741 | 742 | 743 | 744 | # After all tasks done for this lifetime / generation: 745 | 746 | lifeactpens /= (NBTASKSPERGEN * NBTRIALS) 747 | # lifeactpens -= torch.mean(lifeactpens); lifeactpens /= torch.std(lifeactpens) 748 | # lifeactpens += torch.mean(lifemselosses); lifeactpens *= torch.std(lifemselosses) 749 | 750 | lifelosses = lifemselosses + ALPHAACTPEN * lifeactpens 751 | 752 | binarylosses.append(float(lifeblosses[0])) 753 | evolosses.append(float(lifemselosses[0])) 754 | 755 | 756 | if TESTING and not EVALW: 757 | np.savetxt('blosses_onerun.txt', np.array(binarylosses)) 758 | np.savetxt('mselosses_onerun.txt', np.array(evolosses)) 759 | ww = w.cpu().numpy() 760 | pw0 = bpw[0,:,:].cpu().numpy() 761 | aa = alpha.cpu().numpy() 762 | np.savetxt('w.txt', ww) 763 | np.savetxt('pw0.txt', pw0) 764 | np.savetxt('alpha.txt', aa) 765 | 766 | 767 | if EVALW and True: 768 | # Note: we use .npy format, because multi-dimensional. 769 | 770 | np.save('allstims.npy', np.stack(allstims, -1)) 771 | np.save('alltgts.npy', np.stack(alltgts, -1)) 772 | 773 | # print(len(allresps), len(allstims), len(alltgts)) 774 | assert len(allresps) == NBTRIALS * T 775 | # print(allresps[0].shape, allstims[0].shape, alltgts[0].shape) 776 | print("Rearranging saved responses into appropriate shape...") 777 | z1 = np.dstack(allresps) 778 | z2 = np.stack(np.split(z1, NBTRIALS, axis=2), axis=-1) 779 | print("Final shape of the saved responses:", z2.shape) 780 | assert(z2.shape == (BS, N, T, NBTRIALS)) 781 | np.save('allresps.npy', z2[:,:,:,[29,-1]]) # We only store response data for 29th (before plasticity starts) and last trial (to keep file size manageable) 782 | 783 | 784 | 785 | # Now we're ready to perform evolution (by computing gradients by hand, and then applying the optimizer with these gradients) 786 | optimizer.zero_grad() 787 | 788 | # Gradient is just loss x mutation (remember we use antithetic sampling) 789 | # gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) # / BS 790 | gradient = torch.sum(mutations_wandalpha[0] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 791 | 792 | 793 | # gradient = gradient / 100 794 | 795 | 796 | wgradnorm = float(torch.norm(gradient)) 797 | wgradnorms.append(wgradnorm) 798 | if PRINTING: 799 | print("norm w:", "{:.4f}".format(float(torch.norm(w))), "norm gradient:", "{:.4f}".format(wgradnorm), 800 | "med-abs w:", "{:.4f}".format(float(torch.median(torch.abs(w)))), 801 | "max-abs w:", "{:.4f}".format(float(torch.max(torch.abs(w)))), 802 | "norm a:", "{:.4f}".format(float(torch.norm(alpha))), "mean a:", "{:.4f}".format(float(torch.mean(alpha)))) 803 | 804 | 805 | w.grad = gradient 806 | wprev = w.clone() 807 | 808 | # gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) # / BS 809 | gradientalpha = torch.sum(mutations_wandalpha[1] * lifelosses[:, None, None], axis=0) / (BS * MUTATIONSIZE * MUTATIONSIZE) 810 | 811 | 812 | # gradientalpha = gradientalpha / 100 813 | 814 | 815 | alpha.grad = gradientalpha 816 | alphaprev = alpha.clone() 817 | 818 | if numgen > 0 and not TESTING and not EVALW: 819 | optimizer.step() 820 | 821 | 822 | wdiff = w - wprev 823 | adiff = alpha - alphaprev 824 | if PRINTING: 825 | print("Norm w-wprev:", "{:.4f}".format(float(torch.norm(wdiff))), "Max abs w-wprev:", 
"{:.4f}".format(float(torch.max(torch.abs(wdiff)))), 826 | "Norm a-aprev:", "{:.4f}".format(float(torch.norm(adiff))), "Max abs a-aprev:", "{:.4f}".format(float(torch.max(torch.abs(adiff)))) ) 827 | 828 | 829 | 830 | if PRINTING: 831 | print("Med/min/max/Half-Nth/0th loss in batch:", float(torch.median(lifelosses)), float(torch.min(lifelosses)), float(torch.max(lifelosses)), 832 | float(lifelosses[BS//2]), float(lifelosses[0])) 833 | print("Med/min/max/Half-Nth/0th life mse loss in batch:", float(torch.median(lifemselosses)), float(torch.min(lifemselosses)), float(torch.max(lifemselosses)), 834 | float(lifemselosses[BS//2]), float(lifemselosses[0])) 835 | print("Med/min/max/Half-Nth/0th activity penalty in batch:", float(torch.median(lifeactpens)), float(torch.min(lifeactpens)), float(torch.max(lifeactpens)), 836 | float(lifeactpens[BS//2]), float(lifeactpens[0])) 837 | print("Gen", numgen, "done in", time.time()-tic) 838 | 839 | 840 | 841 | 842 | 843 | 844 | print("Time taken:", time.time()-ticstart) 845 | 846 | -------------------------------------------------------------------------------- /decoding/decoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np; import matplotlib.pyplot as plt 2 | import sklearn 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.linear_model import LinearRegression 5 | 6 | print("This shows clipped graphs, BUT the image files will be OK!") 7 | 8 | print('ATTENTION: we create graphs for decoding target, stimulus 1, and stimulus 2!') 9 | 10 | #print(r.shape, s.shape, t.shape) 11 | # (500, 70, 50, 2) (500, 2, 400) (500, 400) 12 | # r has only the first and last trial 13 | 14 | NBINPUTNEURONS = 7 15 | NBOUTPUTNEURONS = 6 16 | NBTRIALS = 400 17 | 18 | 19 | for numfig, figname in enumerate(['target', 'stim1', 'stim2']): 20 | fig = plt.figure(figsize=(5,5)); 21 | 22 | ff, axes = plt.subplots(2,2) 23 | ax = axes[0,0] 24 | 25 | print("Making figure", figname) 26 | 27 | for numgen in range(2): 28 | 29 | if numgen == 1: 30 | r = np.load('allresps.npy') ; s = np.load('allstims.npy') ; t = np.load('alltgts.npy') 31 | else: 32 | r = np.load('allresps.npy.0'); s = np.load('allstims.npy.0'); t = np.load('alltgts.npy.0') 33 | 34 | for numtrial in range(2): 35 | 36 | numplot = 1 + 2*numgen + numtrial 37 | print(numplot, "/", 2*2) 38 | plt.subplot(2,2, numplot) 39 | plt.gca().set_title('Gen '+str(numgen*1000)+' / Trial '+str(numtrial*NBTRIALS), fontsize=10) 40 | 41 | # Which trial are we looking at - first (well, actually 9th or 29th - last before onset of plasticity) or last? 
42 | if numtrial == 0: 43 | rt = 0; st = 29; tt = 29 # Older ones use 29 as the "first" trial 44 | #rt = 0; st = 9; tt = 9 45 | else: 46 | rt = 1; st = NBTRIALS - 1; tt = NBTRIALS - 1 47 | 48 | allvals = [] 49 | for timepoint_train in range(50): 50 | if timepoint_train % 10 == 9: 51 | print(timepoint_train+1, '/ 50') 52 | vals_thistrainpoint = [] 53 | 54 | #for timepoint_test in range(5): # faster, for debugging 55 | for timepoint_test in range(50): 56 | 57 | 58 | if numfig == 0: 59 | # predicting target 60 | y = (t[:, tt] - .5) * 2 61 | elif numfig == 1: 62 | # predicting first stimulus 63 | y = (s[:, 0, st] - .5) * 2 64 | elif numfig == 2: 65 | # predicting second stimulus 66 | y = (s[:, 1, st] - .5) * 2 67 | 68 | 69 | x_test = r[125:250, NBINPUTNEURONS:-NBOUTPUTNEURONS, timepoint_test, rt] 70 | y_test = y[125:250] 71 | x_test = x_test - np.mean(x_test, axis=0) 72 | x_test = x_test / (1e-8 + np.std(x_test, axis=0)) 73 | 74 | score = 0 75 | 76 | nbtrainsets = 3 # ideally but not necessarily a divisor of 125, smaller=faster 77 | for numtrain in range(nbtrainsets): 78 | 79 | setsize = 125 // nbtrainsets 80 | 81 | x_train = r[numtrain*setsize:(numtrain+1)*setsize, NBINPUTNEURONS:-NBOUTPUTNEURONS, timepoint_train, rt] 82 | y_train = y[numtrain*setsize:(numtrain+1)*setsize] 83 | 84 | # Normalizing data to allow sklearn fitting 85 | x_train = x_train - np.mean(x_train, axis=0) 86 | x_train = x_train / (1e-8 + np.std(x_train, axis=0)) 87 | 88 | traind1 = np.mean(x_train[y_train>0, :], axis=0) 89 | traind2 = np.mean(x_train[y_train<0, :], axis=0) 90 | 91 | 92 | cc1 = np.corrcoef(np.vstack((traind1, x_test)))[0, 1:] 93 | cc2 = np.corrcoef(np.vstack((traind2, x_test)))[0, 1:] 94 | choice = 2.0 * (cc1 > cc2) - 1.0 95 | 96 | score = score + np.mean(y_test == choice) 97 | score = score / nbtrainsets 98 | 99 | 100 | vals_thistrainpoint.append(score) 101 | allvals.append(vals_thistrainpoint) 102 | 103 | allvals = np.array(allvals) 104 | 105 | 106 | plt.imshow(allvals); plt.axhline(y=25,color='b', ls=":"); plt.axhline(y=35,color='b', ls=":"); plt.axvline(x=25,color='b', ls=":"); plt.axvline(x=35,color='b', ls=":") 107 | 108 | 109 | plt.xticks(np.arange(9,50,10), labels=[str(z) for z in 20*(1 + np.arange(9,50,10))] ) 110 | plt.yticks(np.arange(9,50,10), labels=[str(z) for z in 20*(1 + np.arange(9,50,10))] ) 111 | plt.clim(0, 1) 112 | if numgen == 1: 113 | plt.xlabel('Train time (ms)') 114 | if numtrial == 0: 115 | plt.ylabel('Test time (ms)') 116 | if numtrial == 1: 117 | plt.colorbar(); 118 | 119 | #fig.suptitle('Changes due to lifetime learning') 120 | #fig.supylabel('Changes due to evolution') 121 | plt.tight_layout() 122 | if numfig == 0: 123 | ax.text(60, -20 ,'Changes due to lifetime learning', ha="center", va="center",size=10, color='b') 124 | ax.text(-38, 56 ,'Changes due to evolution', rotation=90, ha="center", va="center",size=10, color='b') 125 | ax.annotate('', xytext=(-.2, 1.3), xycoords='axes fraction', xy=(3, 1.3), 126 | arrowprops=dict(arrowstyle="->", color='b')) 127 | ax.annotate('', xytext=(-.6, 1), xycoords='axes fraction', xy=(-.6, -1.3), 128 | arrowprops=dict(arrowstyle="->", color='b')) 129 | plt.show() 130 | plt.savefig("image_"+figname+".png",bbox_inches='tight',dpi=200) 131 | 132 | 133 | --------------------------------------------------------------------------------
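(Editor's closing note on `decoding.py`: despite the `LogisticRegression`/`LinearRegression` imports, the decoder actually used in lines 88-97 above is a correlation-based nearest-class-mean classifier: each test trial is assigned the class whose mean training-set activity pattern it correlates with best. A self-contained sketch of that decoder on synthetic data follows; shapes and the injected class signal are illustrative only, so accuracy should come out well above the 0.5 chance level:)

```python
import numpy as np

# Sketch of the correlation / nearest-class-mean decoder used in decoding.py.
rng = np.random.default_rng(0)
n_train, n_test, n_units = 40, 125, 57

y_train = rng.choice([-1.0, 1.0], size=n_train)
y_test = rng.choice([-1.0, 1.0], size=n_test)
signal = 0.5                                       # class-dependent offset
x_train = rng.standard_normal((n_train, n_units)) + signal * y_train[:, None]
x_test = rng.standard_normal((n_test, n_units)) + signal * y_test[:, None]

mean_pos = x_train[y_train > 0].mean(axis=0)       # template for class +1
mean_neg = x_train[y_train < 0].mean(axis=0)       # template for class -1

# Row 0 of the stacked matrix is the template; entries [0, 1:] of the
# correlation matrix are its correlations with each test trial.
cc_pos = np.corrcoef(np.vstack((mean_pos, x_test)))[0, 1:]
cc_neg = np.corrcoef(np.vstack((mean_neg, x_test)))[0, 1:]
choice = 2.0 * (cc_pos > cc_neg) - 1.0             # pick the closer template

print("decoding accuracy:", np.mean(choice == y_test))
```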