├── LICENSE ├── PaperSimulations ├── Memory Simulation (Bower, 3 sentences).ipynb ├── Memory Simulation (Dubrow and Davachi, 2013; 2016) parameter sensitivity.ipynb ├── Memory Simulation (Dubrow and Davachi, 2013; 2016).ipynb ├── Memory Simulation (Pettijohn, et al, 2016).ipynb ├── Memory Simulation (Radvansky & Copeland, 2006).ipynb ├── Permutation testing of Video Segmentation.ipynb ├── README.md ├── Segmentation - Generalizing Structure (Stationary).ipynb ├── Segmentation - Generalizing Structure.ipynb ├── Segmentation - Schapiro (n250).ipynb ├── Segmentation - Video (Dishes).ipynb └── run_dubrow_parameter_sensitivity.py ├── README.md ├── Tutorials ├── Demo - HRR.ipynb ├── Demo - Motion Capture Data.ipynb ├── Demo - Segmentation and Memory Tutorial.ipynb ├── Demo - Toy Data (Segmentation).ipynb └── Readme.md ├── data ├── motion_data.pkl ├── videodata │ └── video_color_Z_embedded_64_5epoch.npy ├── zachs2006_data021011.dat ├── zachs_2006_young_unwarned.csv └── zachs_2006_young_warned.csv ├── environment.yml ├── models ├── __init__.py ├── event_models.py ├── memory.py ├── sem.py └── utils.py ├── opt ├── __init__.py ├── csw_utils.pyc ├── hrr.py └── utils.py └── simulations ├── __init__.py ├── exp_dubrow.py ├── exp_pettijohn.py ├── exp_radvansky.py ├── exp_schapiro.py ├── saved_simulations ├── Dubrow_param_sensitivity.pkl ├── Dubrow_sim_0.pkl ├── Dubrow_sim_1.pkl ├── Dubrow_sim_10.pkl ├── Dubrow_sim_11.pkl ├── Dubrow_sim_12.pkl ├── Dubrow_sim_13.pkl ├── Dubrow_sim_14.pkl ├── Dubrow_sim_15.pkl ├── Dubrow_sim_16.pkl ├── Dubrow_sim_17.pkl ├── Dubrow_sim_18.pkl ├── Dubrow_sim_19.pkl ├── Dubrow_sim_2.pkl ├── Dubrow_sim_20.pkl ├── Dubrow_sim_21.pkl ├── Dubrow_sim_22.pkl ├── Dubrow_sim_23.pkl ├── Dubrow_sim_24.pkl ├── Dubrow_sim_3.pkl ├── Dubrow_sim_4.pkl ├── Dubrow_sim_5.pkl ├── Dubrow_sim_6.pkl ├── Dubrow_sim_7.pkl ├── Dubrow_sim_8.pkl ├── Dubrow_sim_9.pkl ├── EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl ├── EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl └── radvansky_sims.pkl └── video_segmentation.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ProjectSEM 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /PaperSimulations/README.md: -------------------------------------------------------------------------------- 1 | # Simulations in the Paper 2 | 3 | 4 | 5 | There are also multiple simulations that demonstrate how the model can capture a wide range of empirical phenomena 6 | in the event cognition literature: 7 | * `Segmentation - Video (Dishes)`: shows human-like segmentation of video data, originally used in Zacks & Tversky, 2001. 8 | The dimensionality of the videos has been reduced using a variational auto-encoder, the code for which is available as 9 | a separate library [https://github.com/ProjectSEM/VAE-video](https://github.com/ProjectSEM/VAE-video) 10 | * `Segmentation - Schapiro (n250)`: a simulation of the task found in Schapiro, et al, 2013. 11 | * `Memory Simulation (Bower, 3 sentences)`: a simulation of the classic finding in Bower, 1979 12 | * `Memory Simulation (Radvansky & Copeland, 2006)`: a simulation of the findings in Radvansky & Copeland, 2006 13 | * `Memory Simulation (Pettijohn, et al, 2016)`: a simulation of the findings in Pettijohn, et al, 2016 14 | * `Memory Simulation (Dubrow and Davachi, 2013; 2016)`: a simulation of the findings in Dubrow and Davachi, 2013 and 2016 15 | 16 | There are also follow-up analyses: 17 | * `Memory Simulation (Dubrow and Davachi, 2013; 2016) parameter sensitivity`: looks at memory corruption noise and how it affects order memory 18 | * `Segmentation - Generalizing Structure (Stationary)`: looks at a reduced model that does not simulate event dynamics. 19 | 20 | -------------------------------------------------------------------------------- /PaperSimulations/run_dubrow_parameter_sensitivity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from models import * 4 | from tqdm import tnrange 5 | from simulations.exp_dubrow import run_subject, generate_experiment 6 | 7 | 8 | 9 | # SEM parameters 10 | df0 = 1. 11 | scale0 = .2 12 | 13 | mode = df0 * scale0 / (df0 + 2) 14 | print("Prior variance (mode): {}".format(mode)) 15 | 16 | lmda = 10.0 # stickiness parameter 17 | alfa = 1.
# concentration parameter 18 | 19 | f_class = GRUEvent 20 | f_opts=dict(var_scale0=scale0, var_df0=df0) 21 | 22 | # create the corrupted memory trace 23 | # noise parameters 24 | b = 2 25 | tau = 0.1 26 | print("tau: {}".format(tau)) 27 | 28 | # set the parameters for the Gibbs sampler 29 | gibbs_kwargs = dict( 30 |     memory_alpha = alfa, 31 |     memory_lambda = lmda, 32 |     memory_epsilon = np.exp(-20), 33 |     b = b, # re-defined here for completeness 34 |     tau = tau, # ibid 35 |     n_samples = 250, 36 |     n_burnin = 100, 37 |     progress_bar=False, 38 | ) 39 | sem_kwargs = dict(lmda=lmda, alfa=alfa, f_class=f_class, f_opts=f_opts) 40 | 41 | epsilon_e = 0.25 42 | 43 | x_list_items, e_tokens = generate_experiment() 44 | 45 | mode = df0 * scale0 / (df0 + 2) 46 | print("Prior variance (mode): {}".format(mode)) 47 | print("Median Feature variance: {}".format( 48 |     np.median(np.var(np.concatenate(x_list_items), axis=0)))) 49 | 50 | sem_kwargs = dict( 51 |     lmda=lmda, alfa=alfa, f_class=f_class, f_opts=f_opts 52 | ) 53 | 54 | sem = SEM(**sem_kwargs) 55 | sem.run_w_boundaries(list_events=x_list_items) 56 | print sem.results.e_hat 57 | 58 | # fig, axes = plt.subplots(2, 1) 59 | # axes[0].plot(sem.results.log_prior) 60 | # axes[1].plot(sem.results.log_like) 61 | # # plt.show() 62 | 63 | from tqdm import tnrange, tqdm 64 | 65 | n_batch = 25 66 | n_runs = 16 67 | 68 | results = [] 69 | for ii in tqdm(range(n_batch), desc='Iteration', leave=True): 70 | 71 |     for b in [1, 2, 5, 10]: 72 | 73 |         gibbs_kwargs = dict( 74 |             memory_alpha = alfa, 75 |             memory_lambda = lmda, 76 |             memory_epsilon = np.exp(-20), 77 |             b = b, # re-defined here for completeness 78 |             tau = tau, # ibid 79 |             n_samples = 250, 80 |             n_burnin = 100, 81 |             progress_bar=False, 82 |         ) 83 | 84 | 85 |         _res = run_subject( 86 |             sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=n_runs, subj_n=ii, progress_bar=False 87 |         ) 88 | 89 |         # clean up the results and run simple analyses 90 |         _res['b'] = b 91 |         _res.loc[np.isnan(_res['Transitions Pre-Boundary'].values), 'Transitions Pre-Boundary'] = 0.0 92 |         _res.loc[np.isnan(_res['Transitions Boundary'].values), 'Transitions Boundary'] = 0.0 93 |         _res['PreVsPost'] = _res['Transitions Pre-Boundary'].values - _res['Transitions Boundary'].values 94 | 95 | 96 |         results.append(_res) 97 |     pd.concat(results).to_pickle('Dubrow_param_sensitivity.pkl') 98 | 99 | print "Done!" 100 | 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EventSegmentation 2 | 3 | Open in Colab 4 | 5 | Accompanying code for the manuscript "Structured event memory: a neuro-symbolic model of event cognition", Franklin, Norman, Ranganath, Zacks, and Gershman (in press), *Psychological Review*, [preprint](https://doi.org/10.1101/541607) 6 | 7 | Contains the SEM model, a few basic demonstrations, and all of the simulations in the paper. An up-to-date version of the model (but not the simulations) can be found in the following github repository: [https://github.com/nicktfranklin/SEM2](https://github.com/nicktfranklin/SEM2) 8 | 9 | 10 | 11 | The main code is listed in the `models` module: 12 | * `models.sem`: contains the code for the SEM model 13 | * `models.event_models`: contains code for the various neural network models used by SEM. They all 14 | share a similar structure 15 | 16 | There is runnable code in Jupyter notebooks: 17 | * `Tutorials`: Contains tutorials, runnable in Google Colab.
18 | * `PaperSimulations`: Contains the simulations presented in the paper. These have been designed to run locally, with the 19 | dependencies listed in the environment.yml file and have not been tested in colab. These have been pre-run and can be 20 | opened on github without installation. 21 | 22 | #### Installation Instructions 23 | 24 | This library runs on Python 2.7 and uses the tensorflow and keras libraries for neural networks. 25 | 26 | I recommend using Anaconda python and a virtual environment. [You can find instructions to install Anaconda 27 | here](https://docs.anaconda.com/anaconda/install/). 28 | 29 | Once you have anaconda installed, you can create the virtual environment by running 30 | 31 |     conda env create --file environment.yml 32 | 33 | This will install everything you need to run the Jupyter notebooks. Note that all of the simulations were run with these 34 | package versions and may not work with more recent versions (for example, TensorFlow is under active development). 35 | 36 | You'll need to activate the virtual environment and open Jupyter to access the demonstration notebooks. To do so, run 37 | 38 |     conda activate sem 39 |     jupyter notebook 40 | 41 | 42 | To deactivate the virtual environment, run 43 | 44 |     conda deactivate 45 | 46 | 47 | Note: if these instructions do not work for some reason, the critical libraries the model uses are: 48 | 49 | * Anaconda Python 2.7 50 | * Tensorflow v1.9 51 | * Keras v2.2.0 52 | -------------------------------------------------------------------------------- /Tutorials/Demo - HRR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# ## un-comment out if running locally\n", 10 | "\n", 11 | "# import os\n", 12 | "# os.chdir('../')" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "## if running locally, comment out the following code\n", 22 | "\n", 23 | "!git clone https://github.com/nicktfranklin/SEM.git\n", 24 | "import os\n", 25 | "os.chdir('./SEM/')\n", 26 | "\n", 27 | "!pip install tensorflow==1.9\n", 28 | "!pip install keras==2.2" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%matplotlib inline\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import numpy as np\n", 40 | "import statsmodels.api as sm\n", 41 | "from opt.hrr import embed_gaussian, plate_formula, encode, decode" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "[[1. 0.59917774]\n", 54 | " [0.59917774 1. 
]]\n" 55 | ] 56 | }, 57 | { 58 | "data": { 59 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEKCAYAAADuEgmxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztnX2cHXV56L9PNpuwQWATCRYXYqLFULhAAstLjaWCYqwVWEENXGzxpdLW0la0qbFSDVSvuaa92PpSy7Uiig2vugbCNSiJ3IoNJTGEGE0K8r5wJUAWhCywyT73j5nZnJ2dl9+cmTlnzjnP9/PZz54zZ87MM3PO+T2/3/MqqophGIZhuDCl2QIYhmEYrYMpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxpSGYRiG4czUZgtQNAcffLDOnTu32WIYhmG0FJs2bXpKVWen7dd2SmPu3Lls3Lix2WIYhmG0FCLysMt+Zp4yDMMwnDGlYRiGYThjSsMwDMNwpqlKQ0TeJiI7ROR+EVkW8fpHReTnInKviNwuIq9phpyGYRiGR9OUhoh0AV8Gfg84CjhfRI4K7bYZ6FfVY4Ebgc83VkrDMAyjlmauNE4C7lfVB1T1ZeBa4OzaHVR1varu9p9uAA5rsIyGYRhGDc1UGn3AozXPH/O3xfFB4P+UKpFhGIaRSDPzNCRiW2TvWRF5L9AP/G7M6xcBFwHMmTOnKPkMwzCMEM1caTwGHF7z/DDg8fBOIvIW4JPAWar6UtSBVPVKVe1X1f7Zs1MTGg3DMIw6aabSuBs4QkTmicg04Dxgde0OIrIQ+Bc8hfFkE2Q0DMMwamia0lDVPcDFwFrgF8D1qrpNRC4XkbP83VYCrwBuEJF7RGR1zOEMwzCMBtDU2lOqeitwa2jbp2oev6XhQhmGYRixtF3BQsMwjCIY3DzEyrU7eHx4hFf39rB08XwGFiYFeHYGpjQMwzBCDG4e4hPf2crI6F4AhoZH+MR3tgJ0vOKw2lOGYRghVq7dMa4wAkZG97Jy7Y4mSVQdTGkYhmGEeHx4JNP2TsKUhmEYRohX9/Zk2t5JmNIwDMMIsXTxfHq6uyZs6+nuYuni+U2SqDqYI9wwDCNE4Oy26KnJmNIwDMOIYGBhnymJCMw8ZRiGYThjSsMwDMNwxpSGYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxsqIGIZhVJiqdRA0pWEYhlFRqthB0MxThmEYFaWKHQRNaRiGYVSUKnYQNKVhGIZRUarYQdCUhmEYRpMY3DzEohXrmLdsDYtWrGNw89CE16vYQdAc4YZhGE3AxcldxQ6CpjQMo0FULXTS2EczPpskJ3ftuavWQdCUhmE0gCqGThoezfpsqujkdiHVpyEifykiB4rHv4rIT0XkrY0QzjDahXpDJ9Ns3kZ+mhXWWkUntwsujvAPqOpzwFuB2cD7gRWlSmUYbUY9s8pgBjw0PIKybwZsiqNYmjXjr6KT2wUX85T4/98OXKWqW0REkt5gGMZEDurpZnhkNHJ7HK4272bTCr6aJBlf3dvDUISCKHvGX0UntwsuSmOTiNwGzAM+ISIHAGPlimUY7UXcNCtp+tUKNu+q+2oGNw9x2c3b2LV7n8IOy7h08fwJ1wCNm/FXzcntgot56oPAMuBEVd0NTMMzURmG4cjw7smrjKTt0Bo27yqWuQgIFNquiHtcK+PAwj4+d84x9PX2IEBfbw+fO+eYlhvMG4XLSkOBo4B3AJcD+wP7lSmUYbQb9ZhAmjkDDkgzPVV5NRSl0GqplbEVZ/zNwmWl8RXgt4Hz/ee/Br5cmkSG0YbU4/TMOwPOG3nl4oiv8mooTXFVQcZWxGWlcbKqHi8imwFUdZeITCtZLsNoK+p1etY7Ay7C1+DiiK/CaiiOuNUdVEfGVsRFaYyKSBeemQoRmY05wg0jM400gRQReeViemp2BFCS+SxKoQH09nSz/KyjzRxVJy5K45+A7wKHiMhngXcBl5YqlWEYuSjC1xAXJhw26zTLH5C2mmq2QmtXUpWGqn5bRDYBb8bL2RhQ1V+ULplhGHWTN/dgcPMQL7y8Z9L27ilSGbOOy2rKHNzF41JGZA6wG7gZWA284G/LjYi8TUR2iMj9IrIs4vVT/bIle0TkXUWc0zDakbDT+7QjZ+fKNl65dgeje3XS9lfsN7Uyg3CVI7faGRfz1Bo8f4bghdrOA3YAR+c5se8n+TJwBvAYcLeIrFbVn9fs9gjwPuCv8pzLMNqZKDPNTZuGOPeEPtZv31mXaSZu4E3KK2k0zcrk7nRczFPH1D4XkeOBPy7g3CcB96vqA/5xrwXOBsaVhqo+5L9mjnfDiCHOTLN++07uXHZ6XcdMG5CrUDqkypFb7Uzmzn2q+lPgxALO3Qc8WvP8MX+bYRgZKMNMk5RXMrh5iKU3bpmQv7H0xi0NL6RomdzNIXWlISIfrXk6BTge2FnAuaOq7kw2orocSOQi4CKAOXMKcbcYRstQhpkmHHnUO6MbVbjkunsQgbHQL3V0r3LZzdsaPmCbo7vxuKw0Dqj5m47n4zi7gHM/Bhxe8/ww4PF6DqSqV6pqv6r2z549uwDRDKN1yFtiOy5zfGBhH3cuO50rlizgxdExhkdGUSYrjICoGk9G++Hi07ispHPfDRwhIvOAIeA84L+XdC7DaFvy5CO4ZI6n1XAqiyL9JsGxhoZH6BJhryp9BfliquDfaSSxSkNEbibBXKSqZ+U5saruEZGLgbVAF/B1Vd0mIpcDG1V1tYiciJdYOBM4U0QuU9VcUVuG0Y7Ua6ZxyXVw9Y30JvQGCXAdYIssuR4+1l7V3McsQ85WIWml8fdln1xVbwVuDW37VM3ju/HMVoZhlICLEz2phlNA9xRh+VnJ87ksA2yRDaiSVkp5m1q1SqOsIolVGqp6RyMFMQyj8bg40aNCW7u7hP2nTeXZkVFnk0yWAbbIiLC090S97roi6sQEQ5foqSOAz+H11Bjvo6Gqry1RLsMwGkBcrsNpR85m0Yp144NmnkTBgCwDbJERYWkrpfAxs6yIOjHB0CV66irgn4E9wGnAN4FvlSmUYRiNISrX4dwT+rhp09CEPIybNg2xdPF8Hlzx+9y57PS6TC9Zem/kjQhLO1bSMbN0IyxSzlbBRWn0qOrtgKjqw6q6HKgvzdQwjMpRG1oLcM2GR0pp4ZplgC0qcS8wM42M7qXLb8ge/I87ZpYVUScmGLrUnnpRRKYA9/nRTkPAIeWKZRjtSxVDNMMmmSjy2umzhgbnTdyLiprq6e5KHdSzmpw6LcHQRWl8BJgB/AXwd3gmqgvLFMow2pWqhmi65GIUYadvhUZUVtMqmaQ8jXcBt/hhrwDPA+9viFSGUXHqXS1UNUQzbRXRioNmvZFN1rwpmaSVxgXAV0Tk+
8Aq4DZVbXxaqGFUjDyrhaqGaCZFGBWVOd1o8kQ2dZrJKQuxjnBVfSfwm8DteKapR0Xkn0Xk1EYJZxhVJG61cNnN2yJrONUSN2ApcOng1jLEdSLOSf2FJQvGo6XialRVjUDOoeGRSVVRW3HFVDUSfRqq+hxwNXC1iLwSrz/4F0VklqoenvRew2gGZdYYCohbFezaPTpetC9u9RFlLw+4ZsMjAHxm4JhJr5VNmkmmqr6YMGE5g+5xSvSKqYpBCVXHqZ+GiMwEzgGWALOAm8oUyjDqIRgwApNEuMZQUTNjV4dwVJhqEKIZx6q7Ho19rZlkyV0ognpXNVFyBgojnF9S+30J8lGK/J60K7FKQ0QOEJE/EJFbgV/gNV76DDBHVT/SKAENwxWXGkNFkJQsFiYutj+OQNE1mrQBtJG+mDyDeRY5G60I24Uk89SDeBVo/xn4vqpasXyj0tRTY6geokw5L7y0h+GRyT+RMstJRJlWwnIVFdXVyHIZeSLMsshZ1aCEqpOkNOao6u6GSWIYOclaYygP4eiaSwe38u0Nj0zoJVCm0zXKx7D0hi2MAXvH9pnllt64ZVzeJPt90gA6uHmI3S/vmfRaWdeXZzDPkmPRiXWjiiApesoUhtFSJJmNBDjtyHK6Og5uHuKmTUOTms9IQvfivpiBKW57mKjZ+OiYjiuM8W1+G9Y0k0/cQNk7o5tPfGfrpK58vT3dpZTLGNw8xBSJ6gTtHirrWtajE+tGFYFLRrhhtAS1ZqPwDFKBmzYN0f+aWYUPdHG+lN2jY7ERRnmzjrOYUHbtHk01+cTJo0rkte0/fWopCuMT39ka6dfJcm9ccywsia8+EqOnRKRLRFY2ShjDyEtQfC9qxl6WkzNpAB8Z3cvy1dsmbc9b6C6rCSXN5BMnz7MRfpqk4+UhTvl2iZRWBDD4vuSp3ttppOVp7BWRE0REVJsU1mEYddBIJ2eaL2V4ZJTBzUOTBqQ8WcdJ+R5henu62X/61FT7fZQ8Uau28PuKIu6zGVO1wbxCuORpbAa+54ffnhP8lS2YYeQhS++GvLiE4Ba9wknL9wgI2rCeduTszNnRg5uHeOGlxjnA6/nMWiVLvZ1wURqzgKfxemic6f+9o0yhDCMvjXRyBgP4zBndsfuUscJJm3339faw8t3HAUxy1Atw7gnxK53AvxAOI545oxwHOGT7zAY3D7Hgstv4yHX3WHJeg0l1hKuqVbY1WoJwSGkRLUpdCUw7Cy+/bVKkEeybLRdd5mTmjO7I882c0c2dy7xeaYtWrIvMkl6/fWfsceP8CzOmFe8AD3B1TCf1/qhCxeB2x6VH+GHAF4FFeN+1HwN/qaqPlSybYTgTlbdw06ahhndR+/SZR8dGRUU1BQpkraeOU9yMurtL+PSZR48/r8e/06zENxc/T1rvjyJltNpUk3EJub0K+Dfg3f7z9/rbzihLKKMatMIPpnbmHqYZs86k2XLUjL9eWeNm27093Sw/y1MYi1as4/HhEab4K5owSb6CrIlvWb8reb5baUqhKL9VqxRpbDQuSmO2ql5V8/wbImK1p9qcVvjBNKJFaT3EzZaLLHMSN9vef7r3k45a0dSS5t/JkkeS9buS97uVFK1Wu6rLO+GpasOsZuPiCH9KRN7r52x0ich78RzjRpsyuHmIj12/pfLF3LK2KG12pE3aDHiKyCSZ4mROMh8l5Tu45oRkySPJWvgvb6HAuGi1wEkPFFK91mpTReOy0vgA8CXgCjyfxk+wtq9tS1JWLlTnBzO4eSgxNwImzozrnd3GzVizbof03Iq9qhNkSpI5yXyUlO/w4IrfT7xntbjmkWQdXPMOxmkO8ygzYD0rBKtNFY2L0jhcVc+q3SAii4BHyhHJaCZps/cq/GCCwTSJcERSPaaGuEF748PPcNOmIeftMHEAvuzmbZERT2GZkmSOUkBBfa3123c2dLCLG1x7Y0KQXQbjNPNSkkIraoWQt9RLu+Jinvqi4zajDUj6YVXlB5Ok2MItSgPirmtoeCTWZBU3aK+669FM22vNLgML+5gxLXmuFsiaNPgNLOzj3BP6JiTsBfW1TjtydkML8S1dPJ/urslFBnftHo1sYRuXj3HakbNZtGIdc5et4ZIc+RdFJXbmLfXSrsR+e0Xkt4E3ALNF5KM1Lx0IuHWgMVqOuFlgmfV/XKideSbVswls2kHkUDBLjbsugfHt4ZVB3KAdZ7pzNem5Rv+kzcjXb9856V6MjO5l/fadfO6cY+qOZjqopxsRGN496vTegYV9LF+9LbKfyLc3PDKpSGSUeem0I2dPWKVFXZereanIFUKeUi/tStKUZxrwCn+fA2q2P4fXK9xoQ+J+cM1WGC51loIihVEmpePnHBQ5ACcNTkkKNEuHvfAM1yX6B9IHv7SVSL3hu7WDv6vvJ66woULkYB+WLykcOaAon4eRj1iloap3AHeIyDdU9eEGymRE0KiciSr+4FyipILBNM6ktOGBXc7nGxoeYd6yNfTO6KZ7ijBa06Oip7uLc0/omzArdpGr9vPr6Y62Cs+c0c2nzzx6/F4PLOxj48PPsOquR9mrSpfIeOmPoO9ElPI6qCe+nEkUaffXZZafpAhdBnuXfbKYl2yFUB4ujvCvici7VXUYQERmAteq6uJyRTMCGp0zkfUHV4RCq6erHHjmpdr9L7nunsj9svbeVjybfHeX0NvTzbMjo+Nmm29veITeGd1MnzqFZ0dGncxlS2/YMq58do+OTbqGC06Zw2cGJhYgDJo7BbLvVeWmTZ5dv3Z7mBde3hNZVTeOIgb1pYvnc8l190TeC5fBPq1ScFX8aYabI/zgQGEAqOou4JDyRDLC5I1rL5O0jnD1HmPpDVtYePltzFu2JraTW19vz6Q+CHEDVFfMMaK37mN0r7L/9KlcsWQBL+0ZY9fu0XGF8tKeMa5YsiCxC19g7x8di1ctcXWglq/e5uxwD8uc5bvhOqgnMbCwjwtOmZO5km5AlHM8OJY5oKuFi9IYE5E5wRMReQ2TTcFGDEUklFU5yagIhRbXujQYoLNkNMdF5px/8uGR2y84Zc54dEwccQlzteGvSdFKUQ7iqHPUMrh5KPZ9LqumLN+NtNLutSa2pO/yZwaOGVei4WijtPdGRSpdsWQBD5XcHKnZCZ+tiIt56pPAj0XkDv/5qcBF5YnUPhRlVqpyklERCs113y4RxlQTTWBxPhmAW7Y8Mf5ZhP0H4Dlj45zlSfb6IvxA4c8y7yoyfLwk819Y/qjoKYgOMKh9f/A4rSptlveWSSuUyqkiLqXRvy8ixwOn4K0YL1HVp0qXrA0oqnZNlZOMXBVa0qCVZs8OcM1oDg8+UdFXL4b8CpCtG15AcJ15BryozzLPKjJ8vEsHt/LtDY+MmweiBsc0+fNkWVe1hlNV5ao6qeYpERHgbcDxqnozMENETipdsjagKLNSlZOMXBrnpPk9XDrfQf0rK1cTWu19dsFVcfcmRDPFfZYu1xrUkurt6Wb/afvu3/Sp+37Wg5uHJiiMgKwmxLTkyCSzTp6oqjKpstm3yriYp74CjOF17rsc+DVwE3Bi
iXK1BUWalaoaQuhimkmb0UWZR154eQ+jeyeGuta7soobtKK2B/LMW7Ym1nEXjthKY/lZR0+IngKvDevKdx8X+36XVU+w8gqXVRkeGR1/vnLtjtjryDI4Jq0Gk8w6g5uHEKKdoM02rxaxSu5EXJTGyap6vIhsBi96SkSmFXFyEXkb8I94GeZfU9UVodenA98ETsCrrLtEVR8q4tyNoMpmpSJJU2guM7ook1JRP9S4ZLwuv6ps1HniBpS+3p7xjniuxGVAr1y7g0uuu2f8ebjLYJDVHTdYB4NbklJOUgxpvbfD8l73n4/GRoGNjO7lY9dvGb+e4D7GKS2BUn4HWb43Lr9P83tMRjQlEkNE7sIrJ3K3rzxmA7ep6sJcJxbpAv4Lr5nTY8DdwPmq+vOafT4MHKuqfyIi5wHvVNUlScft7+/XjRs35hGtUGyWEu9gdhmAa+9f74xuVL3s4yz3cu6yNbGv9XR3RWa/A5EDShFmQZcM99pzRe1f+3rcqihYEcWVT7nglDmR7XDjzifopByTNPnjcjcAHspQcdeFtPsU956k32ee726rISKbVLU/bT+XlcY/Ad8FDhGRz+KVELk0p3wAJwH3q+oDACJyLXA28POafc4GlvuPbwS+JCKiaZquQjTTrFQVhVXviis8CNRWhs0y4+tLKAcSN0MPBoQi719Sl8EwSeY710CCYL+oarhveN2s2Iq8cSuXLATyJ63YiiToARNeUaY5totYJXcaLtFT3xaRTcCb8b5vA6r6iwLO3Qc8WvP8MeDkuH1UdY+IPAu8ErDorRSqtKyuNyS1iPIWEK+04o4dDAhFKnzX+llhOcKK/4olCybJFOf/2P3yHoDI4oX1mrSy8PjwCFcsWVC6ibbMHjBVDndvFklVbtfg9QYfVNXtwPaCzx2VTxX+1F32QUQuws8dmTNnzqQ3dCLNCCdMywXIet6iahbF5SHEDeBlDAgu9bPC9M7ods5vACZVmt2123OIf+6cYyaZUuLKrQSfXdRAOXNGNy+Ojk26DhGIGq9f7WfEQ7m1zMrsAdMpfsksJIXcXgm8A3hIRK4TkYGiHOA+jwGH1zw/DHg8bh8RmQocBDwTPpCqXqmq/araP3v27AJFbF0avawuopxImCLKWwQMLOzjzmWnTygHEkW9A0JaZnHW+97T3YXqZMUWFyo7sLBvvD+4y/5x9613RndsGPWnzzx6QkhyMKOLUhi19zG49+GSL0VRZg+YKoe7N4tYpaGq31PV84E5wHeAC4FHROTrInJGAee+GzhCROb5yug8YHVon9X+ecHzpaxrJX9GMymqEY0recqJxA24ruUtko7hImdAvQOCi8LMet/PPaEvttx4EW1U4xonPf/iPpNW1EA5sLDPe+8UiXVwN3pgTao3VoQcZSu9VsPFpzECXAdcJyLHAlfjDeS5GjH5PoqLgbX+sb6uqttE5HJgo6quBv4V+JaI3I+3wjgvzzk7iUYvq5MGrCSzlYvvJS16Kov/Jk5OgdhomLSAAhdTYNZs85s2DXFQT3dk/amkCYGr/T0opBg+/uiYjgcCxA2OK9fuiA29bUZUURV7wLQzqUpDRF4FvAdvwD4UuAF4fxEnV9VbgVtD2z5V8/hF4N1FnKvTaIQtuZakPtFJA7pL4l+RZSqyOjZdFJJrHkoga/B5vPDSntiihCOje9mve8okh33QBzyK046czTUbHoncHqX4sq5kXF5vRlRRo7/rnU6SI/xDwPnAfDzz1F+r6p2NEszIj8uA6xKW67JP3GwvyS4/sDC+pWoRBQ/jzDJZVmAuCslVEbnUxKpl1+5RFr1uFj/55TPjpqCgD3i4hSpEl1cHWHPvE5HhtVlXMrWvpyUcNpqqVkxoR5Ic4W8AVgCHq+qfm8JoLVxs/C62eFcHd5zDMG02W4TvJcsxsjo2XRSSS/2tKAJZ4np9ABMURkCcryhO1l27RyMVnwh1yR34NMJ0d0lHRxV1CkntXgsxQRmNx9XG7zKLzmL6iZrtxSWzBQN6XPJZnAkmitOOnD2pKF/S4JdlVuqyishjHgn2iVtxZKkb5VotOGB49yhXLFmQWe6oEN+oUvP1UJWEVCMel4xwo8VwHehdZtFZzUdRNYvC/bTD4ZgbH35mwqCfZIKJOt9Nm4YmDK4C47208+JqzspjHgne95GY3IkoolZRSxfPn1QYMe0Y9codVpQzpuUfSqqUkGrE49K5z2gxXAd6F7NOFtNPlCnrmg2PICi9Pd2x5qD123fWXbo7SkHGtU91pda0t3LtDs49oW/cnNXb081+3VO45Lp7Cu30NrCwL7a0hmsL1YGFfUyb6vaTzmtKKiMvp8ptjY19uPTTmBXxF98gwGg6cQO9QmoeRHhAymKvj8uB2D06Nt5POyqUM48zPK3PQ9Y2nlGD4U2bhli6eP6kPuFpA2XWVqJR97q7S9ive9/PdP9pXUyfGq+0XnjZMetcYePDz9Td6rSMAT7te2CtWauBy5ryp3hZ2bvwJj29wBMi8iTwIVXdVKJ8Rh0k5QQk5UFE2ZCz2OuTBvmkEiZ56vskVXENtmcxc6QNhq7+nThTy8aHn4msLFsrW21eyvMv7mGkprKspxTym29GxzS1m18SZVQcSPoemOmqOriURv8q8F1VXes/fyteJ7/rgX9U1XCRwaZStdLozSKtomoZSVhxZaTD53Utxe2SnHXp4NbI3IS4c6ddc1KZcYh2TAtMakMbdy/CDYmSrtPlfsLE61pw2W2xuR8uuH4vyigZnvQ9iPsut2OJ8mbhWhrdxQDaHygMAFW9DThVVTcA03PIaJRIUPogLpizjCSstLIfwew/bNpxDYONMk9k8V24XHOSDyeLfyfuXFl8N66fUe1+y886OjIc1hXXc9YbZpxE0vfASpRXBxfz1DMi8nHgWv/5EmCX30TJrSOL0RQGNw8xJaZrXRlJWMEgf9nN2yYVBIxq+Zkl8zvOPJGlcmyc8z4t2gvghZf28I7jDp30WqAIF61Y59TjIoq4ga93RndsYcW464oyJ0ZdU94WrGVlYcd9D6xEeXVwMU8dDHwaeCPed+3HwGXAs8AcVb2/bCGzYOYpj6Rs40bU5QkPxkkDaJcI5598OJ8ZOCbyvacdOZtVdz0a27I1aruLGSjOHHLuCX2sufeJSQN28Nr67TsZGh5JPEfUseMG6pkzupkxbeqkwdfF1OT6WbqGQle1XlOcGfK9p8wZ/94Y+Sisc5+qPgX8uYgcCIyp6vM1L1dKYRj7iItkKqryZxrhGWOSfX6v6viA0P+aWZNWFEk+i72qkS1bwwP/9IhQ1Din9zUbHonM0h4Z3cv67Tu5c9npkdeT1m0vaqDu7hKef3HPuJy1Dt64bHrY18o1SxJheL/+18xqmUS6ODNkntBqoz5cChYeA3wTmOU/fwq4UFV/VrJsRg7iTB5jqoUODK4ZvC5VXlfd9Sjrt+/MZHLqq+lEVysDeAmCAcMjo86FBoHULnCuRQrTBuqoooUurVKLcP62Ur0m82lUBxefxr8AH1XV9QAi8ia8Bk1vKFEuIyeNsAFnCYOsnXknrTi
yDAKB4zVq8Fu0Yl3dhQaTCO6fy/2NU6i1ss5btibyPI1qldoqmE+jOrhET+0fKAwAVf0RsH9pEhmFUEZ0S5isCV5BRFdSgb6DetzyRtPMbPUWGkyi9v6l3V/XjOmkiKyoaKJzT/DKyXdaglsjvs+GGy5K4wER+VsRmev/XQo8WLZgRj6yVnOth3pNBueffHjsa1GVV8P0dHfxD+85LjEkd0qMYgpHGtW2L42iSyTy/qXdX1eFmjYYBor2iiUL2P3yHq7Z8EihpTtahUZ8nw03XMxTH8CLlvoOnv/tDgpqwmSUS702a1c/Rb0mg/7XzIp1bgcO4SAqqs93IMdlUdfKXGvKifJJJBUarCfBMOn+uipUl9DVpEi4pEz7dqOVfDDtjEv01C7gLxogi1EBsvgp6mkpGxw/jSAqyjWiJylabEw1NTqo6LyDrK1X484zuHmIj12/JdYxD+YMNhpLotIQkQuBv8Tr3gfwC+CfVPWbZQtmNIes/TOC97gOtHGDexRZZtFyqCt4AAAVdElEQVRJ0WLhEh9xFDmTLaJHe6BgkxQGmDPYaCxJ7V7/EPgI8FG8ooUCHA+sFBFMcbQnrmaVsAnriiULcg3uefevWnRNESsXFwVrzmCj0SStND4MvFNVH6rZtk5EzsUrKWJKow1xDSWtt+Jo1jDXKSLMW7YmddAtYmZfNHlXLmkKs7enm+Vn5e+WZxhZSIqeOjCkMADwtx1YlkBGc3EJbczTSyFrmOteVadIoSpH19TbByJuldQlwheWLOCeT7+1EtcXYP0uOoPY2lN+HZITsr7WbKz2VH7SoqfmxiSkgVt5i7Sy7RBfU6rVSmHnKfvu+t4q9NXOc51GNSii9tRvici9UccGXlu3ZEblSTOrxA3owIRVQXCsuOMnDTSXxPTLrjXZNHKwrPdcWQILwtQTjtus5kR5rtNoLRKVRsOkMFqKtGgecBswkgbFuJVIYLJp5GCZ51x5ayalKfAsg3WZStZqQ3UOsUpDVR8ObxORd6jqLeWKZFSdPkdntsuAETcopjm2XQfLIgbKPLPosqO6skS7lalkqxa9ZpSHSxmRWi4vRQqjpXB1ZucZMNIc2y6DpWv9pzTyzKKXLp4/qZNe9xQpLKrLtZtgnuAFF6w2VOfgUkaklvr7SBqlEjWjhmIynKOOHfRtfnx4hN4Z3Tz/4h5Gx/aZrYoYMJJMMy4z26Ls7HHn6p3hVlxx0q+mwF+Ra6hx2eajsjr5GdUjq9L441KkMHIRZXpYesMWEBjdq+Pb6jFHxJk1PnfOMROimBodweMyWBY1UC5dPJ+lN24Zv5cBz7+4Z7zHeRwr1+6Y9L7RvVqYg9h1sG6E+chqQ3UGSRnh58RsPwxAVb9TllBGNqJm1LWz/oB6Ztmus/VGDxjBuWr7kYe78xU1UA4s7GP56m2TmiWNjqUP/o1wELvc+7KTH6sQ9ms0hqSVxpn+/0PwGi6t85+fBvwIr+qtUQGyDEBZB6s4h3fW5kVl8eLo2PjjcHe+IgfKuNarQ8MjLFqxrvBKwEVTpvmoDCe7KaHqkhQ99X4AEbkFOEpVn/CfHwp8uTHiGS5kKc2RdbCKy8lIaqTUKNJWQUUOlEn3uOhKwGVR1mqw6ByNquSeGNG4+DTmBgrD51fA60uSx6iDqIGpe4pM8GlAfYNVXE6GS65G2dTbp7se0nqcF1kJuJZWmHEXbYKzRMFq46I0fiQia4FVeAm/5wHrk99iNJK4gSlqW9YfXVxORlK3u0aRx/STdTCuvcdxK464QTJPM6xWmHEXbYKzRMFq49KE6WIReSdwqr/pSlX9brliGVmJG5jyDi5VMq+EqVe2egfj4B4vWrEucpB07W/uSqvMuIv+jlTFD2RE45rc91NgjapeAqwVkQNKlMnIQdGVRqtcPbZe2fImukUl7AG88PKeQiu7tsqMu+jviCUKVpvUlYaIfAi4CJgFvA7oA74KvLlc0YyslGXOKMIvUJZtvh7Z6m00Fcg8sLBvQqhvQJH5F9BaM+4ineyWKFhtXHwafwacBNwFoKr3icghpUpl1EVVzRlVs80X0WhqOKQwAopcBVTZNFg2lihYXVzMUy+p6svBExGZiucQrxsRmSUiPxCR+/z/M2P2+76IDPthvy1Ds5rRVNWcUWbdo3rudRGNplxrPuWhyqZBo3NxWWncISJ/A/SIyBl4bWBvznneZcDtqrpCRJb5zz8esd9KYAYtVL6kmbPqqpozylJmeRzakGz+SJO5iFWAi8nOZtxG1XBRGsuADwJb8QbvW4Gv5Tzv2cCb/MdX42WYT1Iaqnq7iLwpvL3KNNNEVFVzRhZllsX3kbfBUdI+aTIXkX9RJZOdYbjiEnI7Bvxv/68oXhUkDKrqE+3kI2mmiaiqDkRXZZZ1IC3zXrvInGcVUFX/k2GkkVSwcCsJvgtVPTbpwCLyQ+A3Il76pLN0jojIRXgRXsyZM6fow2ei2SaiKpozXJVZ1oG0zHtdtgKuqv/JMNJIWmm8w///Z/7/b/n/LwB2px1YVd8S95qI/EpEDvVXGYcCT7oIm3CuK4ErAfr7+5ta36KqJqJm46LMsg6kZd3rsInsiiULClfEzZ5cGEa9xEZPqerDfsvXRar616q61f9bBizOed7VwIX+4wuB7+U8XmWoN+KlWRFXVSJrRFIZ0UVFdftLwxLYjFZFNKXwnIjcA1ysqj/2n78B+IqqLqj7pCKvBK4H5gCPAO9W1WdEpB/4E1X9I3+/fweOBF4BPA18UFXXJh27v79fN27cWK9oTSFsywdvAOm08Moq3Ie4EiF9vT0Tmk4VQSsUIzQ6BxHZpKr9afu5RE99EPi6iBzkPx8GPpBHOFV9moiMclXdCPxRzfPfyXOeVsGcoh5VcOQ30tdQRf+TYaThEj21CThORA7EW5k8W75YrUXeGWPcgBQ0+OmkmWizB1LzNRhGMqkZ4SJykIj8L7zOfbeLyD/UrDo6niJs4HEDkvjHK9O2bkzEfA2GkYxLGZGvA78G3uP/PQdcVaZQrUQRJTKiBiphcrxzUaU3wpTphG81B7+V7jCMZFx8Gq9T1XNrnl/mO8cNirGBR9nyszb6qZcyM5OrmPVspTsMIx8uK40REXlj8EREFgGWgeRTVOG6gYV93LnsdB5c8fvcuez02M54RdvWyywmWOax66FR4bSG0c64KI0/Bb4sIg+JyEPAl4A/KVWqFqIsG3ijbOtlRgtVLeu5akrMMFoRl+ipe9gXPYWqPle6VC1EWWGijQo/LTNaqGqRSFVTYobRirh07vsfwOdVddh/PhP4mKpeWrZwrUJZNvBG2NbLLHtStZIqVVNihtGKuJinfi9QGACqugt4e3kiGY2kzGihqkUiWTitYeTHpYzIvcCJqvqS/7wH2KiqRzdAvsxUtYyIlYyoBvY5GEY0RZYRuQYvqe8qvNSBD+A1TjIcqWLoaadi4bSGkY9U85Sqfh74DPBbwNHA3/nbDEcsascwjHbBZaUB8Atgj6r+UERmiMgBqvrrMgVrJyxqxzCMdsGl9tSHgBuBf/E39QGDZQrVbhSVAGgYht
FsXKKn/gxYhFdzClW9D2ibnt6NwKJ2DMNoF1zMUy+p6ssiAoCITCWhd7gxmSr0iagKFr1kGK2Ni9K4Q0T+BugRkTOADwM3lytW+2FROxZFZhjtgIt5ahmwE9gK/DFwK2DZ4EZmLIrMMFofl9pTYyIyCAyq6s4GyGS0KRZFZhitT+xKQzyWi8hTwHZgh4jsFJFPNU48o52wKDLDaH2SzFMfwYuaOlFVX6mqs4CTgUUicklDpDPaCosiM4zWJ0lp/CFwvqo+GGxQ1QeA9/qvGUYmqlbA0DCM7CT5NLpV9anwRlXdKSLdJcpktDEWRWYYrU3SSuPlOl8zDMMw2pSklcZxIhLVpU+A/UqSxzAMw6gwsUpDVbviXjMMwzA6E5fkPsMwDMMA3EujG1jdJMMwDFMajljdpGphCtwwmoMpDUeS6iY1a7Dq1IHTFLhhNA/zaThStbpJwcA5NDyCsm/gHNw81BR5GokVPjSM5mFKw5Gq1U3q5IGzagrcMDoJUxqOVK1uUicPnFVT4IbRSZjScKRqdZM6eeCsmgI3jE7CHOEZqFLdpKWL509wBkPnDJzWPtcwmocpjRal0wfOKilww+gkTGm0MDZwGobRaMynYRiGYThjSsMwDMNwpilKQ0RmicgPROQ+///MiH0WiMh/iMg2EblXRJY0Q1bDMAxjH81aaSwDblfVI4Db/edhdgN/qKpHA28DviAivQ2U0TAMwwjRLKVxNnC1//hqYCC8g6r+l6re5z9+HHgSmN0wCQ3DMIxJNEtpvEpVnwDw/x+StLOInARMA34Z8/pFIrJRRDbu3LmzcGENwzAMj9JCbkXkh8BvRLz0yYzHORT4FnChqo5F7aOqVwJXAvT392tGUStDp1atNQyjdShNaajqW+JeE5FficihqvqErxSejNnvQGANcKmqbihJ1Epg5b4Nw2gFmmWeWg1c6D++EPheeAcRmQZ8F/imqt7QQNmaQidXrTUMo3VoltJYAZwhIvcBZ/jPEZF+Efmav897gFOB94nIPf7fguaIWz6dXLXWMIzWoSllRFT1aeDNEds3An/kP74GuKbBojWNV/f2MBShIDqhaq1hGK2DZYRXBCv3bRhGK2AFCytCp1etNQyjNTClUSGsaq1hGFXHzFOGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxpSGYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnBHVlm2pHYmI7AQeLuBQBwNPFXCcZmPXUT3a5VrsOqpHnmt5jarOTtup7ZRGUYjIRlXtb7YcebHrqB7tci12HdWjEddi5inDMAzDGVMahmEYhjOmNOK5stkCFIRdR/Vol2ux66gepV+L+TQMwzAMZ2ylYRiGYTjTsUpDRGaJyA9E5D7//8yY/b4vIsMickto+zdE5EERucf/W9AYySNlzHst80TkLv/914nItMZIPkk+1+u40N/nPhG5sGb7j0RkR81nckjjpAcReZt//vtFZFnE69P9+3u/f7/n1rz2CX/7DhFZ3Ei5w9R7HSIyV0RGau7/VxstexiHazlVRH4qIntE5F2h1yK/Z80g53XsrflMVucWRlU78g/4PLDMf7wM+J8x+70ZOBO4JbT9G8C7mn0dBV3L9cB5/uOvAn9a1esAZgEP+P9n+o9n+q/9COhvkuxdwC+B1wLTgC3AUaF9Pgx81X98HnCd//gof//pwDz/OF0teB1zgZ81Q+4c1zIXOBb4Zu3vOel71krX4b/2fJHydOxKAzgbuNp/fDUwELWTqt4O/LpRQtVJ3dciIgKcDtyY9v4G4HIdi4EfqOozqroL+AHwtgbJl8RJwP2q+oCqvgxci3c9tdRe343Am/37fzZwraq+pKoPAvf7x2sGea6jaqRei6o+pKr3AmOh91bpe5bnOgqnk5XGq1T1CQD/fz2mjM+KyL0icoWITC9WvEzkuZZXAsOqusd//hjQV7B8rrhcRx/waM3zsLxX+cvwv23wQJYm14R9/Pv9LN79d3lvo8hzHQDzRGSziNwhIr9TtrAp5LmvrfaZJLGfiGwUkQ0ikntCODXvAaqMiPwQ+I2Ilz5ZwOE/Afw/vOXilcDHgcsLOG4kJV5L1MBaWkhdAdeRJO8FqjokIgcANwF/gLdcbwQu9zFun4Z+BinkuY4ngDmq+rSInAAMisjRqvpc0UI6kue+ttpnksQcVX1cRF4LrBORrar6y3qFaWuloapviXtNRH4lIoeq6hMicijwZMZjP+E/fElErgL+KoeoLucr61qeAnpFZKo/azwMeDynuLEUcB2PAW+qeX4Yni8DVR3y//9aRP4Nb1nfKKXxGHB4SK7wfQz2eUxEpgIHAc84vrdR1H0d6hnQXwJQ1U0i8kvg9cDG0qWOJs99jf2eNYFc3w9Vfdz//4CI/AhYiOcjqYtONk+tBoKIiAuB72V5sz+oBT6BAeBnhUqXjbqvxf+hrweCiIvM96JAXK5jLfBWEZnpR1e9FVgrIlNF5GAAEekG3kFjP5O7gSP8SLRpeA7icKRK7fW9C1jn3//VwHl+VNI84AjgPxskd5i6r0NEZotIF4A/qz0Cz4HcLFyuJY7I71lJcqZR93X48k/3Hx8MLAJ+nkuaZkQDVOEPzwZ7O3Cf/3+Wv70f+FrNfv8O7ARG8DT+Yn/7OmAr3sB0DfCKFr6W1+INUvcDNwDTK34dH/BlvR94v79tf2ATcC+wDfhHGhyBBLwd+C+8Wdwn/W2XA2f5j/fz7+/9/v1+bc17P+m/bwfwe836LuW5DuBc/95vAX4KnNnM63C8lhP938ILwNPAtqTvWatdB/AGf5za4v//YF5ZLCPcMAzDcKaTzVOGYRhGRkxpGIZhGM6Y0jAMwzCcMaVhGIZhOGNKwzAMw3DGlIbRstRU79wmIltE5KMikvs7LSLLReRzoW0LROQXdRxrgYi8Pac8D4rI/NC2L4jIXye8Z66INDN3yGhTTGkYrcyIqi5Q1aOBM/Bi2T9dwHFXAUtC284D/q2OYy3Ak8sZP8u6lmv98wevT8FLqruuDnkMIxemNIy2QFWfBC4CLhaP/UTkKhHZ6hfQOw1ARGaIyPV+ocnrxOsH0R861g5gWEROrtn8HrzBGxF5q4j8h9+/4AYReYW//UQR+Ym/6vlPETkILwFrib8iWiJez5BB//wbRORY/73LReRKEbmNyaVPVlGjNIBTgYdU9WF/RfHvviw/FZE3hO+NiLxPRL5U8/wWEXlTyrWsEJGf+3L+fcaPw2hj2rr2lNFZqFdbZwpeddz3+tuOEZEjgdtE5PV4vSB2qeqxIvLfgHtiDhcM1HeJyCnA06p6n1+K4VLgLar6goh8HPioiKzAm/kvUdW7ReRAYDfwKbweHxcDiMgXgc2qOiAip+MpiKCB1wnAG1V1JHRd94rImIgcp6pbfLlW+S8/CZyhqi+KyBH+9glKMI6Ea/kS8E7gSFVVEel1OZ7RGZjSMNqNoCLoG4EvAqjqdhF5GK943hvxSoygqj8TkXtjjnMt8BMR+RgTB+lT8
Jom3emVHWMa8B/AfOAJVb3bP/ZzADK5Ovsb8cptoKrrROSV/ooEYHVYYdSwCq8+1Ta8Xgqf8rd3A18Sr3PkXv8aXYm7lueAF4Gvicga4JbYIxgdhykNo23wi+TtxZt9x/XScOqxoaqPishDwO/iDfK/XfP+H6jq+aFzH4tbueqkMtcvJLxvFXAbcAdwr2+OA7gE+BVwHJ65+cWI9+5hoil6vxpZJl0LgIichNfp8TzgYrxGXYZhPg2jPRCR2Xitar+kXkG1/wtc4L/2emAOXjHAH+P5JxCRo4BjEg67CrgC+KWqPuZv2wAsEpHf9I8xwz/+duDVInKiv/0A36H9a+CAmmPWyvUm4Cl16DehXv+Dp4EV7Fv1gFeW/AlVHcPrH9IV8faHgAUiMkVEDmdfV8DIa/H9Ggep6q3AR9hnPjMMUxpGS9MThNwCP8SbiV/mv/YVoEtEtuL5Gt6nqi/522f7ZqmP41XFfTbm+DcAR+M7wAFUdSfwPmCVf4wNeLb/l/Eirr4oIlvw2oPuh1d2/qjAEQ4sB/r9965gX4lxF1YBRwLfrdn2FeBCEdmAZ5qKWq3cCTyIV+X07/Eq0MZeC56Su8XfdgfeasYwAKzKrdFZiNfvodt3HL8OrwT76/1B3zCMFMynYXQaM4D14jVqEuBPTWEYhju20jAMwzCcMZ+GYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnPn/Ha/6DktjRNcAAAAASUVORK5CYII=\n", 60 | "text/plain": [ 61 | "
" 62 | ] 63 | }, 64 | "metadata": { 65 | "needs_background": "light" 66 | }, 67 | "output_type": "display_data" 68 | } 69 | ], 70 | "source": [ 71 | "# figure out how many dimensions we need\n", 72 | "n = 10; # vocabulary size\n", 73 | "k = 5; # maximum number of terms to be combined\n", 74 | "err = 0.01; # error probability\n", 75 | "d = plate_formula(n, k, err);\n", 76 | "\n", 77 | "dog = embed_gaussian(d, n=1)\n", 78 | "agent = embed_gaussian(d, n=1)\n", 79 | "chase = embed_gaussian(d, n=1)\n", 80 | "verb = embed_gaussian(d, n=1)\n", 81 | "cat = embed_gaussian(d, n=1)\n", 82 | "patient = embed_gaussian(d, n=1)\n", 83 | "\n", 84 | "\n", 85 | "sentance = (encode(dog, agent) + encode(chase, verb)) / np.sqrt(2)\n", 86 | "# devided by sqrt to keep expected lengh = 1\n", 87 | "dog_decoded = decode(sentance, agent)\n", 88 | "dog_decoded /= np.linalg.norm(dog_decoded) # normalize the decoded vector for clarity\n", 89 | "\n", 90 | "plt.scatter(dog, dog_decoded)\n", 91 | "print np.corrcoef(dog, dog_decoded)\n", 92 | "plt.gca().set_xlabel('Dog Vector Values')\n", 93 | "plt.gca().set_ylabel('Decoded-Dog Vector Values')\n", 94 | "plt.show()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "# Compositonality\n", 102 | "Circular convolution preserves the simliarity structure of the underlying vectors. That is, if two vectors are more are similar to each other in vector space, then their convolutions with a third vector will retain that similarity. We can show this buy approximating a circular convolution with a tensor product (Plate, 1995; Doumas and Hummel, 2005). \n", 103 | "\n", 104 | "Formally, this stems from the observation is that if $\\mathbf{a}$, $\\mathbf{b}$, and $\\mathbf{c}$ are $D$-dimensional random vectors drawn from $\\mathcal{N}(0, \\sigma \\text{I})$ then typically\n", 105 | "\n", 106 | "$$\\cos(\\theta_{\\mathbf{a} + \\mathbf{c}, \\mathbf{b} + \\mathbf{c}}) > \\cos(\\theta_{\\mathbf{a}, \\mathbf{b}})$$\n", 107 | "\n", 108 | "or\n", 109 | "\n", 110 | "$$\\frac{(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c})}{||(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c}) ||} > \\frac{\\mathbf{a}^{\\text{T}}\\mathbf{b}}{||\\mathbf{a}^{\\text{T}}\\mathbf{b}||}$$\n", 111 | "\n", 112 | "meaning that the random vectors that share a common (linearly additive) factor are more to each other than the would be if you were to subtract thier common factor.\n", 113 | "\n", 114 | "We can see that this generally the case by noting that $(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c}) = \\mathbf{a}^{\\text{T}}\\mathbf{b} + (\\mathbf{a} + \\mathbf{b})^\\text{T}\\mathbf{c} + \\mathbf{c}^\\text{T}\\mathbf{c}$, hense we can re-arange our claim to that typically $\\mathbf{c}^\\text{T}\\mathbf{c} > (\\mathbf{a} + \\mathbf{b})^\\text{T}\\mathbf{c}$, which is true as long as $\\mathbf{c}$ is not strongly anti-correlated to $\\textbf{a}$ and $\\textbf{b}$. Asymptoically, this will be the case as \n", 115 | "\n", 116 | "$$\\mathbb{E}[(\\mathbf{a}+\\mathbf{b})^\\text{T}\\mathbf{c}] =\\sum_{i=1}^{D}\\mathbb{E}[a_i]\\mathbb{E}[c_i]+\\sum_{i=1}^{D}\\mathbb{E}[b_i]\\mathbb{E}[c_i] + (r_{ac} + r_{bc})\\sigma^2 = 0$$\n", 117 | "\n", 118 | "where $r_{ac}$ and $r_{ac}$ are the correlations between vectors $\\mathbf{a}$ and $\\mathbf{c}$ and vectors $\\mathbf{b}$ and $\\mathbf{c}$, respectively, and is zero for both when $\\mathbf{a}$, $\\mathbf{b}$, $\\mathbf{c}\\sim\\mathcal{N}(0, \\sigma\\text{I})$. 
Thus, we would expect the presense of a common factor to increase the simliarity of two random vectors.\n", 119 | "\n", 120 | "\n", 121 | "We can be more rigorus with this proof but it's easiest to just to show it is the case empirically, that as we increase the dimensionality of the vecotrs $D$, $\\Pr\\left (\\cos(\\theta_{\\mathbf{a} + \\mathbf{c}, \\mathbf{b} + \\mathbf{c}}) > \\cos(\\theta_{\\mathbf{a}, \\mathbf{b}})\\right )$ approches 1:\n", 122 | "\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 4, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "Text(0.5,1,'$\\\\Pr\\\\left ( \\\\cos\\\\ \\\\theta_{a+c, b+c} > \\\\cos\\\\ \\\\theta_{a, b} \\\\right )$')" 134 | ] 135 | }, 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | }, 140 | { 141 | "data": { 142 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAACsCAYAAABVXDjrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFspJREFUeJzt3XmUHWWZx/HvL53uhHRn7yRm7wRiyAYEkgADjmE1LBLBBTI6CjpwFIOCzigcPcjgKDKiKAgoamDgABllUDNMWBQTOCImJBACIQSy0FkhnT3dnfT6zB9VjZeb2923k6pb3fc+n3Pu6VtLVz337bpPv/VW1fvKzHDOuah0SzoA51x+8aTinIuUJxXnXKQ8qTjnIuVJxTkXKU8qzrlIeVJxzkXKk4pzLlKeVHJI0i2Srk06Dtd1SFoqaVLScXSEJ5XDJOltSQckVUt6V9J9ksraWH8Q8FngFzmMcaqk5yXVhgfnqFztuxDFVN63ATdHsJ2c8aRyZD5qZmXAicB04NvpK0jqHr69HFhoZgdyEZikEcBC4FZgILA+U3xdkaQhSceQLsbyXgCcIWloBNvKCU8qETCzLcATwGR4rxbzTUkrgZowsZwHPNvyO5JGSnpMUpWknZJ+lrJsgqTFkvZIWiXpopRl35S0RdJ+SWskndVKWD8CfmlmC8JENp8g8eWD+8OawJck9cvmF1or74jKGmIqbzM7CCwHzj3SbeWKJ5UISBoJnA+8nDJ7DnAB0M/MGoEpwJpw/SLgcaASqACGExyESCoG/hd4GhgMXAM8JGm8pPHAXGC6mfUGPgK8nSGePsBs4Fcps7sBByP5wMm7CPg+wRetUtLDks6RlPF4bq28oyjrcPtxl/dq4PiIthU/M/PXYbwIDrBqYA/BwXo3cFTKss+nrd8AHBu+PxWoArpn2O6HgHeAbinzHgFuAo4BtgNnA8VtxHYx0BTG1vI6AMzPYfn8kOALGfd+yoGvAC8BG4G5GdbJWN5RlHVU5d1WeQHfA+Yldax39OU1lSPzMTPrZ2ajzexqe397yaa0dXcDvcP3I4FKC2ow6YYBm8ysOWVeJTDczNYC1xIc9NslzZc0LMM2KoAFYWz9zKwfsAh4siMfTtLijqyfZjLBf9i4tt9iJ7ASWAH0B8ZkWKe18o6irCGa8m6rvHoTJKouwZNKfNI7qlkJfDB8vwkYldKIm2orMDKtKj8K2AJgZg+b2enA6HAft2bYRg+gtmVC0hhgGkGjX8u874ZtCW9IOr0jH0zSpZKeC690fKqV1YYBt0l6SdLVHdl+NvuQNE7Sd4ENwE+BV4GxZvb1DJtrrbyjKGtop7yzLOu2ymsC8Eorv9f5JF1V6qovglOcs7NdBnwNuDd8X0RwkNwGlAI9gdPCZSXAOuB6oBiYCewHjgXGA2cSHMQlwDzg/gz7PwvYTHCgjgReAK5MW6c0/DkduL2Vz7E4w7wTCBqli8PpQ04NCE5JNgEDws+2PNvtZ7OP8HPvAO4ETsrib5WxvKMo62zKu72ybqu8wv3vAoYlfcxn/d1IOoCu+jqMpFIeHngt7S6jgN8TVN93AHekrDuJ4ErRXuB14OJw/nHA0vDA30XQ+HjIwQYIuIegzacSuDpt+WDgQYIq+grgaynLRgGLw9eelPejwuXfBs5pp2zOBG5JmX4i2+1nsw9gBlDSwb9XxvI+0rJur7zbKussy+uTwGNJH+8dKuukAyikF8EVi2s7QRx3ArPC9/Na+wKTuabyI+Aj4fvuKfNHpLy/FviP8P1s4JvZbr+tfXTFV2tlnW15AUuAyUl/jo68FAbuCoikywhqAy8S/Kf+qJm9m2G9xWY2M23eBOA+oA5Yamb/FrZVLDKzD4Xr/JqgjWEIwX/vL5pZfTbbb20fR/BxE5WprAlqSx0ur67Ck4o7YpJmAMeb2S+TjqUryPfy8qTinIuUX1J2zkXKk4pzLlKZbr7q1MrLy62ioiLpMJwrOMuXL99hZoPaWy+2pCJpHnAhsN3MJmdYLoI7Ic8naPm+3Mxeam+7FRUVLFu2LOpwnXPtkFSZzXpxnv7cD8xqY/l5wLjwdRXBzUPOuS4utqRiZs8R3InYmtnAAxb4G9CvK3VE45zLLMk2leG8/0nezeG8bcmEE4+Gpmaamo2exUUANDUbq7ftY8OOGhqbmznt6HIGlJawYtMeXtuy95CnEAHqG5up3FXLlt0HaDajqJs4cVR/Th4zgNXb9vHXdTs50NCU2w/m8sbA0hJ+ctnUyLaXZFJRhnkZb5qRdBXBKRKjRnWdblZXbt7DVx55mYMNzcy7fDof6NuTKx9YxvLK3e9br7SkiJr6tpNCv17FjOzfi+IiUVvfxO1/epOWW4xGD+zFwNKSuD6Gy3NHhf/wopJkUtlM8ERnixEEj6IfwszuBe4FmDZtWqe9W+/ljbu5ZeEbbNlzgFEDerGscheDynoA8KlfvMCA0hLe3XeQ786exLSKATQ1G8++WcXm3Qc47ZiBzKgYQEn3Q89Ii7qJ3j2L3zdvR3UdL1Xu5oNDelNRXpqTz+dcNpJMKguAuZLmAycDe82sS536HGxo4vsLV7Pmnf3UNzXz8sY9DO7dg1PGDqRyZw0fPX4YN144kYMNzVxx/4u8u+
8gD195CieN7v/eNiYP73tY+y4v68G5kz4Q1UdxLjJxXlJ+hKB/inJJm4HvEPRZgZn9nKDn8fOBtQSXlK+IK5Y47Kyu418eWMaKTXuYNro/JUXdmHvGMXxx5tGU9Ti0WBfMPY36xmZKMyxzLp/EdoSb2Zx2lhvw5bj2H6cVm/ZwzSMvsX1fHfd8+kRmTW7/olVxUTeKi/wGZpf//N9mB+w/2MB9z7/NHc+8xZA+PZl/1SlMHdW//V90roB4UslCc7Nx29NreOCFSqrrGrlgylC+f/EU+vYqbv+XnSswnlSy8MALb3P34nVcMGUoV/3jWI4fmdX4Vc4VJE8q7Vi7vZpbnniDM8YP4mf/NJXgkSXnXGs8qbSitr6R59fu5Md/fJOjSoq49ePHeUJxLgueVDLYvLuWC+/8C3tqGyjr0Z3bLz2BwX16Jh2Wc12CJ5UM7l68jtq6Ju6/YjqnHj2QHt2jvY3ZuXzmSSXN1j0H+O2yTVw6fSQzxw9OOhznuhy/GyvNz59dB8CXZh6TcCTOdU2eVFJs2XOA+Us38YmTRjC831FJh+Ncl+RJJWRmfOcPr1HUTXz5DK+lOHe4PKmEnlr1Ln9avZ3rzhnHiP69kg7HuS7Lkwqwt7aBmxasYsLQPlxx2pikw3GuSyv4pLL3QAP/PG8Ju2rq+cElU/xJYueOUEF/g2rrG/ncvKWs3raPez5zoj/T41wECvo+lSdfe4cVm/Zw55ypnDVhSNLhOJcXCrqm8rf1O+l7VDEXTPGRQZyLSkEnlSUbdjG9YgDduvmDgs5FpWCTyjt7D1K5s5ZTxg5IOhTn8krBJpUlG3YCcPKYgQlH4lx+KeCksouyHt2ZOKxP0qE4l1diTSqSZklaI2mtpOszLB8t6RlJKyUtljQiznhSLVm/k2kV/Sny9hTnIhVbUpFUBNwFnAdMBOZImpi22m0Eg7QfB9wM3BJXPKmq9texrqrGT32ci0GcNZUZwFozW29m9cB8YHbaOhOBZ8L3izIsj8WiN7YDeCOtczGIM6kMBzalTG8O56V6Bfh4+P5ioLekWKsPTc3GPc+uY8LQPpzgd9A6F7k4k0qmxor0wdX/FfiwpJeBDwNbgMZDNiRdJWmZpGVVVVVHFNTjK7eyYUcNXz3rGO/I2rkYxJlUNgMjU6ZHAFtTVzCzrWZ2iZlNBb4VztubviEzu9fMppnZtEGDBh12QE3Nxp1/Xsv4Ib05d6IPbu5cHOJMKi8C4ySNkVQCXAYsSF1BUrmklhhuAObFGA+L3tjO2u3VXHPWMX4XrXMxiS2pmFkjMBd4ClgN/MbMVkm6WdJF4WozgTWS3gSGAN+LKx6AZZW7KSnq5rUU52IU61PKZrYQWJg278aU948Cj8YZQ6rXt+3jmMFllHQv2Hv+nItdQX27Vm/b53fQOhezgkkq2/cfpGp/HROHelJxLk4Fk1RWb9sPwARPKs7FqmCSyutb9wF4TcW5mBVMUlm9bR/D+x1F317FSYfiXF4rmKTy+rZ9furjXA4URFI52NDE+qpqv/LjXA4URFJZ885+mg0mDu2ddCjO5b2CSCqrtwWNtH7641z8CiKprKuqpkf3boz0MZKdi11BJJUNO2qoGFjqDxE6lwMFk1TGlJcmHYZzBSHvk0pjUzMbd9VS4UnFuZzI6illST2Bq4HTCXpv+wtwj5kdjDG2SGzdc5CGJmOsJxXnciLbrg8eAPYDd4bTc4AHgU/GEVSU1u+oBvCainM5km1SGW9mx6dML5L0ShwBRe3tHTUA3qbiXI5k26bysqRTWiYknQw8H09I0dqwo4ayHt0pLytJOhTnCkK2NZWTgc9K2hhOjwJWS3oVsHAwsE5pw85axpSXes/5zuVItkllVqxRxGjDjmpOGNk/6TCcKxhZJRUzq4w7kDjUNTaxZfcBLp6asyGanSt4eX2fyqZdtTQbjCn32/Ody5VYk4qkWZLWSFor6foMy0dJWiTpZUkrJZ0f5f437KgFYEx5WZSbdc61IbakIqkIuAs4j2Ag9jmSJqat9m2C8YCmEgw2dneUMVTuDC8nD/TLyc7lSpw1lRnAWjNbb2b1wHxgdto6BrT0R9CXtGFRj9SumnqKuok+R8U6vJFzLkWc37bhwKaU6c0El6ZT3QQ8LekaoBQ4O8oAauubKC0p8svJzuVQnDWVTN9kS5ueA9xvZiOA84EHU8ZW/vuGpKskLZO0rKqqKusAqusaKevhtRTncinOpLIZGJkyPYJDT2++APwGwMxeAHoC5ekbMrN7zWyamU0bNGhQ1gHU1DVS6knFuZyKM6m8CIyTNEZSCUFD7IK0dTYCZwFImkCQVLKvirSjuq6RXp5UnMup2JKKmTUCc4GngNUEV3lWSbpZ0kXhal8HrgwfTnwEuNzM0k+RDlttfRNlPYqi2pxzLgux/hs3s4XAwrR5N6a8fx04La7919Q1MrDUb3xzLpfy+o5ab6h1LvfyOqnU1DXSy09/nMup/E4q9U1+9ce5HMvbpNLQ1Ex9YzNlJZ5UnMulvE0qNXWNAF5TcS7H8japVL+XVLxNxblcytukUlPXBHhNxblcy9+kUu+nP84lIX+TSnj64/epOJdbeZ9UepV4m4pzuZS3SaU6bFPxmopzuZW3SaXW21ScS0TeJpVqb1NxLhF5m1Rq6hrpJujRPW8/onOdUt5+42rqgud+vH9a53Irj5OKd3vgXBLyN6nUe/+0ziUhb5NKdV0wPIdzLrfyNql4T/rOJcOTinMuUvmbVOq9oda5JMSaVCTNkrRG0lpJ12dYfrukFeHrTUl7otp3cEnZ21Scy7XY/pVLKgLuAs4hGK3wRUkLwmE5ADCz61LWvwaYGtX+q+saKfWuJJ3LuThrKjOAtWa23szqgfnA7DbWn0MwoNgRa+mf1ttUnMu9OJPKcGBTyvTmcN4hJI0GxgB/bmV5hwZor/Ve35xLTJxJJdP98a0NaXoZ8KiZNWVa2NEB2qvrWx4m9DYV53ItzqSyGRiZMj0C2NrKupcR0akPpHbQ5DUV53ItzqTyIjBO0hhJJQSJY0H6SpLGA/2BF6LasXd74FxyYksqZtYIzAWeAlYDvzGzVZJulnRRyqpzgPlm1tqpUYd5m4pzyYn1W2dmC4GFafNuTJu+Ker9+pg/ziUnL++ofW90Qm9TcS7n8jOpeP+0ziUmP5OK96TvXGLyNKkE/dP2LM7Lj+dcp5aX37pJw/pw2YxR3j+tcwnIy/OD86YM5bwpQ5MOw7mClJc1FedccjypOOci5UnFORcpRXh3fE5IqgIq21ilHNiRo3AOl8cYDY8xGtnGONrM2u0moMsllfZIWmZm05KOoy0eYzQ8xmhEHaOf/jjnIuVJxTkXqXxMKvcmHUAWPMZoeIzRiDTGvGtTcc4lKx9rKs65BOVVUmlv8LIkSBopaZGk1ZJWSfpqOH+ApD9Keiv82T/hOIskvSzp8XB6jKQlYXz/HXYJmihJ/SQ9KumNsDxP7YTleF34d35N0iOSeiZdlpLmSdou6bWUeRnLTYE7wu/QSkkndnR/eZNUUgYvOw+YCMyRNDHZqABoB
L5uZhOAU4Avh3FdDzxjZuOAZ8LpJH2VoNvPFrcCt4fx7Qa+kEhU7/dT4EkzOxY4niDeTlOOkoYDXwGmmdlkoIigb+aky/J+YFbavNbK7TxgXPi6Crinw3szs7x4AacCT6VM3wDckHRcGeL8A8GojWuAoeG8ocCaBGMaER5YZwKPEwyvsgPonqlsE4qxD7CBsB0wZX5nKseWsa4GEDys+zjwkc5QlkAF8Fp75Qb8ApiTab1sX3lTU6EDg5clRVIFwdCuS4AhZrYNIPw5OLnI+AnwDaA5nB4I7LGg83LoHGU5FqgC7gtP034lqZROVI5mtgW4DdgIbAP2AsvpfGUJrZfbEX+P8impdGTwspyTVAb8D3Ctme1LOp4Wki4EtpvZ8tTZGVZNuiy7AycC95jZVKCG5E8Z3ydsl5hNMNrmMKCU4HQiXdJl2ZYj/tvnU1LpyOBlOSWpmCChPGRmj4Wz35U0NFw+FNieUHinARdJeptgvOszCWou/SS19LfTGcpyM7DZzJaE048SJJnOUo4AZwMbzKzKzBqAx4B/oPOVJbRebkf8PcqnpJLV4GW5pqD7uV8Dq83sxymLFgCfC99/jqCtJefM7AYzG2FmFQRl9mcz+zSwCPhE0vG1MLN3gE3h4HMAZwGv00nKMbQROEVSr/Dv3hJjpyrLUGvltgD4bHgV6BRgb8tpUtaSatSKqTHqfOBNYB3wraTjCWM6naD6uBJYEb7OJ2i3eAZ4K/w5oBPEOhN4PHw/FlgKrAV+C/ToBPGdACwLy/L3BCNbdqpyBP4deAN4DXgQ6JF0WRIMKbwNaCCoiXyhtXIjOP25K/wOvUpwJatD+/M7ap1zkcqn0x/nXCfgScU5FylPKs65SHlScc5FypOKcy5SnlQKmKQmSSvCp2pfkfQ1Sd3CZdMk3ZFQXH9NYr8uGn5JuYBJqjazsvD9YOBh4Hkz+06ykbmuzGsqDgAz207wqPvc8G7KmSl9q9wk6b8kPS3pbUmXSPpPSa9KejJ8DAFJJ0l6VtJySU+l3Aa+WNKtkpZKelPSh8L5k8J5K8K+O8aF86vDn5L0w7BvklclXRrOnxlus6VvlYfCO1iR9ANJr4fbuy3X5ejydCxld3jMbH14+pPpSd+jgTMI+qp5Afi4mX1D0u+ACyT9H3AnMNvMqsIE8D3g8+HvdzezGZLOB75D8JzMF4GfmtlD4aMVRWn7vITgLtrjCcameVHSc+GyqcAkgudSngdOk/Q6cDFwrJmZpH5HXCiuwzypuHSZnlIFeMLMGiS9SvDlfzKc/ypBXx3jgcnAH8NKQxHBreEtWh6kXB6uD0Fy+pakEcBjZvZW2j5PBx4xsyaCB+CeBaYD+4ClZrYZQNKKcJt/Aw4CvwqT3OMd+uQuEn76494jaSzQROYnfesAzKwZaLC/N8Y1E/xzErDKzE4IX1PM7Nz03w+33z3c1sPARcAB4ClJZ6aH1Ea4dSnvmwhqQo3ADIInwj/G3xOfyyFPKg4ASYOAnwM/s8NrvV8DDJJ0ari9YkmT2tnnWGC9md1B8HTscWmrPAdcqqD/3EHAPxI8mNfa9sqAvma2ELiW4NTJ5Zif/hS2o8JTh2KCvnQfBH7c9q9kZmb1kj4B3CGpL8Gx9RNgVRu/dinwGUkNwDvAzWnLf0fQ/eIrBE96f8PM3pF0bCvb6w38QVJPglrOdYfzWdyR8UvKzrlI+emPcy5SnlScc5HypOKci5QnFedcpDypOOci5UnFORcpTyrOuUh5UnHORer/AXFIk8CSR8YOAAAAAElFTkSuQmCC\n", 143 | "text/plain": [ 144 | "
" 145 | ] 146 | }, 147 | "metadata": { 148 | "needs_background": "light" 149 | }, 150 | "output_type": "display_data" 151 | } 152 | ], 153 | "source": [ 154 | "N = 1000\n", 155 | "dot = [None]* 100\n", 156 | "for d in range(1, 101):\n", 157 | " a = (np.random.randn(N, d))\n", 158 | " b = (np.random.randn(N, d))\n", 159 | " c = (np.random.randn(N, d))\n", 160 | " f = np.array([np.dot(a[ii, :] + c[ii, :], b[ii, :] + c[ii, :]) - np.dot(a[ii, :], b[ii, :])\n", 161 | " for ii in range(N)]) \n", 162 | " dot[d-1] = np.mean(f >= 0)\n", 163 | "\n", 164 | "plt.figure(figsize=(4, 2)) \n", 165 | "plt.plot(range(1, 101), dot)\n", 166 | "plt.xlabel('Dimensions')\n", 167 | "plt.ylabel('p')\n", 168 | "plt.title(r'$\\Pr\\left ( \\cos\\ \\theta_{a+c, b+c} > \\cos\\ \\theta_{a, b} \\right )$')" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "We can extend this arguements to tensor products by first noting that tensor products are distributive, so:\n", 176 | "\n", 177 | "$$(\\mathbf{x} + \\mathbf{y}) \\otimes\\mathbf{z} = \\mathbf{x}\\otimes\\mathbf{z} + \\mathbf{y}\\otimes\\mathbf{z}$$\n", 178 | "\n", 179 | "Thus, if we make two random vectors $\\mathbf{a}$ and $\\mathbf{b}$ similar to eachother by adding to each a common factor $\\mathbf{d}$, then taking the tensor product of each of those two vectors with a third random vector $\\mathbf{c}$, we can decompose both tensor products into the sum of two seperate tensors:\n", 180 | "\n", 181 | "$$(\\mathbf{a} + \\mathbf{d}) \\otimes\\mathbf{c} = \\mathbf{a}\\otimes\\mathbf{c} + \\mathbf{d}\\otimes\\mathbf{c}$$\n", 182 | "$$(\\mathbf{b} + \\mathbf{d}) \\otimes\\mathbf{c} = \\mathbf{b}\\otimes\\mathbf{c} + \\mathbf{d}\\otimes\\mathbf{c}$$\n", 183 | "\n", 184 | "Thus, both tensors share a common tensor. Then, by the arguments above we can show that:\n", 185 | "\n", 186 | "$$\\cos \\theta_{(\\mathbf{a} + \\mathbf{d}) \\otimes\\mathbf{c}, (\\mathbf{b} + \\mathbf{d}) \\otimes\\mathbf{c}} > \\cos \\theta_{\\mathbf{a} \\otimes\\mathbf{c}, \\mathbf{b}\\otimes\\mathbf{c} }$$\n", 187 | "\n", 188 | "will be true with probabilty approaching 1 as the dimensionality of the vectors goes to infinity. Thus, taking the tensor product of two similar vectors and a third random vector will result in two similar tensor products. Because circular convolution resembles a tensor product opperation (Plate, 1995; Doumas and Hummel, 2005) this argumemt will hold for it as well. 
Without getting into a rigorous proof of this, we can demonstrate this empirically:\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 5, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Dot Product:\n", 201 | "\n", 202 | "dot(Olivia, William) = 0.590\n", 203 | "dot(Olivia, Coffee) = 0.036\n", 204 | "dot(Coffee, William) = 0.045\n", 205 | "\n", 206 | "dot(Olivia(*)Agent, William(*)Agent) = 0.549\n", 207 | "dot(Olivia(*)Agent, Coffee(*)Agent) = 0.075\n", 208 | "dot(Coffee(*)Agent, William(*)Agent) = 0.056\n", 209 | "\n", 210 | "Euclidean Distance:\n", 211 | "\n", 212 | "||Olivia - William|| = 0.952\n", 213 | "||Olivia - Coffee || = 1.362\n", 214 | "||Coffee - William|| = 1.332\n", 215 | "\n", 216 | "||Olivia(*)Agent - William(*)Agent|| = 0.987\n", 217 | "||Olivia(*)Agent - Coffee(*)Agent || = 1.440\n", 218 | "||Coffee(*)Agent - William(*)Agent|| = 1.497\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "from sklearn.preprocessing import normalize\n", 224 | "\n", 225 | "# both Olivia and William will share the property isPerson\n", 226 | "isPerson = embed_gaussian(d)\n", 227 | "\n", 228 | "Olivia = (embed_gaussian(d) + isPerson) / np.sqrt(2)\n", 229 | "William = (embed_gaussian(d) + isPerson) / np.sqrt(2)\n", 230 | "Agent = embed_gaussian(d)\n", 231 | "Coffee = embed_gaussian(d)\n", 232 | "\n", 233 | "\n", 234 | "OliviaAgent = encode(Olivia, Agent)\n", 235 | "WilliamAgent = encode(William, Agent)\n", 236 | "CoffeeAgent = encode(Coffee, Agent)\n", 237 | "\n", 238 | "print \"Dot Product:\"\n", 239 | "print \n", 240 | "print \"dot(Olivia, William) = %.3f\" % np.dot(OliviaAgent, WilliamAgent.T)[0][0]\n", 241 | "print \"dot(Olivia, Coffee) = %.3f\" % np.dot(CoffeeAgent, WilliamAgent.T)[0][0]\n", 242 | "print \"dot(Coffee, William) = %.3f\" % np.dot(OliviaAgent, CoffeeAgent.T)[0][0]\n", 243 | "\n", 244 | "\n", 245 | "print \n", 246 | "print \"dot(Olivia(*)Agent, William(*)Agent) = %.3f\" % np.dot(Olivia, William.T)[0][0]\n", 247 | "print \"dot(Olivia(*)Agent, Coffee(*)Agent) = %.3f\" % np.dot(Coffee, William.T)[0][0]\n", 248 | "print \"dot(Coffee(*)Agent, William(*)Agent) = %.3f\" % np.dot(Olivia, Coffee.T)[0][0]\n", 249 | "\n", 250 | "\n", 251 | "print\n", 252 | "print \"Euclidean Distance:\"\n", 253 | "print \n", 254 | "print \"||Olivia - William|| = %.3f\" % np.linalg.norm(Olivia - William)\n", 255 | "print \"||Olivia - Coffee || = %.3f\" % np.linalg.norm(Olivia - Coffee)\n", 256 | "print \"||Coffee - William|| = %.3f\" % np.linalg.norm(Coffee - William)\n", 257 | "\n", 258 | "print\n", 259 | "print \"||Olivia(*)Agent - William(*)Agent|| = %.3f\" % np.linalg.norm(OliviaAgent - WilliamAgent)\n", 260 | "print \"||Olivia(*)Agent - Coffee(*)Agent || = %.3f\" % np.linalg.norm(CoffeeAgent - WilliamAgent)\n", 261 | "print \"||Coffee(*)Agent - William(*)Agent|| = %.3f\" % np.linalg.norm(OliviaAgent - CoffeeAgent)\n", 262 | "\n" 263 | ] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 2", 269 | "language": "python", 270 | "name": "python2" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 2 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython2", 282 | "version": "2.7.15" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 1 287 | } 288 | 
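The HRR demo above relies on `encode` and `decode` from `opt/hrr.py`, whose source is not shown in this listing. For reference, binding and unbinding in Holographic Reduced Representations are conventionally implemented as circular convolution and circular correlation, which can be computed with the FFT. The sketch below is a minimal, self-contained illustration of those two operations under that assumption; the function names and the round-trip example are illustrative and are not taken from the repository's code.

    import numpy as np

    def circular_convolution(a, b):
        # HRR-style binding: elementwise product in the Fourier domain
        return np.real(np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)))

    def circular_correlation(trace, cue):
        # HRR-style unbinding: convolve the trace with the approximate inverse of the cue
        return np.real(np.fft.ifft(np.fft.fft(trace) * np.conj(np.fft.fft(cue))))

    d = 128
    dog = np.random.randn(d) / np.sqrt(d)    # N(0, I/d) vectors, the usual HRR scaling
    agent = np.random.randn(d) / np.sqrt(d)
    trace = circular_convolution(dog, agent)
    dog_hat = circular_correlation(trace, agent)
    print(np.corrcoef(dog, dog_hat)[0, 1])   # a noisy but clearly positive reconstruction

The scatter plot in the notebook's first figure is this same kind of noisy round trip, produced with the repository's own `encode` and `decode`.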
-------------------------------------------------------------------------------- /Tutorials/Readme.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | 4 | There are a few prepackaged tutorials in Jupyter notebooks meant to demonstrate basic functions of the model. They can 5 | all be run in Google colab (the accompanying jupyter notebooks have been pre-run and can be viewed in GitHub) 6 | 7 | * Demo - Segmentation and Memory Tutorial 8 | 9 | This brief tutorial walks through some basic functions of segmentation and the memory model in a toy 2-d world. This also includes a comparison between SEM and an HMM in Memory. 10 | 11 | Open in Colab 12 | 13 |   14 | 15 | 16 | 17 | * Demo - Toy Data (Segmentation) 18 | 19 | These simulations demonstrate how SEM can segement simple, 2D dynamical systems with 20 | various different methods of estimating the event dynamics of the system. 21 | 22 | Open in Colab 23 | 24 |   25 | 26 | * Demo - HRR 27 | 28 | Demonstration of the Holographic reduced representation 29 | 30 | Open in Colab 31 | 32 |   33 | 34 | * Demo - Motion Capture Data.ipynb 35 | 36 | Simulations of the SEM model on the 3D motion capture data. 37 | 38 | Open in Colab 39 | 40 |   41 | -------------------------------------------------------------------------------- /data/motion_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/data/motion_data.pkl -------------------------------------------------------------------------------- /data/videodata/video_color_Z_embedded_64_5epoch.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/data/videodata/video_color_Z_embedded_64_5epoch.npy -------------------------------------------------------------------------------- /data/zachs_2006_young_unwarned.csv: -------------------------------------------------------------------------------- 1 | 1.0582010582010568, 0 2 | 1.5873015873015888, 0 3 | 2.6455026455026456, 0.0842105263157894 4 | 3.9682539682539684, 0 5 | 4.761904761904766, 0 6 | 5.820105820105823, 0.24736842105263168 7 | 6.87830687830688, 0 8 | 7.936507936507937, 0.16842105263157903 9 | 8.730158730158735, 0.16842105263157903 10 | 9.788359788359784, 0 11 | 10.846560846560848, 0 12 | 11.904761904761905, 0.16842105263157903 13 | 12.962962962962962, 0.0842105263157894 14 | 13.75661375661376, 0.0842105263157894 15 | 14.814814814814817, 0.0052631578947368585 16 | 15.873015873015873, 0.24736842105263168 17 | 16.931216931216937, 0.16842105263157903 18 | 17.724867724867728, 0.24736842105263168 19 | 19.841269841269842, 0.0052631578947368585 20 | 21.957671957671955, 0.16315789473684206 21 | 23.80952380952381, 0.010526315789473717 22 | 25.925925925925924, 0.3263157894736841 23 | 26.984126984126988, 0.0842105263157894 24 | 28.835978835978835, 0.0842105263157894 25 | 29.8941798941799, 0.16315789473684206 26 | 30.952380952380963, 0.0052631578947368585 27 | 32.01058201058201, 0.0842105263157894 28 | 32.80423280423281, 0.0052631578947368585 29 | 34.920634920634924, 0.16315789473684206 30 | 36.77248677248677, 0.0052631578947368585 31 | 37.830687830687836, 0.16842105263157903 32 | 38.8888888888889, 0.09473684210526312 33 | 39.682539682539684, 0.24736842105263168 34 | 41.005291005291014, 0.0052631578947368585 35 | 41.79894179894181, 0.0842105263157894 36 | 
42.85714285714286, 0.0842105263157894 37 | 43.915343915343925, 0.0052631578947368585 38 | 44.973544973544975, 0.0842105263157894 39 | 46.82539682539684, 0.0842105263157894 40 | 47.883597883597886, 0.24736842105263168 41 | 50, 0.0052631578947368585 42 | 69.84126984126985, 0 43 | 70.89947089947091, 0.2578947368421052 44 | 71.95767195767196, 0.3315789473684211 45 | 73.01587301587303, 0.17368421052631577 46 | 74.07407407407408, 0.2421052631578947 47 | 74.86772486772486, 0 48 | 79.89417989417991, 0 49 | 80.95238095238095, 0.1578947368421053 50 | 82.01058201058203, 0.0052631578947368585 51 | 82.80423280423281, 0.0842105263157894 52 | 83.86243386243387, 0 53 | 85.97883597883599, 0 54 | 87.03703703703704, 0.3315789473684211 55 | 87.83068783068785, 0.07894736842105265 56 | 88.88888888888889, 0 57 | 89.94708994708996, 0.0842105263157894 58 | 91.00529100529101, 0 59 | 93.12169312169314, 0 60 | 93.9153439153439, 0.0842105263157894 61 | 96.03174603174602, 0.0842105263157894 62 | 97.08994708994709, 0 63 | 98.14814814814815, 0 64 | 98.94179894179896, 0.0842105263157894 65 | 100, 0.0052631578947368585 66 | 101.05820105820106, 0.16315789473684206 67 | 102.11640211640213, 0.0842105263157894 68 | 102.91005291005291, 0.0842105263157894 69 | 105.02645502645504, 0.3315789473684211 70 | 106.08465608465607, 0.16842105263157903 71 | 106.87830687830689, 0.24736842105263168 72 | 107.93650793650795, 0.0052631578947368585 73 | 108.99470899470901, 0.0842105263157894 74 | 110.05291005291005, 0.0052631578947368585 75 | 111.11111111111111, 0.0842105263157894 76 | 111.9047619047619, 0.0052631578947368585 77 | 112.96296296296299, 0.0842105263157894 78 | 114.02116402116403, 0 79 | 115.07936507936509, 0 80 | 116.13756613756615, 0.0842105263157894 81 | 116.93121693121694, 0.0842105263157894 82 | 117.989417989418, 0.0052631578947368585 83 | 119.04761904761907, 0.0842105263157894 84 | 120.1058201058201, 0.0052631578947368585 85 | 120.89947089947091, 0.5 86 | 121.95767195767198, 0 87 | 123.01587301587301, 0 88 | 124.07407407407408, 0.0842105263157894 89 | 125.13227513227514, 0.0842105263157894 90 | 125.92592592592592, 0.0052631578947368585 91 | 126.98412698412699, 0.0842105263157894 92 | 128.04232804232805, 0.0052631578947368585 93 | 129.1005291005291, 0.0842105263157894 94 | 130.15873015873018, 0.0842105263157894 95 | 131.21693121693121, 0 96 | 135.978835978836, 0 97 | 138.0952380952381, 0.4157894736842105 98 | 140.21164021164023, 0 99 | 146.03174603174605, 0 100 | 147.0899470899471, 0.16842105263157903 101 | 148.14814814814818, 0 102 | 155.02645502645504, 0 103 | 156.08465608465607, 0.0842105263157894 104 | 157.14285714285717, 0 105 | 161.11111111111111, -0.0052631578947368585 106 | 162.16931216931218, 0.42105263157894735 107 | 163.22751322751324, 0.0052631578947368585 108 | 165.0793650793651, 0.16842105263157903 109 | 166.13756613756615, 0.08947368421052637 110 | 167.19576719576722, 0.24736842105263168 111 | 168.1216931216931, 0.0842105263157894 112 | 169.1798941798942, 0.1657894736842105 113 | 170.1058201058201, 0.0052631578947368585 114 | 173.01587301587304, 0.3315789473684211 115 | 174.07407407407408, 0.16842105263157903 116 | 175.13227513227514, 0.16842105263157903 117 | 176.1904761904762, 0 118 | 184.12698412698413, 0 119 | -------------------------------------------------------------------------------- /data/zachs_2006_young_warned.csv: -------------------------------------------------------------------------------- 1 | 0.9043209876543195, 0 2 | 4.515432098765427, 0 3 | 5.444444444444439, 0.3277777777777775 4 | 
6.638888888888889, 0.1777777777777776 5 | 7.712962962962958, 0.24444444444444424 6 | 9.787037037037042, 0.011111111111111072 7 | 10.58024691358025, 0.08333333333333304 8 | 11.69135802469135, 0.08333333333333304 9 | 12.481481481481485, 0.16111111111111098 10 | 13.67901234567901, 0.005555555555555314 11 | 14.512345679012345, 0.005555555555555314 12 | 15.537037037037035, 0.16111111111111098 13 | 16.685185185185187, 0.09444444444444433 14 | 17.296296296296294, 0.49444444444444424 15 | 18.682098765432098, 0 16 | 19.37962962962963, 0.24444444444444424 17 | 20.62654320987654, 0 18 | 21.737654320987655, 0 19 | 22.524691358024693, 0.08333333333333304 20 | 23.635802469135808, 0.08333333333333304 21 | 24.65740740740741, 0.24444444444444424 22 | 27.57098765432099, 0 23 | 28.682098765432098, 0 24 | 29.42592592592592, 0.16111111111111098 25 | 31.456790123456788, 0.005555555555555314 26 | 32.52777777777777, 0.07777777777777772 27 | 33.67901234567901, 0.005555555555555314 28 | 34.422839506172835, 0.16666666666666652 29 | 35.53395061728395, 0.16666666666666652 30 | 37.5679012345679, 0.005555555555555314 31 | 38.67901234567901, 0.005555555555555314 32 | 39.42283950617285, 0.16666666666666652 33 | 40.580246913580254, 0.08333333333333304 34 | 41.41358024691358, 0.08333333333333304 35 | 42.478395061728406, 0.16666666666666652 36 | 43.404320987654316, 0 37 | 45.62654320987653, 0 38 | 48.26543209876543, 0.24999999999999978 39 | 49.51543209876543, 0 40 | 65.34876543209876, 0 41 | 66.41666666666667, 0.07777777777777772 42 | 67.57098765432097, 0 43 | 69.51543209876543, 0 44 | 71.22839506172838, 0.4166666666666665 45 | 72.5246913580247, 0.08333333333333304 46 | 74.19135802469135, 0.08333333333333304 47 | 77.29320987654322, 0 48 | 75.62654320987654, 0 49 | 78.36111111111111, 0.07777777777777772 50 | 79.23765432098764, 0 51 | 81.1820987654321, 0 52 | 82.1574074074074, 0.24444444444444424 53 | 83.4043209876543, 0 54 | 84.23765432098766, 0 55 | 86, 0.3277777777777775 56 | 88.34876543209877, 0.09999999999999987 57 | 89.14814814814815, 0.16111111111111098 58 | 90.34876543209876, 0 59 | 94.23765432098764, 0 60 | 95.25925925925927, 0.16111111111111098 61 | 96.45987654320987, 0 62 | 100.070987654321, 0 63 | 101.0925925925926, 0.16111111111111098 64 | 102.2932098765432, 0 65 | 103.26851851851852, 0.24444444444444424 66 | 104.10185185185186, 0.24444444444444424 67 | 106.45987654320989, 0 68 | 107.2932098765432, 0 69 | 108.26543209876543, 0.24999999999999978 70 | 109.46913580246915, 0.08333333333333304 71 | 111.96913580246915, 0.08333333333333304 72 | 113.12654320987654, 0 73 | 118.95987654320986, 0 74 | 120.02777777777779, 0.5777777777777777 75 | 122.01234567901234, 0.005555555555555314 76 | 121.13888888888887, 0.07777777777777772 77 | 123.08333333333333, 0.07777777777777772 78 | 124.23765432098766, 0 79 | 125.06790123456788, 0.005555555555555314 80 | 126.08950617283953, 0.16666666666666652 81 | 127.57098765432097, 0 82 | 128.9567901234568, 0.005555555555555314 83 | 129.9814814814815, 0.16111111111111098 84 | 131.17592592592592, 0.011111111111111072 85 | 131.97530864197532, 0.07222222222222197 86 | 133.12345679012347, 0.005555555555555314 87 | 134.19444444444443, 0.07777777777777772 88 | 135.070987654321, 0 89 | 136.1820987654321, 0 90 | 136.87962962962965, 0.24444444444444424 91 | 138.12654320987656, 0 92 | 138.9598765432099, 0 93 | 140.0277777777778, 0.07777777777777772 94 | 141.1820987654321, 0 95 | 146.1820987654321, 0 96 | 146.96913580246914, 0.08333333333333304 97 | 148.08024691358028, 0.08333333333333304 98 | 
149.23765432098767, 0 99 | 150.90432098765433, 0 100 | 151.97222222222226, 0.07777777777777772 101 | 153.12654320987656, 0 102 | 154.79320987654322, 0 103 | 155.81172839506175, 0.16666666666666652 104 | 157.01543209876544, 0 105 | 158.1265432098766, 0 106 | 158.91666666666669, 0.07777777777777772 107 | 160.070987654321, 0 108 | 161.78703703703707, 0.411111111111111 109 | 162.8456790123457, 0.005555555555555314 110 | 163.91358024691363, 0.08333333333333304 111 | 165.0246913580247, 0.08333333333333304 112 | 165.67283950617286, 0.4166666666666665 113 | 166.96913580246917, 0.08333333333333304 114 | 168.08024691358028, 0.08333333333333304 115 | 168.9598765432099, 0 116 | 169.79320987654322, 0 117 | 170.76234567901238, 0.2555555555555553 118 | 171.83641975308643, 0.3222222222222222 119 | 172.7037037037037, 0.26111111111111107 120 | 173.7808641975309, 0.3222222222222222 121 | 175.07098765432102, 0 122 | 182.8487654320988, 0 123 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: sem 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | # - appnope=0.1.0=py27_0 7 | - backports=1.0=py27_1 8 | - backports.functools_lru_cache=1.5=py_1 9 | - backports.shutil_get_terminal_size=1.0.0=py_3 10 | - backports_abc=0.5=py27_0 11 | - blas=1.1=openblas 12 | - bleach=2.1.3=py_0 13 | - ca-certificates=2018.4.16=0 14 | - certifi=2018.4.16=py27_0 15 | - configparser=3.5.0=py27_0 16 | - cycler=0.10.0=py_1 17 | - decorator=4.3.0=py_0 18 | - entrypoints=0.2.3=py27_1 19 | - enum34=1.1.6=py27_1 20 | - freetype=2.8.1=0 21 | - functools32=3.2.3.2=py27_2 22 | - futures=3.2.0=py27_0 23 | - html5lib=1.0.1=py_0 24 | - ipykernel=4.8.2=py27_0 25 | - ipython=5.7.0=py27_0 26 | - ipython_genutils=0.2.0=py_1 27 | - ipywidgets=7.2.1=py27_1 28 | - jinja2=2.10=py_1 29 | - jsonschema=2.6.0=py27_1 30 | - jupyter_client=5.2.3=py_1 31 | - jupyter_core=4.4.0=py_0 32 | - kiwisolver=1.0.1=py27_1 33 | - libgfortran # =3.0.0=0 34 | - libpng=1.6.34=ha92aebf_1 35 | - libsodium=1.0.16=0 36 | - markupsafe=1.0=py27_0 37 | - matplotlib=2.2.2=py27_1 38 | - mistune=0.8.3=py27_1 39 | - nbconvert=5.3.1=py_1 40 | - nbformat=4.4.0=py27_0 41 | - ncurses=5.9=10 42 | - notebook=5.5.0=py27_0 43 | - numpy=1.14.5=py27_blas_openblashd3ea46f_201 44 | - openblas=0.2.20=8 45 | - openssl=1.0.2o=0 46 | - pandas=0.23.3=py27_0 47 | # - pandoc=2.2.1=hde52d81_0 48 | - pandocfilters=1.4.2=py27_0 49 | - pathlib2=2.3.2=py27_0 50 | - patsy=0.5.0=py_1 51 | - pexpect=4.6.0=py27_0 52 | - pickleshare=0.7.4=py27_0 53 | - pip=9.0.3=py27_0 54 | - prompt_toolkit=1.0.15=py27_0 55 | - ptyprocess=0.6.0=py27_0 56 | - pygments=2.2.0=py_1 57 | - pyparsing=2.2.0=py_1 58 | - python=2.7.15=0 59 | - python-dateutil=2.7.3=py_0 60 | - pytz=2018.5=py_0 61 | - pyzmq=17.0.0=py27_4 62 | - readline=7.0=0 63 | - scandir=1.7=py27_0 64 | - scikit-learn # =0.19.1=py27_blas_openblas_201 65 | - scipy=1.1.0=py27_blas_openblas_200 66 | - seaborn=0.8.1=py_1 67 | - send2trash=1.5.0=py_0 68 | - setuptools=40.0.0=py27_0 69 | - simplegeneric=0.8.1=py_1 70 | - singledispatch=3.4.0.3=py27_0 71 | - six=1.11.0=py27_1 72 | - sqlite=3.20.1=2 73 | - statsmodels=0.9.0=py27_0 74 | - subprocess32=3.5.2=py27_0 75 | - terminado=0.8.1=py27_0 76 | - testpath=0.3.1=py27_0 77 | - tk=8.6.7=0 78 | - tornado=5.0.2=py27_0 79 | - tqdm=4.23.4=py_0 80 | - traitlets=4.3.2=py27_0 81 | - wcwidth=0.1.7=py_1 82 | # - webencodings=0.5.1=py27_0 83 | - wheel=0.31.1=py27_0 84 | - 
widgetsnbextension=3.2.1=py27_0 85 | - zeromq=4.2.5=hfc679d8_3 86 | - zlib=1.2.11=h470a237_3 87 | # - anaconda=custom=py27h2cfa9e9_0 88 | - pip: 89 | - absl-py==0.2.2 90 | - astor==0.7.1 91 | - backports.weakref==1.0.post1 92 | - edward==1.3.5 93 | - funcsigs==1.0.2 94 | - gast==0.2.0 95 | - grpcio==1.13.0 96 | - h5py==2.8.0 97 | - keras==2.2.0 98 | - keras-applications==1.0.2 99 | - keras-preprocessing==1.0.1 100 | - markdown==2.6.11 101 | - mock==2.0.0 102 | - pbr==4.1.0 103 | - protobuf==3.6.0 104 | - pyyaml==3.13 105 | - tensorboard==1.9.0 106 | - tensorflow==1.9.0 107 | - termcolor==1.1.0 108 | - werkzeug==0.14.1 109 | #prefix: /anaconda3/envs/sem 110 | 111 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from event_models import * 2 | from sem import * 3 | from memory import * -------------------------------------------------------------------------------- /models/event_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from utils import unroll_data 4 | import keras 5 | from keras.models import Sequential 6 | from keras.layers import Dense, Activation, SimpleRNN, GRU, Dropout, LSTM, LeakyReLU, Lambda 7 | from keras.initializers import glorot_uniform # Or your initializer of choice 8 | from keras import regularizers 9 | from keras.optimizers import * 10 | from models.utils import fast_mvnorm_diagonal_logprob 11 | 12 | print("TensorFlow Version: {}".format(tf.__version__)) 13 | print("Keras Version: {}".format(keras.__version__)) 14 | 15 | config = tf.ConfigProto() 16 | config.intra_op_parallelism_threads = 4 17 | config.inter_op_parallelism_threads = 4 18 | tf.Session(config=config) 19 | 20 | 21 | # run a check that tensorflow works on import 22 | def check_tf(): 23 | a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') 24 | b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') 25 | c = tf.matmul(a, b) 26 | 27 | with tf.Session() as sess: 28 | sess.run(c) 29 | print "TensorFlow Check Passed" 30 | check_tf() 31 | 32 | 33 | 34 | def reset_weights(session, model): 35 | for layer in model.layers: 36 | if hasattr(layer, "kernel_initializer"): 37 | layer.kernel.initializer.run(session=session) 38 | 39 | 40 | def map_variance(samples, df0, scale0): 41 | """ 42 | This estimator assumes an scaled inverse-chi squared prior over the 43 | variance and a Gaussian likelihood. The parameters d and scale 44 | of the internal function parameterize the posterior of the variance. 
45 | Taken from Bayesian Data Analysis, ch2 (Gelman) 46 | 47 | samples: N length array or NxD array 48 | df0: prior degrees of freedom 49 | scale0: prior scale parameter 50 | mu: (optional) mean function 51 | 52 | returns: float or d-length array, mode of the posterior 53 | """ 54 | if np.ndim(samples) > 1: 55 | n, d = np.shape(samples) 56 | else: 57 | n = np.shape(samples)[0] 58 | d = 1 59 | 60 | v = np.var(samples, axis=0) 61 | df = df0 + n 62 | scale = (df0 * scale0 + n * v) / df 63 | return df * scale / (df + 2)  # mode of the scaled-inverse-chi-squared posterior 64 | 65 | 66 | class LinearEvent(object): 67 | """ this is the base class of the event model """ 68 | 69 | def __init__(self, d, var_df0, var_scale0, optimizer=None, n_epochs=10, init_model=False, 70 | kernel_initializer='glorot_uniform', l2_regularization=0.00, batch_size=32, prior_log_prob=0.0, 71 | reset_weights=False, batch_update=True, optimizer_kwargs=None): 72 | """ 73 | 74 | :param d: dimensions of the input space 75 | """ 76 | self.d = d 77 | self.f_is_trained = False 78 | self.f0_is_trained = False 79 | self.f0 = np.zeros(d) 80 | 81 | self.x_history = [np.zeros((0, self.d))] 82 | self.prior_probability = prior_log_prob 83 | 84 | if (optimizer is None) and (optimizer_kwargs is None): 85 | optimizer = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0, amsgrad=False) 86 | elif (optimizer is None) and not (optimizer_kwargs is None): 87 | optimizer = Adam(**optimizer_kwargs) 88 | elif (optimizer is not None) and (type(optimizer) != str): 89 | optimizer = optimizer() 90 | 91 | self.compile_opts = dict(optimizer=optimizer, loss='mean_squared_error') 92 | self.kernel_initializer = kernel_initializer 93 | self.kernel_regularizer = regularizers.l2(l2_regularization) 94 | self.n_epochs = n_epochs 95 | self.batch_size = batch_size 96 | self.var_df0 = var_df0 97 | self.var_scale0 = var_scale0 98 | self.d = d 99 | self.reset_weights = reset_weights 100 | self.batch_update = batch_update 101 | self.training_pairs = [] 102 | self.prediction_errors = np.zeros((0, self.d), dtype=np.float) 103 | self.model_weights = None 104 | 105 | # initialize the covariance with the mode of the prior distribution 106 | self.Sigma = np.ones(d) * var_df0 * var_scale0 / (var_df0 + 2) 107 | 108 | self.is_visited = False # governs the special case of model's first prediction (i.e.
with no experience) 109 | 110 | # switch for inheritance -- don't want to init the model for sub-classes 111 | if init_model: 112 | self.init_model() 113 | 114 | def init_model(self): 115 | self._compile_model() 116 | self.model_weights = self.model.get_weights() 117 | return self.model 118 | 119 | def _compile_model(self): 120 | self.model = Sequential([ 121 | Dense(self.d, input_shape=(self.d,), use_bias=True, kernel_initializer=self.kernel_initializer, 122 | kernel_regularizer=self.kernel_regularizer), 123 | Activation('linear') 124 | ]) 125 | self.model.compile(**self.compile_opts) 126 | 127 | def set_model(self, sess, model): 128 | self.sess = sess 129 | self.model = model 130 | self.do_reset_weights() 131 | 132 | def reestimate(self): 133 | self.do_reset_weights() 134 | self.estimate() 135 | 136 | def do_reset_weights(self): 137 | # self._compile_model() 138 | reset_weights(self.sess, self.model) 139 | self.model_weights = self.model.get_weights() 140 | 141 | def update(self, X, Xp, update_estimate=True): 142 | """ 143 | Parameters 144 | ---------- 145 | X: NxD array-like data of inputs 146 | 147 | y: NxD array-like data of outputs 148 | 149 | Returns 150 | ------- 151 | None 152 | 153 | """ 154 | if X.ndim > 1: 155 | X = X[-1, :] # only consider last example 156 | assert X.ndim == 1 157 | assert X.shape[0] == self.d 158 | assert Xp.ndim == 1 159 | assert Xp.shape[0] == self.d 160 | 161 | x_example = X.reshape((1, self.d)) 162 | xp_example = Xp.reshape((1, self.d)) 163 | 164 | # concatenate the training example to the active event token 165 | self.x_history[-1] = np.concatenate([self.x_history[-1], x_example], axis=0) 166 | 167 | # also, create a list of training pairs (x, y) for efficient sampling 168 | # picks random time-point in the history 169 | self.training_pairs.append(tuple([x_example, xp_example])) 170 | 171 | if update_estimate: 172 | self.estimate() 173 | self.f_is_trained = True 174 | 175 | def update_f0(self, Xp, update_estimate=True): 176 | self.update(np.zeros(self.d), Xp, update_estimate=update_estimate) 177 | self.f0_is_trained = True 178 | 179 | # precompute f0 for speed 180 | self.f0 = self._predict_f0() 181 | 182 | def get_variance(self): 183 | # Sigma is stored as a vector corresponding to the entries of the diagonal covariance matrix 184 | return self.Sigma 185 | 186 | def predict_next(self, X): 187 | """ 188 | wrapper for the prediction function that changes the prediction to the identity function 189 | for untrained models (this is an initialization technique) 190 | 191 | """ 192 | if not self.f_is_trained: 193 | if np.ndim(X) > 1: 194 | return np.copy(X[-1, :]).reshape(1, -1) 195 | return np.copy(X).reshape(1, -1) 196 | 197 | return self._predict_next(X) 198 | 199 | def _predict_next(self, X): 200 | """ 201 | Parameters 202 | ---------- 203 | X: 1xD array-like data of inputs 204 | 205 | Returns 206 | ------- 207 | y: 1xD array of prediction vectors 208 | 209 | """ 210 | if X.ndim > 1: 211 | X0 = X[-1, :] 212 | else: 213 | X0 = X 214 | 215 | self.model.set_weights(self.model_weights) 216 | return self.model.predict(np.reshape(X0, newshape=(1, self.d))) 217 | 218 | def predict_f0(self): 219 | """ 220 | wrapper for the prediction function that changes the prediction to the identity function 221 | for untrained models (this is an initialization technique) 222 | 223 | N.B. 
This answer is cached for speed 224 | 225 | """ 226 | return self.f0 227 | 228 | def _predict_f0(self): 229 | return self._predict_next(np.zeros(self.d)) 230 | 231 | def log_likelihood_f0(self, Xp): 232 | 233 | if not self.f0_is_trained: 234 | return self.prior_probability 235 | 236 | # predict the initial point 237 | Xp_hat = self.predict_f0() 238 | 239 | # return the probability 240 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 241 | 242 | def log_likelihood_next(self, X, Xp): 243 | if not self.f_is_trained: 244 | return self.prior_probability 245 | 246 | Xp_hat = self.predict_next(X) 247 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 248 | 249 | def log_likelihood_sequence(self, X, Xp): 250 | if not self.f_is_trained: 251 | return self.prior_probability 252 | 253 | Xp_hat = self.predict_next_generative(X) 254 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 255 | 256 | # create a new cluster of scenes 257 | def new_token(self): 258 | if len(self.x_history) == 1 and self.x_history[0].shape[0] == 0: 259 | # special case for the first cluster which is already created 260 | return 261 | self.x_history.append(np.zeros((0, self.d))) 262 | 263 | def predict_next_generative(self, X): 264 | self.model.set_weights(self.model_weights) 265 | # the LDS is a markov model, so these functions are the same 266 | return self.predict_next(X) 267 | 268 | def run_generative(self, n_steps, initial_point=None): 269 | self.model.set_weights(self.model_weights) 270 | if initial_point is None: 271 | x_gen = self._predict_f0() 272 | else: 273 | x_gen = np.reshape(initial_point, (1, self.d)) 274 | for ii in range(1, n_steps): 275 | x_gen = np.concatenate([x_gen, self.predict_next_generative(x_gen[:ii, :])]) 276 | return x_gen 277 | 278 | def estimate(self): 279 | if self.reset_weights: 280 | self.do_reset_weights() 281 | else: 282 | self.model.set_weights(self.model_weights) 283 | 284 | n_pairs = len(self.training_pairs) 285 | 286 | if self.batch_update: 287 | def draw_sample_pair(): 288 | # draw a random cluster for the history 289 | idx = np.random.randint(n_pairs) 290 | return self.training_pairs[idx] 291 | else: 292 | # for online sampling, just use the last training sample 293 | def draw_sample_pair(): 294 | return self.training_pairs[-1] 295 | 296 | # run batch gradient descent on all of the past events! 
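# (Illustrative note on the loop below; the names used here are the ones defined above.)
# Each of self.n_epochs iterations assembles a minibatch of self.batch_size (x, x') pairs
# drawn with replacement from this event's history via draw_sample_pair() -- or the single
# most recent pair when batch_update=False -- reshapes them into (batch_size, d) arrays,
# and takes one gradient step with self.model.train_on_batch. Roughly:
#     pairs = [draw_sample_pair() for _ in range(self.batch_size)]
#     x_batch = np.reshape([p[0] for p in pairs], (self.batch_size, self.d))
#     xp_batch = np.reshape([p[1] for p in pairs], (self.batch_size, self.d))
#     self.model.train_on_batch(x_batch, xp_batch)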
297 | for _ in range(self.n_epochs): 298 | 299 | # draw a set of training examples from the history 300 | x_batch = [] 301 | xp_batch = [] 302 | for _ in range(self.batch_size): 303 | 304 | x_sample, xp_sample = draw_sample_pair() 305 | 306 | # these data aren't 307 | x_batch.append(x_sample) 308 | xp_batch.append(xp_sample) 309 | 310 | x_batch = np.reshape(x_batch, (self.batch_size, self.d)) 311 | xp_batch = np.reshape(xp_batch, (self.batch_size, self.d)) 312 | self.model.train_on_batch(x_batch, xp_batch) 313 | 314 | # cache the model weights 315 | self.model_weights = self.model.get_weights() 316 | 317 | # Update Sigma 318 | x_train_0, xp_train_0 = self.training_pairs[-1] 319 | xp_hat = self.model.predict(x_train_0) 320 | self.prediction_errors = np.concatenate([self.prediction_errors, xp_train_0 - xp_hat], axis=0) 321 | if np.shape(self.prediction_errors)[0] > 1: 322 | self.Sigma = map_variance(self.prediction_errors, self.var_df0, self.var_scale0) 323 | 324 | 325 | class NonLinearEvent(LinearEvent): 326 | 327 | def __init__(self, d, var_df0, var_scale0, n_hidden=None, hidden_act='tanh', batch_size=32, 328 | optimizer=None, n_epochs=10, init_model=False, kernel_initializer='glorot_uniform', 329 | l2_regularization=0.00, dropout=0.50, prior_log_prob=0.0, reset_weights=False, 330 | batch_update=True, 331 | optimizer_kwargs=None): 332 | LinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 333 | init_model=False, kernel_initializer=kernel_initializer, batch_size=batch_size, 334 | l2_regularization=l2_regularization, prior_log_prob=prior_log_prob, 335 | reset_weights=reset_weights, batch_update=batch_update, 336 | optimizer_kwargs=optimizer_kwargs) 337 | 338 | if n_hidden is None: 339 | n_hidden = d 340 | self.n_hidden = n_hidden 341 | self.hidden_act = hidden_act 342 | self.dropout = dropout 343 | 344 | if init_model: 345 | self.init_model() 346 | 347 | def _compile_model(self): 348 | self.model = Sequential() 349 | self.model.add(Dense(self.n_hidden, input_shape=(self.d,), activation=self.hidden_act, 350 | kernel_regularizer=self.kernel_regularizer, 351 | kernel_initializer=self.kernel_initializer)) 352 | self.model.add(Dropout(self.dropout)) 353 | self.model.add(Dense(self.d, activation='linear', 354 | kernel_regularizer=self.kernel_regularizer, 355 | kernel_initializer=self.kernel_initializer)) 356 | self.model.compile(**self.compile_opts) 357 | 358 | 359 | class NonLinearEvent_normed(NonLinearEvent): 360 | 361 | def __init__(self, d, var_df0, var_scale0, n_hidden=None, hidden_act='tanh', 362 | optimizer=None, n_epochs=10, init_model=False, kernel_initializer='glorot_uniform', 363 | l2_regularization=0.00, dropout=0.50, prior_log_prob=0.0, reset_weights=False, batch_size=32, 364 | batch_update=True, optimizer_kwargs=None): 365 | 366 | NonLinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 367 | l2_regularization=l2_regularization,batch_size=batch_size, 368 | kernel_initializer=kernel_initializer, init_model=False, 369 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 370 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 371 | 372 | if n_hidden is None: 373 | n_hidden = d 374 | self.n_hidden = n_hidden 375 | self.hidden_act = hidden_act 376 | self.dropout = dropout 377 | 378 | if init_model: 379 | self.init_model() 380 | 381 | def _compile_model(self): 382 | self.model = Sequential() 383 | self.model.add(Dense(self.n_hidden, input_shape=(self.d,), activation=self.hidden_act, 384 | 
kernel_regularizer=self.kernel_regularizer, 385 | kernel_initializer=self.kernel_initializer)) 386 | self.model.add(Dropout(self.dropout)) 387 | self.model.add(Dense(self.d, activation='linear', 388 | kernel_regularizer=self.kernel_regularizer, 389 | kernel_initializer=self.kernel_initializer)) 390 | self.model.add(Lambda(lambda x: K.l2_normalize(x, axis=-1))) 391 | self.model.compile(**self.compile_opts) 392 | 393 | 394 | class StationaryEvent(LinearEvent): 395 | 396 | def _predict_next(self, X): 397 | """ 398 | Parameters 399 | ---------- 400 | X: 1xD array-like data of inputs 401 | 402 | Returns 403 | ------- 404 | y: 1xD array of prediction vectors 405 | 406 | """ 407 | 408 | return self.model.predict(np.zeros((1, self.d))) 409 | 410 | 411 | 412 | class RecurentLinearEvent(LinearEvent): 413 | 414 | # RNN which is initialized once and then trained using stochastic gradient descent 415 | # i.e. each new scene is a single example batch of size 1 416 | 417 | def __init__(self, d, var_df0, var_scale0, t=3, 418 | optimizer=None, n_epochs=10, l2_regularization=0.00, batch_size=32, 419 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 420 | batch_update=True, optimizer_kwargs=None): 421 | # 422 | # D = dimension of single input / output example 423 | # t = number of time steps to unroll back in time for the recurrent layer 424 | # n_hidden1 = # of nodes in first hidden layer 425 | # n_hidden2 = # of nodes in second hidden layer 426 | # hidden_act1 = activation f'n of first hidden layer 427 | # hidden_act2 = activation f'n of second hidden layer 428 | # sgd_kwargs = arguments for the stochastic gradient descent algorithm 429 | # n_epochs = how many gradient descent steps to perform for each training batch 430 | # dropout = what fraction of nodes to drop out during training (to prevent overfitting) 431 | 432 | LinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 433 | init_model=False, kernel_initializer=kernel_initializer, 434 | l2_regularization=l2_regularization, prior_log_prob=prior_log_prob, 435 | reset_weights=reset_weights, batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 436 | 437 | self.t = t 438 | self.n_epochs = n_epochs 439 | 440 | # list of clusters of scenes: 441 | # each element of list = history of scenes for given cluster 442 | # history = N x D tensor, N = # of scenes in cluster, D = dimension of single scene 443 | # 444 | self.x_history = [np.zeros((0, self.d))] 445 | self.batch_size = batch_size 446 | 447 | if init_model: 448 | self.init_model() 449 | 450 | # cache the initial weights for retraining speed 451 | self.init_weights = None 452 | 453 | def do_reset_weights(self): 454 | # # self._compile_model() 455 | if self.init_weights is None: 456 | for layer in self.model.layers: 457 | new_weights = [glorot_uniform()(w.shape).eval(session=self.sess) for w in layer.get_weights()] 458 | layer.set_weights(new_weights) 459 | self.model_weights = self.model.get_weights() 460 | self.init_weights = self.model.get_weights() 461 | else: 462 | self.model.set_weights(self.init_weights) 463 | 464 | # initialize model once so we can then update it online 465 | def _compile_model(self): 466 | self.model = Sequential() 467 | self.model.add(SimpleRNN(self.d, input_shape=(self.t, self.d), 468 | activation=None, kernel_initializer=self.kernel_initializer, 469 | kernel_regularizer=self.kernel_regularizer)) 470 | self.model.compile(**self.compile_opts) 471 | 472 | # concatenate current example with 
the history of the last t-1 examples 473 | # this is for the recurrent layer 474 | # 475 | def _unroll(self, x_example): 476 | x_train = np.concatenate([self.x_history[-1][-(self.t - 1):, :], x_example], axis=0) 477 | x_train = np.concatenate([np.zeros((self.t - x_train.shape[0], self.d)), x_train], axis=0) 478 | x_train = x_train.reshape((1, self.t, self.d)) 479 | return x_train 480 | 481 | # predict a single example 482 | def _predict_next(self, X): 483 | self.model.set_weights(self.model_weights) 484 | # Note: this function predicts the next conditioned on the training data the model has seen 485 | 486 | if X.ndim > 1: 487 | X = X[-1, :] # only consider last example 488 | assert np.ndim(X) == 1 489 | assert X.shape[0] == self.d 490 | 491 | x_test = X.reshape((1, self.d)) 492 | 493 | # concatenate current example with history of last t-1 examples 494 | # this is for the recurrent part of the network 495 | x_test = self._unroll(x_test) 496 | return self.model.predict(x_test) 497 | 498 | def _predict_f0(self): 499 | return self.predict_next_generative(np.zeros(self.d)) 500 | 501 | def _update_variance(self): 502 | if np.shape(self.prediction_errors)[0] > 1: 503 | self.Sigma = map_variance(self.prediction_errors, self.var_df0, self.var_scale0) 504 | 505 | def update(self, X, Xp, update_estimate=True): 506 | if X.ndim > 1: 507 | X = X[-1, :] # only consider last example 508 | assert X.ndim == 1 509 | assert X.shape[0] == self.d 510 | assert Xp.ndim == 1 511 | assert Xp.shape[0] == self.d 512 | 513 | x_example = X.reshape((1, self.d)) 514 | xp_example = Xp.reshape((1, self.d)) 515 | 516 | # concatenate the training example to the active event token 517 | self.x_history[-1] = np.concatenate([self.x_history[-1], x_example], axis=0) 518 | 519 | # also, create a list of training pairs (x, y) for efficient sampling 520 | # picks random time-point in the history 521 | _n = np.shape(self.x_history[-1])[0] 522 | x_train_example = np.reshape( 523 | unroll_data(self.x_history[-1][max(_n - self.t, 0):, :], self.t)[-1, :, :], (1, self.t, self.d) 524 | ) 525 | self.training_pairs.append(tuple([x_train_example, xp_example])) 526 | 527 | if update_estimate: 528 | self.estimate() 529 | self.f_is_trained = True 530 | 531 | def predict_next_generative(self, X): 532 | self.model.set_weights(self.model_weights) 533 | X0 = np.reshape(unroll_data(X, self.t)[-1, :, :], (1, self.t, self.d)) 534 | return self.model.predict(X0) 535 | 536 | # optional: run batch gradient descent on all past event clusters 537 | def estimate(self): 538 | if self.reset_weights: 539 | self.do_reset_weights() 540 | else: 541 | self.model.set_weights(self.model_weights) 542 | 543 | n_pairs = len(self.training_pairs) 544 | 545 | if self.batch_update: 546 | def draw_sample_pair(): 547 | # draw a random cluster for the history 548 | idx = np.random.randint(n_pairs) 549 | return self.training_pairs[idx] 550 | else: 551 | # for online sampling, just use the last training sample 552 | def draw_sample_pair(): 553 | return self.training_pairs[-1] 554 | 555 | # run batch gradient descent on all of the past events! 
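# (Illustrative note on the recurrent variant below.) Here each stored training pair is
# (x_window, x_next): x_window has shape (1, self.t, self.d) -- the last t scenes of the
# event, left-padded with zeros when fewer than t scenes have been seen, as in _unroll
# above -- and x_next has shape (1, self.d). The loop below stacks self.batch_size such
# pairs into a (batch_size, t, d) input tensor and a (batch_size, d) target array before
# each call to self.model.train_on_batch. E.g. with t=3 and only two scenes observed,
# x_window is [[0, ..., 0], [x_1], [x_2]].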
556 | for _ in range(self.n_epochs): 557 | 558 | # draw a set of training examples from the history 559 | x_batch = np.zeros((0, self.t, self.d)) 560 | xp_batch = np.zeros((0, self.d)) 561 | for _ in range(self.batch_size): 562 | 563 | x_sample, xp_sample = draw_sample_pair() 564 | 565 | x_batch = np.concatenate([x_batch, x_sample], axis=0) 566 | xp_batch = np.concatenate([xp_batch, xp_sample], axis=0) 567 | 568 | self.model.train_on_batch(x_batch, xp_batch) 569 | self.model_weights = self.model.get_weights() 570 | 571 | # Update Sigma 572 | x_train_0, xp_train_0 = self.training_pairs[-1] 573 | xp_hat = self.model.predict(x_train_0) 574 | self.prediction_errors = np.concatenate([self.prediction_errors, xp_train_0 - xp_hat], axis=0) 575 | self._update_variance() 576 | 577 | 578 | class RecurrentEvent(RecurentLinearEvent): 579 | 580 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 581 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 582 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 583 | batch_update=True, optimizer_kwargs=None): 584 | 585 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 586 | l2_regularization=l2_regularization, batch_size=batch_size, 587 | kernel_initializer=kernel_initializer, init_model=False, prior_log_prob=prior_log_prob, 588 | reset_weights=reset_weights, batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 589 | 590 | if n_hidden is None: 591 | self.n_hidden = d 592 | else: 593 | self.n_hidden = n_hidden 594 | self.dropout = dropout 595 | 596 | if init_model: 597 | self.init_model() 598 | 599 | def _compile_model(self): 600 | self.model = Sequential() 601 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 602 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 603 | self.model.add(SimpleRNN(self.n_hidden, input_shape=(self.t, self.d), 604 | kernel_regularizer=self.kernel_regularizer, 605 | kernel_initializer=self.kernel_initializer)) 606 | self.model.add(LeakyReLU(alpha=0.3)) 607 | self.model.add(Dropout(self.dropout)) 608 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 609 | kernel_initializer=self.kernel_initializer)) 610 | self.model.compile(**self.compile_opts) 611 | 612 | 613 | class GRUEvent(RecurentLinearEvent): 614 | 615 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 616 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 617 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 618 | batch_update=True, optimizer_kwargs=None): 619 | 620 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 621 | l2_regularization=l2_regularization, batch_size=batch_size, 622 | kernel_initializer=kernel_initializer, init_model=False, 623 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 624 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 625 | 626 | if n_hidden is None: 627 | self.n_hidden = d 628 | else: 629 | self.n_hidden = n_hidden 630 | self.dropout = dropout 631 | 632 | if init_model: 633 | self.init_model() 634 | 635 | def _compile_model(self): 636 | self.model = Sequential() 637 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 638 | # input_shape[1] = input_dim; each example is a 
self.d-dimensional vector 639 | self.model.add(GRU(self.n_hidden, input_shape=(self.t, self.d), 640 | kernel_regularizer=self.kernel_regularizer, 641 | kernel_initializer=self.kernel_initializer)) 642 | self.model.add(LeakyReLU(alpha=0.3)) 643 | self.model.add(Dropout(self.dropout)) 644 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 645 | kernel_initializer=self.kernel_initializer)) 646 | self.model.compile(**self.compile_opts) 647 | 648 | 649 | class GRUEvent_normed(RecurentLinearEvent): 650 | 651 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 652 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 653 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 654 | batch_update=True, optimizer_kwargs=None): 655 | 656 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 657 | l2_regularization=l2_regularization, batch_size=batch_size, 658 | kernel_initializer=kernel_initializer, init_model=False, 659 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 660 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 661 | 662 | if n_hidden is None: 663 | self.n_hidden = d 664 | else: 665 | self.n_hidden = n_hidden 666 | self.dropout = dropout 667 | 668 | if init_model: 669 | self.init_model() 670 | 671 | def _compile_model(self): 672 | self.model = Sequential() 673 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 674 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 675 | self.model.add(GRU(self.n_hidden, input_shape=(self.t, self.d), 676 | kernel_regularizer=self.kernel_regularizer, 677 | kernel_initializer=self.kernel_initializer)) 678 | self.model.add(LeakyReLU(alpha=0.3)) 679 | self.model.add(Dropout(self.dropout)) 680 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 681 | kernel_initializer=self.kernel_initializer)) 682 | self.model.add(Lambda(lambda x: K.l2_normalize(x, axis=-1))) 683 | self.model.compile(**self.compile_opts) 684 | 685 | 686 | 687 | class GRUEvent_spherical_noise(GRUEvent): 688 | 689 | def _update_variance(self): 690 | if np.shape(self.prediction_errors)[0] > 1: 691 | var = map_variance(self.prediction_errors.reshape(-1), self.var_df0, self.var_scale0) 692 | self.Sigma = var * np.ones(self.d) 693 | 694 | 695 | 696 | class LSTMEvent(RecurentLinearEvent): 697 | 698 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 699 | n_epochs=10, dropout=0.50, l2_regularization=0.00, 700 | batch_size=32, kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, 701 | reset_weights=False, batch_update=True, optimizer_kwargs=None): 702 | 703 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 704 | l2_regularization=l2_regularization, batch_size=batch_size, 705 | kernel_initializer=kernel_initializer, init_model=False, 706 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 707 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 708 | 709 | if n_hidden is None: 710 | self.n_hidden = d 711 | else: 712 | self.n_hidden = n_hidden 713 | self.dropout = dropout 714 | 715 | if init_model: 716 | self.init_model() 717 | 718 | def _compile_model(self): 719 | self.model = Sequential() 720 | # input_shape[0] = time-steps; we pass the last self.t examples for train the 
hidden layer 721 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 722 | self.model.add(LSTM(self.n_hidden, input_shape=(self.t, self.d), 723 | kernel_regularizer=self.kernel_regularizer, 724 | kernel_initializer=self.kernel_initializer)) 725 | self.model.add(LeakyReLU(alpha=0.3)) 726 | self.model.add(Dropout(self.dropout)) 727 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 728 | kernel_initializer=self.kernel_initializer)) 729 | self.model.compile(**self.compile_opts) 730 | -------------------------------------------------------------------------------- /models/memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from scipy.stats import multivariate_normal as mvnorm 4 | from scipy.special import logsumexp 5 | from models.utils import fast_mvnorm_diagonal_logprob 6 | np.seterr(divide = 'ignore') 7 | import os 8 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 9 | import multiprocessing 10 | 11 | def sample_pmf(pmf): 12 | return np.sum(np.cumsum(pmf) < np.random.uniform(0, 1)) 13 | 14 | def get_scrp_prob(e, lmda, alfa): 15 | """ 16 | this function isn't used 17 | 18 | :param e: list event labels 19 | :param lmda: float, sCRP lambda 20 | :param alpha: float, sCRP alpha 21 | 22 | :return: total log likelihood of sequence under sCRP 23 | """ 24 | c = {e0: 0 for e0 in set(e)} 25 | log_prob = 0 26 | e0_prev = None 27 | 28 | Z = alfa 29 | log_alfa = np.log(alfa) 30 | for e0 in e: 31 | 32 | l = lmda * (e0 == e0_prev) 33 | 34 | if c[e0] == 0: 35 | log_prob += log_alfa - np.log(Z + l) 36 | else: 37 | log_prob += np.log(c[e0] + l) - np.log(Z + l) 38 | 39 | 40 | # update the counts 41 | c[e0] += 1 42 | Z += 1 43 | e0_prev = e0 44 | 45 | return log_prob 46 | 47 | def reconstruction_accuracy(y_samples, y_mem): 48 | """ 49 | 50 | :param: y_samples - list of y_samples 51 | :param: y_mem - original corrupted memory trace 52 | 53 | :return: item_accuracy, list of probabilities each item in original memory 54 | is in the final reconstruction 55 | 56 | # checked this function on 5/20/19, this function is correct if unintuitive 57 | """ 58 | 59 | 60 | acc = [] 61 | n_orig = len(y_mem) 62 | 63 | for y_sample in y_samples: 64 | 65 | def item_acc(t): 66 | # loop through all of the items in the reconstruction trace, and compare them to 67 | # item t in the corrupted trace. 
Return 1.0 if there is a match, zero otherwise 68 | return np.float(any( 69 | [np.array_equal(yt_samp[0], y_mem[t][0]) for yt_samp in y_sample if yt_samp != None] 70 | )) 71 | 72 | # evaluate the accuracy for all of the items in the set 73 | acc.append([item_acc(t) for t in range(n_orig)]) 74 | 75 | # return the vector of accuracy 76 | return np.mean(acc, axis=0) 77 | 78 | def evaluate_seg(e_samples, e_true): 79 | acc = [] 80 | for e in e_samples: 81 | acc.append(np.mean(np.array(e) == e_true)) 82 | return np.mean(acc) 83 | 84 | def create_corrupted_trace(x, e, tau, epsilon_e, b, return_random_draws_of_p_e=False): 85 | """ 86 | create a corrupted memory trace from feature vectors and event labels 87 | 88 | :param x: np.array of size nXd, featur vectors 89 | :param e: np.array of length n, event labels 90 | :param tau: float, feature corruption 91 | :param epsilon_e: float, event label precision 92 | :param b: int, time index corruption 93 | 94 | :return y_mem: list of corrupted memory tuples: 95 | """ 96 | 97 | n, d = x.shape 98 | 99 | # create the corrupted memory trace 100 | y_mem = list() # these are list, not sets, for hashability 101 | 102 | # pre-draw the uniform random numbers to determine the event-label corruption noise so that 103 | # we can return them as needed. 104 | e_noise_draws = [np.random.uniform(0, 1) for _ in range(n)] 105 | 106 | for t in range(n): 107 | x_mem = x[t, :] + np.random.normal(scale=tau ** 0.5, size=d) # note, built in function uses stdev, not variance 108 | e_mem = [None, e[t]][e_noise_draws[t] < epsilon_e] 109 | t_mem = t + np.random.randint(-b, b + 1) 110 | y_mem.append([x_mem, e_mem, t_mem]) 111 | 112 | if return_random_draws_of_p_e: 113 | return y_mem, e_noise_draws 114 | 115 | return y_mem 116 | 117 | def init_y_sample(y_mem, b, epsilon): 118 | """ 119 | :param y_mem: list of corrupted memory traces 120 | :param b: time corruption noise 121 | :param epsilon: "forgetting" parameter 122 | :returns: sample of y_mem 123 | """ 124 | n_t = len(y_mem) 125 | y_sample = [None] * n_t 126 | 127 | # create a copy of y_mem for sampling without replacement 128 | y_mem_copy = [[x_i.copy(), e_i, t_mem] for (x_i, e_i, t_mem) in y_mem] 129 | 130 | # loop through timepoints in a random order 131 | for t in np.random.permutation(range(n_t)): 132 | 133 | # create a probability function over the sample sets 134 | log_p = np.zeros(len(y_mem_copy) + 1) - np.inf 135 | for ii, (x_i, e_i, t_mem) in enumerate(y_mem_copy): 136 | if np.abs(t_mem - t) <= b: 137 | log_p[ii] = 0 138 | # draw a sample 139 | log_p[-1] = np.log(epsilon) 140 | p = np.exp(log_p - logsumexp(log_p)) # normalize and exponentiate 141 | 142 | ii = sample_pmf(p) 143 | 144 | if ii < len(y_mem_copy): 145 | # only create a sample for none-None events 146 | y_sample[t] = y_mem_copy[ii] 147 | y_mem_copy = y_mem_copy[:ii] + y_mem_copy[ii + 1:] # remove the item from the list of available 148 | return y_sample 149 | 150 | 151 | def init_x_sample_cond_y(y_sample, n, d, tau): 152 | x_sample = np.random.randn(n, d) * tau 153 | 154 | for ii, y_ii in enumerate(y_sample): 155 | if y_ii is not None: 156 | x_sample[ii, :] = y_ii[0] 157 | return x_sample 158 | 159 | 160 | def sample_y_given_x_e(y_mem, x, e, b, tau, epsilon): 161 | # total number of samples 162 | n, d = np.shape(x) 163 | 164 | # 165 | y_sample = [None] * n 166 | 167 | # create a copy of y_mem for sampling without replacement 168 | y_mem_copy = [[x_i.copy(), e_i, t_mem] for (x_i, e_i, t_mem) in y_mem] 169 | 170 | _ones = np.ones(d) 171 | 172 | for t in 
np.random.permutation(range(n)): 173 | 174 | # create a probability function over the sample sets 175 | log_p = np.zeros(len(y_mem_copy) + 1) - np.inf 176 | for ii, (x_i, e_i, t_mem) in enumerate(y_mem_copy): 177 | if np.abs(t_mem - t) <= b: 178 | # because we alwasy assume the covariance function is diagonal, we can use the 179 | # univariate normal to speed up the calculations 180 | log_p[ii] = fast_mvnorm_diagonal_logprob(x_i.reshape(-1) - x[t, :].reshape(-1), _ones * tau) 181 | 182 | # set probability to zero if event token doesn't match 183 | if e_i is not None: 184 | if e_i != e[ii]: 185 | log_p[ii] -= np.inf 186 | 187 | # the last token is always the null token 188 | log_p[-1] = np.log(epsilon) 189 | p = np.exp(log_p - logsumexp(log_p)) # normalize and exponentiate 190 | 191 | # draw a sample 192 | ii = sample_pmf(p) 193 | 194 | if ii < len(y_mem_copy): 195 | # only create a sample for none-None events 196 | y_sample[t] = y_mem_copy[ii] 197 | y_mem_copy = y_mem_copy[:ii] + y_mem_copy[ii + 1:] # remove the item from the list of available 198 | 199 | return y_sample 200 | 201 | 202 | def sample_e_given_x_y(x, y, event_models, alpha, lmda): 203 | n, d = np.shape(x) 204 | 205 | # define a special case of the sCRP that caps the number 206 | # of clusters at k, the number of event models 207 | k = len(event_models) 208 | c = np.zeros(k) 209 | 210 | e_prev = None 211 | e_sample = [None] * n 212 | 213 | # keep a list of all the previous scenes within the sampled event 214 | x_current = np.zeros((1, d)) 215 | 216 | # do this as a filtering operation, just via a forward sweep 217 | for t in range(n): 218 | 219 | # first see if there is a valid memory token with a event label 220 | if (y[t] is not None) and (y[t][1] is not None): 221 | e_sample[t] = y[t][1] 222 | e_prev = e_sample[t] 223 | c[e_sample[t]] += 1 224 | else: 225 | 226 | # calculate the CRP prior 227 | p_sCRP = c.copy() 228 | if e_prev is not None: 229 | p_sCRP[e_prev] += lmda 230 | 231 | # add the alpha value to the unvisited clusters 232 | if any(p_sCRP == 0): 233 | p_sCRP[p_sCRP == 0] = alpha / np.sum(p_sCRP == 0) 234 | # no need to normalize yet 235 | 236 | # calculate the probability of x_t|x_{1:t-1} 237 | p_model = np.zeros(k) - np.inf 238 | for idx, e_model in event_models.iteritems(): 239 | if idx != e_prev: 240 | x_t_hat = e_model.predict_next_generative(x_current) 241 | else: 242 | x_t_hat = e_model.predict_f0() 243 | # because we alwasy assume the covariance function is diagonal, we can use the 244 | # univariate normal to speed up the calculations 245 | p_model[idx] = fast_mvnorm_diagonal_logprob(x[t, :] - x_t_hat.reshape(-1), e_model.Sigma) 246 | 247 | log_p = p_model + np.log(p_sCRP) 248 | log_p -= logsumexp(log_p) 249 | 250 | # draw from the model 251 | e_sample[t] = sample_pmf(np.exp(log_p)) 252 | 253 | # update counters 254 | if e_prev == e_sample[t]: 255 | x_current = np.concatenate([x_current, x[t, :].reshape(1, -1)]) 256 | else: 257 | x_current = x[t, :].reshape(1, -1) 258 | e_prev = e_sample[t] 259 | 260 | # update the counts! 261 | c[e_sample[t]] += 1 262 | 263 | return e_sample 264 | 265 | 266 | def sample_x_given_y_e(x_hat, y, e, event_models, tau): 267 | """ 268 | x_hat: n x d np.array 269 | the previous sample, to be updated and returned 270 | 271 | y: list 272 | the sequence of ordered memory traces. 
Each element is 273 | either a list of [x_y_mem, t_mem] or None 274 | 275 | e: np.array of length n 276 | the sequence of event tokens 277 | 278 | event_models: dict {token: model} 279 | trained event models 280 | 281 | tau: 282 | memory corruption noise 283 | 284 | """ 285 | 286 | # total number of samples 287 | n, d = np.shape(x_hat) 288 | 289 | x_hat = x_hat.copy() # don't want to overwrite the thing outside the loop... 290 | 291 | # Note: this a filtering operation as the backwards pass is computationally difficult. 292 | # (by this, we mean that sampling from Pr(x_t| x_{t+1:n}, x_{1:t-1}, theta, e, y_mem) is intractable 293 | # and we thus only sample from Pr(x_t|, x_{1:t-1}, theta, e, y_mem), which is is Gaussian) 294 | for t in np.random.permutation(range(n)): 295 | # pull the active event model 296 | e_model = event_models[e[t]] 297 | 298 | # pull all preceding scenes within the event 299 | x_idx = np.arange(len(e))[(e == e[t]) & (np.arange(len(e)) < t)] 300 | x_prev = np.concatenate([ 301 | np.zeros((1, d)), x_hat[x_idx, :] 302 | ]) 303 | 304 | # pull the prediction of the event model given the previous estimates of x 305 | f_x = e_model.predict_next_generative(x_prev) 306 | 307 | # is y_t a null tag? 308 | if y[t] is None: 309 | x_bar = f_x 310 | sigmas = e_model.Sigma 311 | else: 312 | # calculate noise lambda for each event model 313 | u_weight = (1. / e_model.Sigma) / (1. / e_model.Sigma + 1. / tau) 314 | 315 | x_bar = u_weight * f_x + (1 - u_weight) * y[t][0] 316 | sigmas = 1. / (1. / e_model.Sigma + 1. / tau) 317 | 318 | # draw a new sample of x_t 319 | # N.B. Handcoding a function to draw random variables introduced error into the algorithm 320 | # and didn't save _any_ time. 321 | x_hat[t, :] = mvnorm.rvs(mean=x_bar.reshape(-1), cov=np.diag(sigmas)) 322 | 323 | return x_hat 324 | 325 | 326 | def gibbs_memory_sampler(y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, b, tau, 327 | n_samples=250, n_burnin=100, progress_bar=True, leave_progress_bar=True): 328 | """ 329 | 330 | :param y_mem: list of 3-tuples (x_mem, e_mem, t_mem), corrupted memory trace 331 | :param sem_mdoel: trained SEM instance 332 | :param memory_alpha: SEM alpha parameter to use in reconstruction 333 | :param memory_labmda: SEM lmbda parameter to use in reconstruction 334 | :param memory_epsilon: (float) parameter controlling propensity to include null trace in reconstruction 335 | :param b: (int) time index corruption noise 336 | :param tau: (float, greater than zero) feature vector corruption noise 337 | :param n_burnin: (int, default 100) number of Gibbs sampling itterations to burn in 338 | :param n_samples: (int, default 250) number of Gibbs sampling itterations to collect 339 | :param progress_bar: (bool) use progress bar for sampling? 340 | :param leave_progress_bar: (bool, default=True) leave the progress bar at the end? 
341 | 342 | :return: y_samples, e_samples, x_samples - Gibbs samples 343 | """ 344 | 345 | event_models = { 346 | k: v for k, v in sem_model.event_models.iteritems() if v.f_is_trained 347 | } 348 | 349 | d = np.shape(y_mem[0][0])[0] 350 | n = len(y_mem) 351 | 352 | # 353 | e_samples = [None] * n_samples 354 | y_samples = [None] * n_samples 355 | x_samples = [None] * n_samples 356 | 357 | y_sample = init_y_sample(y_mem, b, memory_epsilon) 358 | x_sample = init_x_sample_cond_y(y_sample, n, d, tau) 359 | e_sample = sample_e_given_x_y(x_sample, y_sample, event_models, memory_alpha, memory_lambda) 360 | 361 | # loop through the other events in the list 362 | if progress_bar: 363 | def my_it(iterator): 364 | return tqdm(iterator, desc='Gibbs Sampler', leave=leave_progress_bar) 365 | else: 366 | def my_it(iterator): 367 | return iterator 368 | 369 | for ii in my_it(range(n_burnin + n_samples)): 370 | 371 | # sample the memory features 372 | x_sample = sample_x_given_y_e(x_sample, y_sample, e_sample, event_models, tau) 373 | 374 | # sample the event models 375 | e_sample = sample_e_given_x_y(x_sample, y_sample, event_models, memory_alpha, memory_lambda) 376 | 377 | # sample the memory traces 378 | y_sample = sample_y_given_x_e(y_mem, x_sample, e_sample, b, tau, memory_epsilon) 379 | 380 | if ii >= n_burnin: 381 | e_samples[ii - n_burnin] = e_sample 382 | y_samples[ii - n_burnin] = y_sample 383 | x_samples[ii - n_burnin] = x_sample 384 | 385 | return y_samples, e_samples, x_samples 386 | 387 | ## there appears to be something wrong with this function! do not use for now 388 | # def multichain_gibbs(y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, b, tau, n_chains=2, 389 | # n_samples=250, n_burnin=50, progress_bar=True, leave_progress_bar=True): 390 | 391 | # """ 392 | 393 | # :param y_mem: list of 3-tuples (x_mem, e_mem, t_mem), corrupted memory trace 394 | # :param sem_mdoel: trained SEM instance 395 | # :param memory_alpha: SEM alpha parameter to use in reconstruction 396 | # :param memory_labmda: SEM lmbda parameter to use in reconstruction 397 | # :param memory_epsilon: (float) parameter controlling propensity to include null trace in reconstruction 398 | # :param b: (int) time index corruption noise 399 | # :param tau: (float, greater than zero) feature vector corruption noise 400 | # :param n_burnin: (int, default 100) number of Gibbs sampling itterations to burn in 401 | # :param n_samples: (int, default 250) number of Gibbs sampling itterations to collect 402 | # :param progress_bar: (bool) use progress bar for sampling? 403 | # :param leave_progress_bar: (bool, default=True) leave the progress bar at the end? 
404 | 405 | # :return: y_samples, e_samples, x_samples - Gibbs samples 406 | # """ 407 | 408 | # y_samples, e_samples, x_samples = [], [], [] 409 | # for _ in range(n_chains): 410 | # _y0, _e0, _x0 = gibbs_memory_sampler( 411 | # y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, 412 | # b, tau, n_samples, progress_bar, False, leave_progress_bar 413 | # ) 414 | # y_samples += _y0 415 | # e_samples += _e0 416 | # x_samples += _x0 417 | # return y_samples, e_samples, x_samples -------------------------------------------------------------------------------- /models/sem.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from scipy.misc import logsumexp 4 | from tqdm import tqdm 5 | from keras import backend as K 6 | from event_models import GRUEvent 7 | 8 | 9 | class Results(object): 10 | """ placeholder object to store results """ 11 | pass 12 | 13 | 14 | class SEM(object): 15 | """ 16 | This port of SAM's code (done with a different programming logic) 17 | in python. More documentation to come! 18 | """ 19 | 20 | def __init__(self, lmda=1., alfa=10.0, f_class=GRUEvent, f_opts=None): 21 | """ 22 | Parameters 23 | ---------- 24 | 25 | lmda: float 26 | sCRP stickiness parameter 27 | 28 | alfa: float 29 | sCRP concentration parameter 30 | 31 | f_class: class 32 | object class that has the functions "predict" and "update". 33 | used as the event model 34 | 35 | f_opts: dictionary 36 | kwargs for initializing f_class 37 | """ 38 | self.lmda = lmda 39 | self.alfa = alfa 40 | # self.beta = beta 41 | 42 | if f_class is None: 43 | raise ValueError("f_model must be specified!") 44 | 45 | self.f_class = f_class 46 | self.f_opts = f_opts 47 | 48 | # SEM internal state 49 | # 50 | self.k = 0 # maximum number of clusters (event types) 51 | self.c = np.array([]) # used by the sCRP prior -> running count of the clustering process 52 | self.d = None # dimension of scenes 53 | self.event_models = dict() # event model for each event type 54 | 55 | self.x_prev = None # last scene 56 | self.k_prev = None # last event type 57 | 58 | # instead of dumping the results, store them to the object 59 | self.results = None 60 | 61 | def pretrain(self, x, event_types, event_boundaries, progress_bar=True, leave_progress_bar=True): 62 | """ 63 | Pretrain a bunch of event models on sequence of scenes X 64 | with corresponding event labels y, assumed to be between 0 and K-1 65 | where K = total # of distinct event types 66 | """ 67 | assert x.shape[0] == event_types.size 68 | 69 | # update internal state 70 | k = np.max(event_types) + 1 71 | self._update_state(x, k) 72 | del k # use self.k 73 | 74 | n = x.shape[0] 75 | 76 | # loop over all scenes 77 | if progress_bar: 78 | def my_it(l): 79 | return tqdm(range(l), desc='Pretraining', leave=leave_progress_bar) 80 | else: 81 | def my_it(l): 82 | return range(l) 83 | 84 | # store a compiled version of the model and session for reuse 85 | self.session = tf.Session() 86 | K.set_session(self.session) 87 | self.model = None 88 | 89 | for ii in my_it(n): 90 | 91 | x_curr = x[ii, :].copy() # current scene 92 | k = event_types[ii] # current event 93 | 94 | if k not in self.event_models.keys(): 95 | # initialize new event model 96 | new_model = self.f_class(self.d, **self.f_opts) 97 | if self.model is None: 98 | self.model = new_model.init_model() 99 | else: 100 | new_model.set_model(self.session, self.model) 101 | self.event_models[k] = new_model 102 | 103 | # update event model 104 | if 
not event_boundaries[ii]: 105 | # we're in the same event -> update using previous scene 106 | assert self.x_prev is not None 107 | self.event_models[k].update(self.x_prev, x_curr, update_estimate=True) 108 | else: 109 | # we're in a new event -> update the initialization point only 110 | self.event_models[k].new_token() 111 | self.event_models[k].update_f0(x_curr, update_estimate=True) 112 | 113 | self.c[k] += 1 # update counts 114 | 115 | self.x_prev = x_curr # store the current scene for next trial 116 | self.k_prev = k # store the current event for the next trial 117 | 118 | self.x_prev = None # Clear this for future use 119 | self.k_prev = None # 120 | 121 | def _update_state(self, x, k=None): 122 | """ 123 | Update internal state based on input data X and max # of event types (clusters) K 124 | """ 125 | # get dimensions of data 126 | [n, d] = np.shape(x) 127 | if self.d is None: 128 | self.d = d 129 | else: 130 | assert self.d == d # scenes must be of same dimension 131 | 132 | # get max # of clusters / event types 133 | if k is None: 134 | k = n 135 | self.k = max(self.k, k) 136 | 137 | # initialize CRP prior = running count of the clustering process 138 | if self.c.size < self.k: 139 | self.c = np.concatenate((self.c, np.zeros(self.k - self.c.size)), axis=0) 140 | assert self.c.size == self.k 141 | 142 | def _calculate_unnormed_sCRP(self, prev_cluster=None): 143 | # internal function for consistency across "run" methods 144 | 145 | # calculate sCRP prior 146 | prior = self.c.copy() 147 | idx = len(np.nonzero(self.c)[0]) # get number of visited clusters 148 | 149 | if idx <= self.k: 150 | prior[idx] += self.alfa # set new cluster probability to alpha 151 | 152 | # add stickiness parameter for n>0, only for the previously chosen event 153 | if prev_cluster is not None: 154 | prior[prev_cluster] += self.lmda 155 | 156 | # prior /= np.sum(prior) 157 | return prior 158 | 159 | def run(self, x, k=None, progress_bar=True, leave_progress_bar=True, minimize_memory=False, compile_model=True): 160 | """ 161 | Parameters 162 | ---------- 163 | x: N x D array of 164 | 165 | k: int 166 | maximum number of clusters 167 | 168 | progress_bar: bool 169 | use a tqdm progress bar? 170 | 171 | leave_progress_bar: bool 172 | leave the progress bar after completing? 
173 | 174 | minimize_memory: bool 175 | function to minimize memory storage during running --> only returns the log_probability of each 176 | cluster and nothing else 177 | 178 | Return 179 | ------ 180 | post: n by k array of posterior probabilities 181 | 182 | """ 183 | 184 | # update internal state 185 | self._update_state(x, k) 186 | del k # use self.k and self.d 187 | 188 | n = x.shape[0] 189 | 190 | # initialize arrays 191 | if not minimize_memory: 192 | post = np.zeros((n, self.k)) 193 | pe = np.zeros(np.shape(x)[0]) 194 | x_hat = np.zeros(np.shape(x)) 195 | log_boundary_probability = np.zeros(np.shape(x)[0]) 196 | 197 | # these are special case variables to deal with the possibility the current event is restarted 198 | lik_restart_event = -np.inf 199 | repeat_prob = -np.inf 200 | restart_prob = 0 201 | 202 | # 203 | log_like = np.zeros((n, self.k)) - np.inf 204 | log_prior = np.zeros((n, self.k)) - np.inf 205 | 206 | # this code just controls the presence/absence of a progress bar -- it isn't important 207 | if progress_bar: 208 | def my_it(l): 209 | return tqdm(range(l), desc='Run SEM', leave=leave_progress_bar) 210 | else: 211 | def my_it(l): 212 | return range(l) 213 | 214 | # store a compiled version of the model and session for reuse 215 | if compile_model: 216 | self.session = tf.Session() 217 | K.set_session(self.session) 218 | self.model = None 219 | 220 | for ii in my_it(n): 221 | 222 | x_curr = x[ii, :].copy() 223 | 224 | # calculate sCRP prior 225 | prior = self._calculate_unnormed_sCRP(self.k_prev) 226 | # N.B. k_prev should be none for the first event if there wasn't pre-training 227 | 228 | # likelihood 229 | active = np.nonzero(prior)[0] 230 | lik = np.zeros(len(active)) 231 | 232 | for k0 in active: 233 | if k0 not in self.event_models.keys(): 234 | new_model = self.f_class(self.d, **self.f_opts) 235 | if self.model is None: 236 | self.model = new_model.init_model() 237 | else: 238 | new_model.set_model(self.session, self.model) 239 | self.event_models[k0] = new_model 240 | new_model = None # clear the new model variable from memory 241 | 242 | # get the log likelihood for each event model 243 | model = self.event_models[k0] 244 | 245 | # detect when there is a change in event types (not the same thing as boundaries) 246 | current_event = (k0 == self.k_prev) 247 | 248 | if current_event: 249 | assert self.x_prev is not None 250 | lik[k0] = model.log_likelihood_next(self.x_prev, x_curr) 251 | 252 | # special case for the possibility of returning to the start of the current event 253 | lik_restart_event = model.log_likelihood_f0(x_curr) 254 | 255 | else: 256 | lik[k0] = model.log_likelihood_f0(x_curr) 257 | 258 | # determine the event identity (without worrying about event breaks for now) 259 | _post = np.log(prior[:len(active)]) + lik 260 | if ii > 0: 261 | # the probability that the current event is repeated is the OR probability -- but b/c 262 | # we are using a MAP approximation over all possibilities, it is a max of the repeated/restarted 263 | 264 | # is restart higher under the current event 265 | restart_prob = lik_restart_event + np.log(prior[self.k_prev] - self.lmda) 266 | repeat_prob = _post[self.k_prev] 267 | _post[self.k_prev] = np.max([repeat_prob, restart_prob]) 268 | 269 | # get the MAP cluster and only update it 270 | k = np.argmax(_post) # MAP cluster 271 | 272 | # determine whether there was a boundary 273 | event_boundary = (k != self.k_prev) or ((k == self.k_prev) and (restart_prob > repeat_prob)) 274 | 275 | # calculate the event boundary 
probability 276 | _post[self.k_prev] = restart_prob 277 | if not minimize_memory: 278 | log_boundary_probability[ii] = logsumexp(_post) - logsumexp(np.concatenate([_post, [repeat_prob]])) 279 | 280 | # calculate the probability of an event label, ignoring the event boundaries 281 | if self.k_prev is not None: 282 | _post[self.k_prev] = logsumexp([restart_prob, repeat_prob]) 283 | prior[self.k_prev] -= self.lmda / 2. 284 | lik[self.k_prev] = logsumexp(np.array([lik[self.k_prev], lik_restart_event])) 285 | 286 | # now, the normalized posterior 287 | if not minimize_memory: 288 | p = np.log(prior[:len(active)]) + lik - np.max(lik) # subtracting the max doesn't change proportionality 289 | post[ii, :len(active)] = np.exp(p - logsumexp(p)) 290 | 291 | # this is a diagnostic readout and does not effect the model 292 | log_like[ii, :len(active)] = lik 293 | log_prior[ii, :len(active)] = np.log(prior[:len(active)]) 294 | 295 | # These aren't used again, remove from memory 296 | _post = None 297 | lik = None 298 | prior = None 299 | 300 | else: 301 | log_like[ii, 0] = 0.0 302 | log_prior[ii, 0] = self.alfa 303 | if not minimize_memory: 304 | post[ii, 0] = 1.0 305 | 306 | if not minimize_memory: 307 | # prediction error: euclidean distance of the last model and the current scene vector 308 | if ii > 0: 309 | model = self.event_models[self.k_prev] 310 | x_hat[ii, :] = model.predict_next(self.x_prev) 311 | pe[ii] = np.linalg.norm(x_curr - x_hat[ii, :]) 312 | # surprise[ii] = log_like[ii, self.k_prev] 313 | 314 | self.c[k] += 1 # update counts 315 | # update event model 316 | if not event_boundary: 317 | # we're in the same event -> update using previous scene 318 | assert self.x_prev is not None 319 | self.event_models[k].update(self.x_prev, x_curr) 320 | else: 321 | # we're in a new event token -> update the initialization point only 322 | self.event_models[k].new_token() 323 | self.event_models[k].update_f0(x_curr) 324 | 325 | self.x_prev = x_curr # store the current scene for next trial 326 | self.k_prev = k # store the current event for the next trial 327 | 328 | if minimize_memory: 329 | self.clear_event_models() 330 | self.results = Results() 331 | self.results.log_post = log_like + log_prior 332 | return 333 | 334 | # calculate Bayesian Surprise 335 | log_post = log_like[:-1, :] + log_prior[:-1, :] 336 | log_post -= np.tile(logsumexp(log_post, axis=1), (np.shape(log_post)[1], 1)).T 337 | surprise = np.concatenate([[0], logsumexp(log_post + log_like[1:, :], axis=1)]) 338 | 339 | self.results = Results() 340 | self.results.post = post 341 | self.results.pe = pe 342 | self.results.surprise = surprise 343 | self.results.log_like = log_like 344 | self.results.log_prior = log_prior 345 | self.results.e_hat = np.argmax(log_like + log_prior, axis=1) 346 | self.results.x_hat = x_hat 347 | self.results.log_loss = logsumexp(log_like + log_prior, axis=1) 348 | self.results.log_boundary_probability = log_boundary_probability 349 | # # this is a debugging thing 350 | self.results.restart_prob = restart_prob 351 | self.results.repeat_prob = repeat_prob 352 | 353 | return post 354 | 355 | def update_single_event(self, x, update=True, save_x_hat=False, generative_predicitons=False): 356 | """ 357 | 358 | :param x: this is an n x d array of the n scenes in an event 359 | :param update: boolean (default True) update the prior and posterior of the event model 360 | :param save_x_hat: boolean (default False) normally, we don't save this as the interpretation can be tricky 361 | N.b: unlike the posterior 
calculation, this is done at the level of individual scenes within the 362 | events (and not one per event) 363 | :return: 364 | """ 365 | if update: 366 | self.k += 1 367 | self._update_state(x, self.k) 368 | 369 | n_scene = np.shape(x)[0] 370 | 371 | # pull the relevant items from the results 372 | if self.results is None: 373 | self.results = Results() 374 | post = np.zeros((1, self.k)) 375 | log_like = np.zeros((1, self.k)) - np.inf 376 | log_prior = np.zeros((1, self.k)) - np.inf 377 | if save_x_hat: 378 | x_hat = np.zeros((n_scene, self.d)) 379 | sigma = np.zeros((n_scene, self.d)) 380 | if generative_predicitons: 381 | x_hat_gen = np.zeros((n_scene, self.d)) 382 | 383 | else: 384 | post = self.results.post 385 | log_like = self.results.log_like 386 | log_prior = self.results.log_prior 387 | if save_x_hat: 388 | x_hat = self.results.x_hat 389 | sigma = self.results.sigma 390 | if generative_predicitons: 391 | x_hat_gen = self.results.x_hat_gen 392 | 393 | # extend the size of the posterior, etc 394 | 395 | n, k0 = np.shape(post) 396 | while k0 < self.k: 397 | post = np.concatenate([post, np.zeros((n, 1))], axis=1) 398 | log_like = np.concatenate([log_like, np.zeros((n, 1)) - np.inf], axis=1) 399 | log_prior = np.concatenate([log_prior, np.zeros((n, 1)) - np.inf], axis=1) 400 | n, k0 = np.shape(post) 401 | 402 | # extend the size of the posterior, etc 403 | post = np.concatenate([post, np.zeros((1, self.k))], axis=0) 404 | log_like = np.concatenate([log_like, np.zeros((1, self.k)) - np.inf], axis=0) 405 | log_prior = np.concatenate([log_prior, np.zeros((1, self.k)) - np.inf], axis=0) 406 | if save_x_hat: 407 | x_hat = np.concatenate([x_hat, np.zeros((n_scene, self.d))], axis=0) 408 | sigma = np.concatenate([sigma, np.zeros((n_scene, self.d))], axis=0) 409 | 410 | if generative_predicitons: 411 | x_hat_gen = np.concatenate([x_hat_gen, np.zeros((n_scene, self.d))], axis=0) 412 | else: 413 | log_like = np.zeros((1, self.k)) - np.inf 414 | log_prior = np.zeros((1, self.k)) - np.inf 415 | 416 | # calculate sCRP prior 417 | prior = self._calculate_unnormed_sCRP(self.k_prev) 418 | 419 | # likelihood 420 | active = np.nonzero(prior)[0] 421 | lik = np.zeros((n_scene, len(active))) 422 | 423 | # again, this is a readout of the model only and not used for updating, 424 | # but also keep track of the within event posterior 425 | map_prediction = np.zeros(np.shape(x)) 426 | k_within_event = np.argmax(prior) # prior to the first scene within an event having been observed, the 427 | # prior determines what the event type will be 428 | 429 | if save_x_hat: 430 | _x_hat = np.zeros((n_scene, self.d)) # temporary storre 431 | _sigma = np.zeros((n_scene, self.d)) 432 | 433 | if generative_predicitons: 434 | _x_hat_gen = np.zeros((n_scene, self.d)) 435 | 436 | for ii, x_curr in enumerate(x): 437 | 438 | # we need to maintain a distribution over possible event types for the current events -- 439 | # this gets locked down after termination of the event. 
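Both `SEM.run` and `SEM.update_single_event` lean on `self._calculate_unnormed_sCRP(self.k_prev)`, whose definition sits earlier in `models/sem.py` and is not reproduced in this excerpt. For orientation, here is a minimal sketch of the sticky Chinese-restaurant-process prior that the surrounding code appears to assume: existing events weighted by their visit counts (`self.c`), the previously active event boosted by the stickiness `lmda`, and one unvisited slot given the concentration `alfa`. This is an illustration only; the repository's actual implementation may differ in detail.

```python
import numpy as np

def sticky_crp_prior(counts, k_prev, alfa, lmda):
    """Illustrative unnormalized sticky-CRP prior (not the repository's implementation)."""
    k_active = int(np.sum(counts > 0))   # events visited so far
    prior = np.zeros(k_active + 1)       # one extra slot for a brand-new event
    prior[:k_active] = counts[:k_active]
    if k_prev is not None:
        prior[k_prev] += lmda            # stickiness: bonus for repeating the last event
    prior[k_active] = alfa               # concentration: mass reserved for a new event
    return prior

# e.g. counts=[8, 4], k_prev=0, alfa=1.0, lmda=10.0  ->  [18., 4., 1.]
print(sticky_crp_prior(np.array([8., 4.]), 0, 1.0, 10.0))
```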
440 | # Also: none of the event models can be updated until *after* the event has been observed 441 | 442 | # special case the first scene within the event 443 | if ii == 0: 444 | event_boundary = True 445 | else: 446 | event_boundary = False 447 | 448 | # loop through each potentially active event model 449 | for k0 in active: 450 | if k0 not in self.event_models.keys(): 451 | new_model = self.f_class(self.d, **self.f_opts) 452 | if self.model is None: 453 | self.model = new_model.init_model() 454 | else: 455 | new_model.set_model(self.session, self.model) 456 | self.event_models[k0] = new_model 457 | 458 | # get the log likelihood for each event model 459 | model = self.event_models[k0] 460 | 461 | if not event_boundary: 462 | lik[ii, k0] = model.log_likelihood_sequence(x[:ii, :].reshape(-1, self.d), x_curr) 463 | else: 464 | lik[ii, k0] = model.log_likelihood_f0(x_curr) 465 | 466 | if event_boundary: 467 | map_prediction[ii, :] = self.event_models[k_within_event].predict_f0() 468 | else: 469 | map_prediction[ii, :] = self.event_models[k_within_event].predict_next_generative(x[:ii, :]) 470 | 471 | # for the purpose of calculating a prediction error and a prediction error only, calculate 472 | # a within event estimate of the event type (the real estimate is at the end of the event, 473 | # taking into account the accumulated evidence 474 | k_within_event = np.argmax(np.sum(lik[:ii+1, :len(active)], axis=0) + np.log(prior[:len(active)])) 475 | if save_x_hat: 476 | model = self.event_models[k_within_event] 477 | _sigma[ii, :] = model.get_variance() 478 | if ii > 0: 479 | _x_hat[ii, :] = model.predict_next_generative(x[:ii, :]) 480 | else: 481 | _x_hat[ii, :] = model.predict_f0() 482 | 483 | if ii == 1 and generative_predicitons: 484 | # create a generative prediction of the model, conditioned on the first experienced scene 485 | # for now, this is code specific to silvy's simluations 486 | model = self.event_models[k_within_event] 487 | _x_hat_gen[0, :] = x[0, :] 488 | _x_hat_gen[1, :] = x[1, :] 489 | for jj in range(2, n_scene): 490 | _x_hat_gen[jj, :] = model.predict_next_generative(x[:jj, :]) 491 | 492 | 493 | # cache the diagnostic measures 494 | log_like[-1, :len(active)] = np.sum(lik, axis=0) 495 | log_prior[-1, :len(active)+1] = np.log(prior[:len(active)+1]) 496 | 497 | # calculate surprise 498 | bayesian_surprise = logsumexp(lik + np.tile(log_prior[-1, :len(active)], (np.shape(lik)[0], 1)), axis=1) 499 | 500 | if update: 501 | 502 | # at the end of the event, find the winning model! 
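At this point the code has accumulated, for every candidate event type, the sum of the per-scene log-likelihoods (`np.sum(lik, axis=0)`, cached into `log_like[-1]`) plus the log of the unnormalized sCRP prior; the block that follows normalizes that quantity into a posterior and takes the argmax to pick the winning event type for the whole pre-segmented event. A toy illustration of that selection step, with invented numbers:

```python
import numpy as np
from scipy.special import logsumexp

log_like_event = np.array([-42.0, -40.5, -44.0])   # summed scene log-likelihoods per event type
log_prior = np.log([10.0, 2.0, 1.0])               # log of the unnormalized sticky-CRP prior

log_post = log_prior + log_like_event
post = np.exp(log_post - logsumexp(log_post))      # normalized posterior over event types
k = np.argmax(log_post)                            # MAP event type -> 0 in this toy case
```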
503 | log_post = log_prior[-1, :len(active)] + log_like[-1, :len(active)] 504 | post[-1, :len(active)] = np.exp(log_post - logsumexp(log_post)) 505 | k = np.argmax(log_post) 506 | 507 | # update the prior 508 | self.c[k] += n_scene 509 | # cache for next event 510 | self.k_prev = k 511 | 512 | # update the winning model's estimate 513 | self.event_models[k].update_f0(x[0]) 514 | x_prev = x[0] 515 | for X0 in x[1:]: 516 | self.event_models[k].update(x_prev, X0) 517 | x_prev = X0 518 | 519 | self.results.post = post 520 | self.results.log_like = log_like 521 | self.results.log_prior = log_prior 522 | self.results.e_hat = np.argmax(post, axis=1) 523 | self.results.log_loss = logsumexp(log_like + log_prior, axis=1) 524 | 525 | if save_x_hat: 526 | x_hat[-n_scene:, :] = _x_hat 527 | sigma[-n_scene:, :] = _sigma 528 | self.results.x_hat = x_hat 529 | self.results.sigma = sigma 530 | 531 | if generative_predicitons: 532 | x_hat_gen[-n_scene:, :] = _x_hat_gen 533 | self.results.x_hat_gen = x_hat_gen 534 | 535 | return bayesian_surprise, map_prediction 536 | 537 | def init_for_boundaries(self, list_events): 538 | # update internal state 539 | 540 | k = 0 541 | self._update_state(np.concatenate(list_events, axis=0), k) 542 | del k # use self.k and self.d 543 | 544 | # store a compiled version of the model and session for reuse 545 | if self.k_prev is None: 546 | self.session = tf.Session() 547 | K.set_session(self.session) 548 | 549 | # initialize the first event model 550 | new_model = self.f_class(self.d, **self.f_opts) 551 | self.model = new_model.init_model() 552 | 553 | self.event_models[0] = new_model 554 | 555 | def run_w_boundaries(self, list_events, progress_bar=True, leave_progress_bar=True, save_x_hat=False, 556 | generative_predicitons=False): 557 | """ 558 | This method is the same as the above except the event boundaries are pre-specified by the experimenter 559 | as a list of event tokens (the event/schema type is still inferred). 560 | 561 | One difference is that the event token-type association is bound at the last scene of an event type. 562 | N.B. ! also, all of the updating is done at the event-token level. There is no updating within an event! 563 | 564 | evaluate the probability of each event over the whole token 565 | 566 | 567 | Parameters 568 | ---------- 569 | list_events: list of n x d arrays -- each an event 570 | 571 | 572 | progress_bar: bool 573 | use a tqdm progress bar? 574 | 575 | leave_progress_bar: bool 576 | leave the progress bar after completing? 577 | 578 | save_x_hat: bool 579 | save the MAP scene predictions? 
580 | 581 | Return 582 | ------ 583 | post: n_e by k array of posterior probabilities 584 | 585 | """ 586 | 587 | # loop through the other events in the list 588 | if progress_bar: 589 | def my_it(iterator): 590 | return tqdm(iterator, desc='Run SEM', leave=leave_progress_bar) 591 | else: 592 | def my_it(iterator): 593 | return iterator 594 | 595 | self.init_for_boundaries(list_events) 596 | 597 | for x in my_it(list_events): 598 | self.update_single_event(x, save_x_hat=save_x_hat, generative_predicitons=generative_predicitons) 599 | 600 | def clear_event_models(self): 601 | for e in self.event_models.itervalues(): 602 | e.model = None 603 | self.event_models = None 604 | tf.reset_default_graph() # for being sure 605 | K.clear_session() 606 | 607 | 608 | 609 | def clear_sem(sem_model): 610 | """ This function deletes sem from memory""" 611 | assert type(sem_model) == SEM 612 | sem_model.clear_event_models() 613 | sem_model.results = None 614 | return None 615 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unroll_data(x, t=1): 5 | """ 6 | This function is used by recurrent neural nets to do back-prop through time. 7 | 8 | Unrolls a data_set for with time-steps, truncated for t time-steps 9 | appends t-1 D-dimensional zero vectors at the beginning. 10 | 11 | Parameters: 12 | x: array, shape (N, D) or shape (D,) 13 | 14 | t: int 15 | time-steps to truncate the unroll 16 | 17 | output 18 | ------ 19 | 20 | X_unrolled: array, shape (N-1, t, D) 21 | 22 | """ 23 | if np.ndim(x) == 2: 24 | n, d = np.shape(x) 25 | elif np.ndim(x): 26 | n, d = 1, np.shape(x)[0] 27 | x = np.reshape(x, (1, d)) 28 | 29 | x_unrolled = np.zeros((n, t, d)) 30 | 31 | # append a t-1 blank (zero) input patterns to the beginning 32 | data_set = np.concatenate([np.zeros((t - 1, d)), x]) 33 | 34 | for ii in range(n): 35 | x_unrolled[ii, :, :] = data_set[ii: ii + t, :] 36 | 37 | return x_unrolled 38 | 39 | # precompute for speed (doesn't really help but whatever) 40 | log_2pi = np.log(2.0 * np.pi) 41 | 42 | def fast_mvnorm_diagonal_logprob(x, variances): 43 | """ 44 | Assumes a zero-mean mulitivariate normal with a diagonal covariance function 45 | 46 | Parameters: 47 | 48 | x: array, shape (D,) 49 | observations 50 | 51 | variances: array, shape (D,) 52 | Diagonal values of the covariance function 53 | 54 | output 55 | ------ 56 | 57 | log-probability: float 58 | 59 | """ 60 | return -0.5 * (log_2pi * np.shape(x)[0] + np.sum(np.log(variances) + (x**2) / variances )) 61 | -------------------------------------------------------------------------------- /opt/__init__.py: -------------------------------------------------------------------------------- 1 | from hrr import * -------------------------------------------------------------------------------- /opt/csw_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/opt/csw_utils.pyc -------------------------------------------------------------------------------- /opt/hrr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import normalize 3 | 4 | 5 | def embed_gaussian(d, n=1): 6 | """ 7 | returns n normal vectors with variance = 1/n, inline with Plate's caluclations 8 | 9 | :param d: (int), 
dimensions of the embedding 10 | :param n: (int, default=1), number of embeddings to return 11 | 12 | :return: d-length np.array 13 | """ 14 | return np.random.normal(loc=0., scale=1./np.sqrt(d), size=(n, d)) 15 | 16 | 17 | def conv_circ(signal, kernal, n=None): 18 | ''' 19 | Parameters 20 | ---------- 21 | 22 | signal: array of length D 23 | 24 | ker: array of length D 25 | 26 | Returns 27 | ------- 28 | 29 | array of length D 30 | 31 | ''' 32 | if n == None: 33 | n = len(signal) + len(kernal) - 1 34 | 35 | return np.real(np.fft.ifft(np.fft.fft(signal, n) * np.fft.fft(kernal, n))) 36 | 37 | 38 | def plate_formula(n, k, err): 39 | ''' 40 | Determine the number of dimensions needed according to Plate's (2003) 41 | formula: 42 | D = 3.16(K-0.25)ln(N/err^3) 43 | where D is the number of dimensions, K is the maximum number of terms 44 | to be combined, N is the number of atomic values in the language, and 45 | err is the probability of error. 46 | 47 | USAGE: D = plate_formula(n, k, err) 48 | ''' 49 | return int(round(3.16 * (k - 0.25) * (np.log(n) - 3 * np.log(err)))) 50 | 51 | 52 | def embed(n, d, distr='spikeslab_gaussian', param=None): 53 | # Embed symbols in a vector space. 54 | # 55 | # USAGE: X = embed(n, d, distr='spikeslab_gaussian', param=None) 56 | # 57 | # INPUTS: 58 | # n - number of symbols 59 | # d - number of dimensions 60 | # distr - string specifying the distribution on the vector space: 61 | # 'spikeslab_gaussian' - mixture of Gaussian "slab" and Bernoulli "spike" 62 | # 'spikeslab_uniform' - mixture of uniform "slab" and Bernoulli "spike" 63 | # 64 | # param (optional) - parameters of the distribution: 65 | # 'spikeslab_gaussian' - param = [variance, spike probability] (default: [1 1]) 66 | # 'spikeslab_uniform' - param = [bound around 0, spike probability] (default: [1 1]) 67 | # OUTPUTS; 68 | # X - [N x D] matrix 69 | # 70 | # Sam Gershman, Jan 2013 71 | 72 | if param is None: 73 | param = [1, 1] 74 | spike = np.round(np.random.rand(n, d) < param[1]) 75 | 76 | if distr == 'spikeslab_gaussian': 77 | slab = np.random.randn(n, d) * param[1] 78 | elif distr == 'spikeslab_uniform': 79 | slab = np.random.uniform(-param[1], param[1], (n, d)) 80 | else: 81 | raise (Exception) 82 | 83 | return spike * slab 84 | 85 | 86 | def encode(a, b): 87 | return conv_circ(a, b, np.size(a)) 88 | 89 | 90 | def embed_onehot(n, d): 91 | v = np.zeros((n, d)) 92 | for ii in range(n): 93 | v[ii][np.random.randint(d)] = 1 94 | return v 95 | 96 | 97 | def decode(a, b): 98 | c = np.real(np.fft.ifft(np.fft.fft(a, np.size(a)) * np.conj(np.fft.fft(b, np.size(a))))) 99 | return c / np.size(a) -------------------------------------------------------------------------------- /opt/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import models 4 | import cPickle as pickle 5 | from sklearn.metrics import adjusted_rand_score 6 | 7 | 8 | def generate_random_events(n_events, data_file=None): 9 | """ 10 | 11 | Parameters 12 | ---------- 13 | n_events: int 14 | 15 | data_file: str 16 | full file path of the Reynolds, Braver, & Zachs data. 
17 | contains pandas dataframe (pickled) with 13 events of 18 | 8-12 time-points and 54 dimensions 19 | 20 | :return: 21 | """ 22 | 23 | if data_file is None: 24 | data_file = './datasets/motion_data.pkl' 25 | motion_data = pd.read_pickle(data_file) 26 | n_patterns = len(set(motion_data.EventNumber)) 27 | 28 | z = np.mean(np.linalg.norm(motion_data.values[:, :-1], axis=1)) 29 | 30 | X = [] 31 | y = [] 32 | p_prev = -1 33 | for _ in range(n_events): 34 | while True: 35 | p = np.random.randint(n_patterns) 36 | if p != p_prev: 37 | p_prev = p 38 | break 39 | e = motion_data.loc[motion_data.EventNumber == p, :].values[:, :-1] 40 | X.append(e / z) 41 | y.append([p] * e.shape[0]) 42 | return np.concatenate(X), np.concatenate(y) 43 | 44 | 45 | def evaluate(x, y, omega, k=None, number=0, save=False, list_event_boundaries=None): 46 | """ 47 | 48 | Parameters 49 | ---------- 50 | x: NxD array 51 | scene vectors 52 | 53 | y: array of length N 54 | true class labels 55 | 56 | omega: dict 57 | dictionary of kwargs for the SEM model 58 | 59 | k: int 60 | maximum number of clusters 61 | 62 | 63 | Return 64 | ------ 65 | r: int, adjusted rand score 66 | """ 67 | 68 | sem = models.SEM(**omega) 69 | 70 | if k is None: 71 | k = x.shape[0] / 2 72 | 73 | sem.run(x, k=k) 74 | 75 | y_hat = np.argmax(sem.results.post, axis=1) 76 | 77 | r = adjusted_rand_score(y, y_hat) 78 | 79 | if save: 80 | f = open('SEM_sample_%d.save' % number, 'wb') 81 | 82 | pickle.dump({'AdjRandScore': r, 'Omega': omega}, f) 83 | f.close() 84 | return 85 | 86 | return sem, r 87 | 88 | 89 | # generate random string 90 | # 91 | def randstr(N=10): 92 | import string 93 | import random 94 | return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N)) 95 | 96 | 97 | if __name__ == '__main__': 98 | pass 99 | -------------------------------------------------------------------------------- /simulations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/__init__.py -------------------------------------------------------------------------------- /simulations/exp_dubrow.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from models import SEM, clear_sem 4 | from models.memory import evaluate_seg 5 | from models.memory import gibbs_memory_sampler 6 | from tqdm import tqdm 7 | from sklearn.metrics import adjusted_rand_score 8 | import sys 9 | 10 | def generate_experiment(seed=None, scaling_factor=1.0, event_duration=5, n_events=5, d=25): 11 | 12 | n = event_duration * n_events 13 | 14 | if seed: 15 | np.random.seed(seed) 16 | 17 | x = np.random.randn(n, d) 18 | e = np.zeros(n, dtype=int) 19 | 20 | # embed a similarity structure within the items of each category 21 | # by adding the same random vector to all of the items within the 22 | # category 23 | categ_one = (np.random.randn(1, d)) * scaling_factor 24 | categ_two = (np.random.randn(1, d)) * scaling_factor 25 | 26 | for ii in range(n_events): 27 | if ii % 2 == 0: 28 | x[ii * event_duration:ii * event_duration + event_duration, :] += categ_one 29 | e[ii * event_duration:ii * event_duration + event_duration] = 0 30 | else: 31 | x[ii * event_duration:ii * event_duration + event_duration, :] += categ_two 32 | e[ii * event_duration:ii * event_duration + event_duration] = 1 33 | 34 | x /= np.sqrt(d) 35 | 36 | # give the model boundaries.... 
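The line that follows converts the per-scene event-type labels `e` into event *tokens*: a change-point indicator is cumulatively summed, so every return to a previously seen category still opens a new token. A worked toy example of that one-liner:

```python
import numpy as np

e = np.array([0, 0, 1, 1, 0, 0])   # event types per scene (the two categories alternate)
e_tokens = np.concatenate([[False], e[1:] != e[:-1]]).cumsum()
print(e_tokens)                     # -> [0 0 1 1 2 2]: three tokens, two types
```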
37 | e_tokens = np.concatenate([[False], e[1:] != e[:-1]]).cumsum() 38 | x_list_items = [] 39 | for e0 in set(e_tokens): 40 | x_list_items.append(x[e0 == e_tokens, :]) 41 | 42 | return x_list_items, e_tokens 43 | 44 | 45 | # diagnostics functions 46 | 47 | def hash_y(y): 48 | if y is not None: 49 | return np.concatenate([y[0], [y[1]], [y[2]]]) 50 | else: 51 | return y 52 | 53 | 54 | def eval_acc(y_samples, y_mem): 55 | acc = [] 56 | for y_sample in y_samples: 57 | def item_acc(t0): 58 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 59 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 60 | acc.append(np.mean([item_acc(t) for t in range(20)])) 61 | return np.mean(acc) 62 | 63 | 64 | def evaluate_item_position_acc(y_samples, y_mem, t): 65 | acc = [] 66 | for y_sample in y_samples: 67 | def item_acc(t0): 68 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 69 | acc.append(item_acc(t)) 70 | return np.mean(acc) 71 | 72 | 73 | def eval_item_acc(y_samples, y_mem, times): 74 | acc = [] 75 | for y_sample in y_samples: 76 | def item_acc(t0): 77 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 78 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 79 | acc.append(np.mean([item_acc(t) for t in times])) 80 | return np.mean(acc) 81 | 82 | 83 | def score_transitions(y_samples, y_mem, t): 84 | acc = [] 85 | idx = np.arange(len(y_mem)) 86 | for y_sample in y_samples: 87 | y_t = [all(hash_y(y0) == hash_y(y_mem[t])) for y0 in y_sample] 88 | y_t1 = [all(hash_y(y0) == hash_y(y_mem[t - 1])) for y0 in y_sample] 89 | # position accuracy is conditioned on recall 90 | if any(y_t): 91 | if any(y_t1): 92 | acc.append(idx[y_t][0] == (idx[y_t1][0] + 1)) 93 | else: 94 | acc.append(False) 95 | return np.mean(acc) 96 | 97 | def run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=0): 98 | 99 | # generate an experiment 100 | x_list_items, e_tokens = generate_experiment() 101 | n, d = np.concatenate(x_list_items).shape 102 | 103 | pre_locs = [ii for ii in range(len(e_tokens) - 1) if e_tokens[ii] != e_tokens[ii + 1]] 104 | pst_locs = [ii for ii in range(1, len(e_tokens)) if e_tokens[ii] != e_tokens[ii - 1]] 105 | 106 | # Train SEM on the stimuli 107 | sem = SEM(**sem_kwargs) 108 | sem.run_w_boundaries(list_events=x_list_items, progress_bar=False) 109 | 110 | e_seg = np.reshape([[ii] * np.sum(e_tokens == t, dtype=int) for t, ii in enumerate(sem.results.e_hat)], -1) 111 | 112 | # create the corrupted memory trace 113 | y_mem = list() # these are list, not sets, for hashability 114 | 115 | for t in range(n): 116 | # n.b. 
python uses stdev, not var 117 | x_mem = np.concatenate(x_list_items)[t, :] + np.random.normal(scale= gibbs_kwargs['tau'] ** 0.5, size=d) 118 | e_mem = [None, e_seg[t]][np.random.rand() < epsilon_e] 119 | t_mem = t + np.random.randint(-gibbs_kwargs['b'], gibbs_kwargs['b'] + 1) 120 | y_mem.append([x_mem, e_mem, t_mem]) 121 | 122 | # add the models to the kwargs 123 | y_samples, e_samples, x_samples = gibbs_memory_sampler(y_mem, sem, **gibbs_kwargs) 124 | 125 | results = pd.DataFrame({ 126 | 'Block': [block_number], 127 | 'Adj-r2': [adjusted_rand_score(sem.results.e_hat, np.array([0, 1, 0, 1, 0]))], 128 | 'Recon Segment': evaluate_seg(e_samples, e_seg), 129 | 'Overall Acc': eval_acc(y_samples, y_mem), 130 | 'Pre-Boundary': np.mean([evaluate_item_position_acc(y_samples, y_mem, t) for t in pre_locs]), 131 | 'Boundary': np.mean([evaluate_item_position_acc(y_samples, y_mem, t) for t in pst_locs]), 132 | 'Transitions Pre-Boundary': np.mean([score_transitions(y_samples, y_mem, t) for t in pre_locs]), 133 | 'Transitions Boundary': np.mean([score_transitions(y_samples, y_mem, t) for t in pst_locs]), 134 | 'Pre-boundary Acc': eval_item_acc(y_samples, y_mem, pre_locs), 135 | 'Boundary Acc': eval_item_acc(y_samples, y_mem, pst_locs), 136 | }) 137 | clear_sem(sem) 138 | sem = None 139 | return results 140 | 141 | def run_subject(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=16, subj_n=0, progress_bar=True): 142 | subject_results = [] 143 | 144 | if progress_bar: 145 | for ii in tqdm(range(n_runs), desc='Running Subject'): 146 | subject_results.append(run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=ii)) 147 | else: 148 | for ii in range(n_runs): 149 | subject_results.append(run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=ii)) 150 | 151 | subject_results = pd.concat(subject_results) 152 | subject_results['Subject'] = [subj_n] * len(subject_results) 153 | return subject_results 154 | 155 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=16, n_batch=8): 156 | batch_results = [] 157 | for ii in range(n_batch): 158 | sys.stdout.write("Beginning batch {} of {}\n".format(ii, n_batch)) 159 | batch_results.append(run_subject(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=n_runs, subj_n=ii)) 160 | return pd.concat(batch_results) 161 | 162 | -------------------------------------------------------------------------------- /simulations/exp_pettijohn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from models.sem import SEM, clear_sem 4 | from sklearn.metrics import adjusted_rand_score 5 | from models.memory import reconstruction_accuracy, evaluate_seg 6 | from models.memory import gibbs_memory_sampler 7 | 8 | 9 | def generate_task(n=20, d=25): 10 | items = np.random.randn(n, d) 11 | 12 | no_switch_context = np.tile(np.random.randn(1, d), (n, 1)) 13 | switch_context = np.concatenate([np.tile(np.random.randn(1, d), (n / 2, 1)), 14 | np.tile(np.random.randn(1, d), (n / 2, 1))], axis=0) 15 | 16 | x_noswitch = items + no_switch_context 17 | x_switch = items + switch_context 18 | 19 | x_noswitch /= np.sqrt(d) 20 | x_switch /= np.sqrt(d) 21 | 22 | # break the stimuli into two lists for one set of stim and one list for the other 23 | x_list_no_switch = [x_noswitch] 24 | x_list_switch = [x_switch[:n / 2, :], x_switch[n / 2:, :]] 25 | return x_list_no_switch, x_list_switch 26 | 27 | 28 | def evaluate_bound_acc(y_samples, y_mem): 29 | acc = [] 30 | for y_sample in y_samples: 31 | # 32 | def item_acc(t): 33 | return 
np.float(any([all(yt[0] == y_mem[t][0]) for yt in y_sample if yt != None])) 34 | 35 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 36 | acc.append(np.mean([item_acc(t) for t in [10, 11]])) 37 | return np.mean(acc) 38 | 39 | 40 | def evaluate_non_bound_acc(y_samples, y_mem): 41 | acc = [] 42 | for y_sample in y_samples: 43 | def item_acc(t): 44 | return np.float(any([all(yt[0] == y_mem[t][0]) for yt in y_sample if yt != None])) 45 | 46 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 47 | acc.append(np.mean([item_acc(t) for t in range(20) if (t != 10) & (t != 11)])) 48 | return np.mean(acc) 49 | 50 | 51 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e, batch_number=0): 52 | 53 | x_list_no_switch, x_list_switch = generate_task() 54 | n, d = np.concatenate(x_list_switch).shape 55 | 56 | # run through with the switch condition 57 | sem_switch = SEM(**sem_kwargs) 58 | sem_switch.run_w_boundaries(list_events=x_list_switch, leave_progress_bar=False) 59 | 60 | # create the corrupted memory traces 61 | y_mem_switch = list() # these are list, not sets, for hashability 62 | y_mem_noswitch = list() # these are list, not sets, for hashability 63 | 64 | for t in range(n): 65 | # n.b. python uses stdev, not var 66 | x_mem = x_list_switch[t / 10][t % 10, :] + np.random.normal(scale=gibbs_kwargs['tau'] ** 0.5, size=d) 67 | e_mem = [None, sem_switch.event_models.keys()[t / (n / 2)]][np.random.rand() < epsilon_e] 68 | t_mem = t + np.random.randint(-gibbs_kwargs['b'], gibbs_kwargs['b'] + 1) 69 | y_mem_switch.append([x_mem, e_mem, t_mem]) 70 | 71 | # do the no-switch condition ahead of time 72 | e_mem = [None, 0][np.random.rand() < epsilon_e] 73 | y_mem_noswitch.append([x_mem, e_mem, t_mem]) 74 | 75 | # sample from memory 76 | gibbs_kwargs['y_mem'] = y_mem_switch 77 | gibbs_kwargs['sem_model'] = sem_switch 78 | y_samples, e_samples, _ = gibbs_memory_sampler(**gibbs_kwargs) 79 | 80 | results = pd.DataFrame({ 81 | 'Condition': 'Shift', 82 | 'r2': adjusted_rand_score(sem_switch.results.e_hat, np.array([0, 1])), 83 | 'Reconstruction Segementation': evaluate_seg(e_samples, np.concatenate([[e0] * 10 for e0 in sem_switch.event_models])), 84 | 'Overall Acc': reconstruction_accuracy(y_samples, y_mem_switch).mean(), 85 | 'Non-boundary Acc': evaluate_bound_acc(y_samples, y_mem_switch), 86 | 'Boundary Acc': evaluate_non_bound_acc(y_samples, y_mem_switch), 87 | 'Batch': [batch_number], 88 | }, index=[batch_number]) 89 | clear_sem(sem_switch) 90 | sem_switch = None 91 | 92 | # run through with the no-switch condition 93 | sem_no_switch = SEM(**sem_kwargs) 94 | sem_no_switch.run_w_boundaries(list_events=x_list_no_switch, leave_progress_bar=False) 95 | 96 | gibbs_kwargs['y_mem'] = y_mem_noswitch 97 | gibbs_kwargs['sem_model'] = sem_no_switch 98 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**gibbs_kwargs) 99 | 100 | results = pd.concat([results, pd.DataFrame({ 101 | 'Condition': 'No-Shift', 102 | 'Overall Acc': reconstruction_accuracy(y_samples, y_mem_noswitch).mean(), 103 | 'Non-boundary Acc': evaluate_bound_acc(y_samples, y_mem_noswitch), 104 | 'Boundary Acc': evaluate_non_bound_acc(y_samples, y_mem_noswitch), 105 | 'Batch': [batch_number], 106 | }, index=[batch_number])], sort=True) 107 | clear_sem(sem_no_switch) 108 | sem_no_switch = None 109 | 110 | return results -------------------------------------------------------------------------------- /simulations/exp_radvansky.py: -------------------------------------------------------------------------------- 1 | import 
numpy as np 2 | from tqdm import tqdm 3 | import sys 4 | sys.path.append('./') 5 | sys.path.append('../') 6 | from opt import encode 7 | import pandas as pd 8 | from models.memory import reconstruction_accuracy, evaluate_seg 9 | from models.memory import gibbs_memory_sampler 10 | from scipy.special import logsumexp 11 | from sklearn.preprocessing import normalize 12 | from models.sem import clear_sem, SEM 13 | 14 | 15 | def make_task(d=25, n_rooms=15): 16 | # note: in the experiment there were 66 events and 51 probes 17 | verbs = {v: np.random.randn(1, d) / np.sqrt(d) for v in 'enter put_down pick_up leave'.split()} 18 | objects_a = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 19 | objects_b = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 20 | ctx = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 21 | 22 | # to control the variance of the embedded verbs, each is bound to the same null token if 23 | # the sentence has not object 24 | null_token = np.random.randn(1, d) / np.sqrt(d) 25 | 26 | list_events = [] 27 | 28 | list_objects = [] 29 | for ii in range(n_rooms): 30 | event = np.tile(ctx[ii], (4, 1)) 31 | event += np.concatenate([ 32 | verbs['enter'], 33 | objects_a[ii], 34 | objects_b[ii], 35 | verbs['leave'], 36 | ]) 37 | list_events.append(event) 38 | list_objects.append([objects_a[ii], objects_b[ii]]) 39 | 40 | return list_events, list_objects 41 | 42 | 43 | 44 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e_switch=0.25, epsilon_e_noswitch=0.75, 45 | gamma=2.5, n_rooms=25, progress_bar=True): 46 | 47 | sem_model = SEM(**sem_kwargs) 48 | _gibbs_kwargs = {k: v for k, v in gibbs_kwargs.iteritems() if k != 'e_true'} 49 | 50 | acc = [] 51 | list_events, list_objects = make_task(n_rooms=n_rooms) 52 | sem_model.init_for_boundaries(list_events) 53 | 54 | if progress_bar: 55 | def my_it(iterator): 56 | return tqdm(iterator, desc='Run SEM', leave=False, total=len(list_events)) 57 | else: 58 | def my_it(iterator): 59 | return iterator 60 | 61 | y_mem_switch = list() 62 | for itt, x in my_it(enumerate(list_events)): 63 | 64 | sem_model.update_single_event(x) 65 | n_items, d = np.shape(x) 66 | e_list = np.concatenate([[sem_model.results.e_hat[itt]] * n_items for t in range(n_rooms)]) 67 | 68 | 69 | # create a corrupted memory trace for the switch condition 70 | y_mem_noswitch = [yi for yi in y_mem_switch] 71 | for t in range(n_items): 72 | x_mem = x[t, :] + np.random.normal(scale= _gibbs_kwargs['tau'] ** 0.5, size=d) # note, python uses stdev, not var 73 | e_mem = [None, sem_model.results.e_hat[-1]][np.random.rand() < epsilon_e_switch] 74 | t_mem = t + np.random.randint(-_gibbs_kwargs['b'], _gibbs_kwargs['b'] + 1) 75 | y_mem_switch.append([x_mem, e_mem, t_mem]) 76 | 77 | # for the no-switch condition 78 | e_mem = [None, sem_model.results.e_hat[-1]][np.random.rand() < epsilon_e_noswitch] 79 | y_mem_noswitch.append([x_mem, e_mem, t_mem]) 80 | 81 | # for speed, just reconstruct the past 3 events at max 82 | if len(y_mem_switch) > 3 * 2: 83 | y_mem_switch = y_mem_switch[-6:] 84 | y_mem_noswitch = y_mem_noswitch[-6:] 85 | e_list = e_list[-6:] 86 | 87 | # reconstruct (Switch) 88 | _gibbs_kwargs['y_mem'] = y_mem_switch 89 | _gibbs_kwargs['sem_model'] = sem_model 90 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**_gibbs_kwargs) 91 | x_samples = np.array(x_samples) 92 | 93 | item_acc = reconstruction_accuracy(y_samples=y_samples, y_mem=y_mem_switch) 94 | 95 | # evaluate the probability of the associated vs dissociated items 96 | obj_a, 
obj_b = list_objects[itt] 97 | x_samples_ii = np.reshape(x_samples[:, -2:, :], (-1, d)) 98 | p_a_greater_than_b = \ 99 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_a, axis=1) * gamma) < \ 100 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_b, axis=1) * gamma) 101 | 102 | # use the correct scoring method 103 | acc.append({ 104 | 'Room Number': itt, 105 | 'Condition': 'Switch', 106 | 'Reconstruction Accuracy': item_acc.mean(), 107 | 'Last Room Reconstruction Acc': item_acc[-2:].mean(), 108 | 'Pr(A > B)': p_a_greater_than_b, 109 | 'Reconstruction Segementation': evaluate_seg(e_samples, e_list), 110 | }) 111 | 112 | # clear things from memory 113 | y_samples, e_samples, x_samples = None, None, None 114 | 115 | # reconstruct (No-Switch) 116 | _gibbs_kwargs['y_mem'] = y_mem_noswitch 117 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**_gibbs_kwargs) 118 | x_samples = np.array(x_samples) 119 | item_acc = reconstruction_accuracy(y_samples=y_samples, y_mem=y_mem_noswitch) 120 | 121 | # evaluate the probability of the associated vs dissociated items 122 | obj_a, obj_b = list_objects[itt] 123 | x_samples_ii = np.reshape(x_samples[:, -2:, :], (-1, d)) 124 | p_a_greater_than_b = \ 125 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_a, axis=1) * gamma) < \ 126 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_b, axis=1) * gamma) 127 | 128 | # use the correct scoring method 129 | acc.append({ 130 | 'Room Number': itt, 131 | 'Condition': 'No-Switch', 132 | 'Last Room Reconstruction Acc': item_acc[-2:].mean(), 133 | 'Reconstruction Accuracy': item_acc.mean(), 134 | 'Pr(A > B)': p_a_greater_than_b, 135 | 'Reconstruction Segementation': evaluate_seg(e_samples, e_list), 136 | }) 137 | # clear things from memory 138 | y_samples, e_samples, x_samples = None, None, None 139 | 140 | # clear SEM from memory 141 | clear_sem(sem_model) 142 | sem_model = None 143 | 144 | return pd.DataFrame(acc) 145 | 146 | 147 | if __name__ == "__main__": 148 | pass -------------------------------------------------------------------------------- /simulations/exp_schapiro.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models import SEM, clear_sem 3 | from sklearn import metrics 4 | import pandas as pd 5 | from scipy.special import logsumexp 6 | 7 | def logsumexp_mean(x): 8 | return logsumexp(x) - np.log(len(x)) 9 | 10 | def batch_experiment(sem_kwargs, n_train=1400, n_test=600, progress_bar=True): 11 | 12 | # define the graph structure for the experiment 13 | 14 | g = np.array([ 15 | [0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 16 | [1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 17 | [1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 18 | [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], 19 | [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 20 | [0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], 21 | [0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0], 22 | [0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0], 23 | [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0], 24 | [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], 25 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0], 26 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1], 27 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1], 28 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1], 29 | [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0], 30 | ], dtype=float) 31 | 32 | # define the random vectors 33 | d = 25 34 | items = np.random.randn(15, d) / np.sqrt(d) 35 | 36 | # draw random walks on the graph 37 | def sample_pmf(pmf): 38 | return 
np.sum(np.cumsum(pmf) < np.random.uniform(0, 1)) 39 | 40 | train_nodes = [np.random.randint(15)] 41 | for _ in range(n_train-1): 42 | train_nodes.append(sample_pmf(g[train_nodes[-1]] / g[train_nodes[-1]].sum())) 43 | 44 | # draw hamiltonian paths from the graph 45 | 46 | # this graph defines the same thing but a preference order as well 47 | # higher number are c 48 | preferred_nodes = np.array([ 49 | [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0], 50 | ], dtype=float) 51 | 52 | def sample_hamilton(node0): 53 | is_visited = np.zeros(15, dtype=bool) 54 | counter = 0 55 | nodes = [] 56 | while counter < (len(is_visited)): 57 | p = g[node0] * ~is_visited * preferred_nodes 58 | if np.sum(p) == 0: 59 | p = g[node0] * ~is_visited 60 | 61 | node0 = sample_pmf(p / np.sum(p)) 62 | nodes.append(node0) 63 | is_visited[node0] = True 64 | counter += 1 65 | return nodes 66 | 67 | test_nodes = [] 68 | node0 = np.random.randint(15) 69 | for _ in range(n_test / 15): 70 | test_nodes += sample_hamilton(node0) 71 | node0 = test_nodes[-1] 72 | 73 | # embed the vectors 74 | all_nodes = train_nodes + test_nodes 75 | x = [] 76 | for node in all_nodes: 77 | x.append(items[node]) 78 | x = np.array(x) 79 | 80 | sem_model = SEM(**sem_kwargs) 81 | sem_model.run(x, progress_bar=progress_bar) 82 | 83 | 84 | # prepared diagnostic measures 85 | clusters = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] 86 | node_cluster = [] 87 | for node in test_nodes: 88 | node_cluster.append(clusters[node]) 89 | node_cluster = np.array(node_cluster) 90 | 91 | all_node_cluster = [] 92 | for node in all_nodes: 93 | all_node_cluster.append(clusters[node]) 94 | all_node_cluster = np.array(all_node_cluster) 95 | all_boundaries_true = np.concatenate([[False], (all_node_cluster[1:] != all_node_cluster[:-1])]) 96 | 97 | test_boundaries = sem_model.results.e_hat[n_train-1:-1] != sem_model.results.e_hat[n_train:] 98 | boundaries = sem_model.results.e_hat[:n_train-1] != sem_model.results.e_hat[1:n_train] 99 | 100 | test_bound_prob = sem_model.results.log_boundary_probability[n_train:] 101 | bound_prob = sem_model.results.log_boundary_probability[1:n_train] 102 | 103 | # pull the prediction error (Bayesian Suprise) 104 | 105 | test_pe = sem_model.results.surprise[n_train:] 106 | bound_pe = sem_model.results.surprise[1:n_train] 107 | 108 | # cache the correlation between log boundary probability and log surprise 109 | r = np.corrcoef( 110 | sem_model.results.log_boundary_probability, sem_model.results.surprise 111 | )[0][1] 112 | 113 | 114 | output = { 115 | 'Community Transitions (Hamilton)': np.exp(logsumexp_mean(test_bound_prob[all_boundaries_true[1400:]])), 116 | 'Other Parse (Hamilton)': np.exp(logsumexp_mean(test_bound_prob[all_boundaries_true[1400:]==False])), 117 | 'Community Transitions (All Other Trials)': np.exp(logsumexp_mean(bound_prob[all_boundaries_true[1:n_train]])), 118 | 'Other Parse (All Other Trials)': np.exp(logsumexp_mean(bound_prob[all_boundaries_true[1:n_train]==False])), 119 | 'PE Community Transitions (Hamilton)': logsumexp_mean(test_pe[all_boundaries_true[1400:]]), 120 | 'PE Other Parse (Hamilton)': logsumexp_mean(test_pe[all_boundaries_true[1400:]==False]), 121 | 'PE Community Transitions (All Other Trials)': logsumexp_mean(bound_pe[all_boundaries_true[1:n_train]]), 122 | 'PE Other Parse (All Other Trials)': logsumexp_mean(bound_pe[all_boundaries_true[1:n_train]==False]), 123 | 'r':r 124 | } 125 | 126 | # clear_sem_model 127 | clear_sem(sem_model) 128 | sem_model = None 129 | 130 | return output 131 | 
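`batch_experiment` above bundles one full run of the community-structure simulation: it samples a random walk over the graph for training, Hamiltonian paths for test, runs SEM over the embedded node vectors, and reports boundary probabilities and Bayesian surprise at community transitions versus other transitions. The simulation notebook supplies the SEM keyword arguments; a hypothetical stand-alone invocation, with parameter values copied from the video-segmentation script purely for illustration (they are not claimed to be the settings used for this simulation), could look like this:

```python
from models import GRUEvent
from simulations.exp_schapiro import batch_experiment

# illustrative settings only; the paper simulations are driven from the notebooks
sem_kwargs = dict(
    lmda=10.0,                 # sCRP stickiness
    alfa=1.0,                  # sCRP concentration
    f_class=GRUEvent,          # recurrent event-dynamics model, as in the video simulations
    f_opts=dict(var_df0=10., var_scale0=0.06, l2_regularization=0.0,
                dropout=0.5, n_epochs=10, t=4),
)

output = batch_experiment(sem_kwargs, progress_bar=False)

# 'output' is a dict of boundary probabilities and prediction errors, e.g.
# output['Community Transitions (Hamilton)'] vs. output['Other Parse (Hamilton)']
print(output)
```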
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_param_sensitivity.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_param_sensitivity.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_0.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_1.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_10.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_10.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_11.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_11.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_12.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_12.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_13.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_13.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_14.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_14.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_15.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_15.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_16.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_16.pkl 
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_17.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_17.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_18.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_18.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_19.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_19.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_2.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_20.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_20.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_21.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_21.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_22.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_22.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_23.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_23.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_24.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_24.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_3.pkl 
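The `Dubrow_sim_*.pkl` files in `saved_simulations` are pre-computed outputs checked into the repository. Assuming they are pickled pandas DataFrames of per-block results, as the simulation code's use of pandas suggests (an assumption, not something documented here), they can presumably be reloaded with:

```python
import pandas as pd

# assumption: each Dubrow_sim_*.pkl is a pickled pandas DataFrame of per-block results
sim = pd.read_pickle('simulations/saved_simulations/Dubrow_sim_0.pkl')
print(sim.head())
```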
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_4.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_6.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_7.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_8.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_9.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/radvansky_sims.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/radvansky_sims.pkl -------------------------------------------------------------------------------- /simulations/video_segmentation.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import seaborn as sns 4 | import pandas as pd 
5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from models import SEM, GRUEvent, clear_sem 8 | from scipy.stats import multivariate_normal 9 | from scipy.special import logsumexp 10 | 11 | 12 | def segment_video(event_sequence, sem_kwargs): 13 | """ 14 | :param event_sequence: (NxD np.array) the sequence of N event vectors in D dimensions 15 | :param sem_kwargs: (dict) all of the parameters for SEM 16 | :return: 17 | """ 18 | sem_model = SEM(**sem_kwargs) 19 | sem_model.run(event_sequence, k=event_sequence.shape[0], leave_progress_bar=True) 20 | log_posterior = sem_model.results.log_like + sem_model.results.log_prior 21 | 22 | # clean up memory 23 | clear_sem(sem_model) 24 | sem_model = None 25 | 26 | return log_posterior 27 | 28 | def bin_times(array, max_seconds, bin_size=1.0): 29 | """ Helper function to learn the bin the subject data""" 30 | cumulative_binned = [np.sum(array <= t0 * 1000) for t0 in np.arange(bin_size, max_seconds + bin_size, bin_size)] 31 | binned = np.array(cumulative_binned)[1:] - np.array(cumulative_binned)[:-1] 32 | binned = np.concatenate([[cumulative_binned[0]], binned]) 33 | return binned 34 | 35 | def load_comparison_data(data, bin_size=1.0): 36 | 37 | # Movie A is Saxaphone (185s long) 38 | # Movie B is making a bed (336s long) 39 | # Movie C is doing dishes (255s long) 40 | 41 | # here, we'll collapse over all of the groups (old, young; warned, unwarned) for now 42 | n_subjs = len(set(data.SubjNum)) 43 | 44 | sax_times = np.sort(list(set(data.loc[data.Movie == 'A', 'MS']))).astype(np.float32) 45 | binned_sax = bin_times(sax_times, 185, bin_size) / np.float(n_subjs) 46 | 47 | bed_times = np.sort(list(set(data.loc[data.Movie == 'B', 'MS']))).astype(np.float32) 48 | binned_bed = bin_times(bed_times, 336, bin_size) / np.float(n_subjs) 49 | 50 | dishes_times = np.sort(list(set(data.loc[data.Movie == 'C', 'MS']))).astype(np.float32) 51 | binned_dishes = bin_times(dishes_times, 255, bin_size) / np.float(n_subjs) 52 | 53 | return binned_sax, binned_bed, binned_dishes 54 | 55 | def get_binned_boundary_prop(e_hat, log_post, bin_size=1.0, frequency=30.0): 56 | """ 57 | :param results: SEM.Results 58 | :param bin_size: seconds 59 | :param frequency: in Hz 60 | :return: 61 | """ 62 | 63 | # normalize 64 | log_post0 = log_post - np.tile(np.max(log_post, axis=1).reshape(-1, 1), (1, log_post.shape[1])) 65 | log_post0 -= np.tile(logsumexp(log_post0, axis=1).reshape(-1, 1), (1, log_post.shape[1])) 66 | 67 | boundary_probability = [0] 68 | for ii in range(1, log_post0.shape[0]): 69 | idx = range(log_post0.shape[0]) 70 | idx.remove(e_hat[ii - 1]) 71 | boundary_probability.append(logsumexp(log_post0[ii, idx])) 72 | boundary_probability = np.array(boundary_probability) 73 | 74 | frame_time = np.arange(1, len(boundary_probability) + 1) / float(frequency) 75 | 76 | index = np.arange(0, np.max(frame_time), bin_size) 77 | boundary_probability_binned = [] 78 | for t in index: 79 | boundary_probability_binned.append( 80 | # note: this operation is equivalent to the log of the average boundary probability in the window 81 | logsumexp(boundary_probability[(frame_time >= t) & (frame_time < (t + bin_size))]) - \ 82 | np.log(bin_size * 30.) 
83 | ) 84 | boundary_probability_binned = pd.Series(boundary_probability_binned, index=index) 85 | return boundary_probability_binned 86 | 87 | def get_binned_boundaries(e_hat, bin_size=1.0, frequency=30.0): 88 | """ get the binned boundaries from the model""" 89 | 90 | frame_time = np.arange(1, len(e_hat) + 1) / float(frequency) 91 | index = np.arange(0, np.max(frame_time), bin_size) 92 | 93 | boundaries = np.concatenate([[0], e_hat[1:] !=e_hat[:-1]]) 94 | 95 | boundaries_binned = [] 96 | for t in index: 97 | boundaries_binned.append(np.sum( 98 | boundaries[(frame_time >= t) & (frame_time < (t + bin_size))] 99 | )) 100 | return np.array(boundaries_binned, dtype=bool) 101 | 102 | def get_point_biserial(boundaries_binned, binned_comp): 103 | 104 | 105 | M_1 = np.mean(binned_comp[boundaries_binned == 1]) 106 | M_0 = np.mean(binned_comp[boundaries_binned == 0]) 107 | 108 | n_1 = np.sum(boundaries_binned == 1) 109 | n_0 = np.sum(boundaries_binned == 0) 110 | n = n_1 + n_0 111 | 112 | s = np.std(binned_comp) 113 | r_pb = (M_1 - M_0) / s * np.sqrt(n_1 * n_0 / (float(n)**2)) 114 | return r_pb 115 | 116 | 117 | def get_subjs_rpb(data, bin_size=1.0): 118 | """get the distribution of subjects' point bi-serial correlation coeffs""" 119 | grouped_data = np.concatenate(load_comparison_data(data)) 120 | 121 | r_pbs = [] 122 | 123 | for sj in set(data.SubjNum): 124 | _binned_sax = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'A'), 'MS'], 185, 1.0) 125 | _binned_bed = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'B'), 'MS'], 336, 1.0) 126 | _binned_dishes = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'C'), 'MS'], 255, 1.0) 127 | subs = np.concatenate([_binned_sax, _binned_bed, _binned_dishes]) 128 | 129 | r_pbs.append(get_point_biserial(subs, grouped_data)) 130 | return r_pbs 131 | 132 | def plot_boundaries(binned_subj_data, binned_model_bounds, label, batch=0): 133 | 134 | # boundaries = get_binned_boundaries(log_poseterior) 135 | # boundaries = binned_model_bounds 136 | 137 | plt.figure(figsize=(4.5, 2.0)) 138 | plt.plot(binned_subj_data, label='Subject Boundaries') 139 | plt.xlabel('Time (seconds)') 140 | plt.ylabel('Boundary Probability') 141 | 142 | b = np.arange(len(binned_model_bounds))[binned_model_bounds][0] 143 | plt.plot([b, b], [0, 1], 'k:', label='Model Boundary', alpha=0.75) 144 | for b in np.arange(len(binned_model_bounds))[binned_model_bounds][1:]: 145 | plt.plot([b, b], [0, 1], 'k:', alpha=0.75) 146 | 147 | plt.legend(loc='upper right', framealpha=1.0) 148 | plt.ylim([0, 0.6]) 149 | plt.title('"' + label + '"') 150 | 151 | sns.despine() 152 | plt.savefig('video_segmentation_{}_batch_{}.png'.format(label.replace(" ", ""), batch), 153 | dpi=600, bbox_inches='tight') 154 | 155 | 156 | def convert_type_token(event_types): 157 | tokens = [0] 158 | for ii in range(len(event_types)-1): 159 | if event_types[ii] == event_types[ii+1]: 160 | tokens.append(tokens[-1]) 161 | else: 162 | tokens.append(tokens[-1] + 1) 163 | return tokens 164 | 165 | def get_event_duration(event_types, frequency=30): 166 | tokens = convert_type_token(event_types) 167 | n_tokens = np.max(tokens)+1 168 | lens = [] 169 | for ii in range(n_tokens): 170 | lens.append(np.sum(np.array(tokens) == ii)) 171 | return np.array(lens, dtype=float) / frequency 172 | 173 | 174 | def run_batch(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, batch=0, bin_size=1.0): 175 | Z = np.load(embedded_data_path) 176 | 177 | # the "Sax" movie is from time slices 0 to 5537 178 | sax = Z[0:5537, 
:] 179 | bed = Z[5537:5537 + 10071, :] 180 | dishes = Z[5537 + 10071: 5537 + 10071 + 7633, :] 181 | 182 | # remove the first three seconds of the sax video for clean up 183 | sax = sax[3*30:, :] 184 | 185 | # divide each of the videos by the average norm such that they are, in expectation, unit length 186 | sax /= np.mean(np.linalg.norm(sax, axis=1)) 187 | bed /= np.mean(np.linalg.norm(bed, axis=1)) 188 | dishes /= np.mean(np.linalg.norm(dishes, axis=1)) 189 | 190 | # Z[0:5537, :] = sax 191 | # Z[5537:5537 + 10071, :] = bed 192 | # Z[5537 + 10071: 5537 + 10071 + 7633, :] = dishes 193 | 194 | # calibrate prior 195 | mode = f_opts['var_df0'] * f_opts['var_scale0'] / (f_opts['var_df0'] + 2) 196 | f_opts['prior_log_prob'] = multivariate_normal.logpdf( 197 | np.mean(Z, axis=0), mean=np.zeros(Z.shape[1]), cov=np.eye(Z.shape[1]) * mode 198 | ) 199 | 200 | sem_kwargs = { 201 | 'lmda': lmda, # Stickyness (prior) 202 | 'alfa': alfa, # Concentration parameter (prior) 203 | 'f_class': f_class, 204 | 'f_opts': f_opts 205 | } 206 | 207 | sax_log_post = segment_video(sax, sem_kwargs) 208 | bed_log_post = segment_video(bed, sem_kwargs) 209 | dis_log_post = segment_video(dishes, sem_kwargs) 210 | 211 | e_hat_sax = np.argmax(sax_log_post, axis=1) 212 | e_hat_bed = np.argmax(bed_log_post, axis=1) 213 | e_hat_dis = np.argmax(dis_log_post, axis=1) 214 | 215 | binned_sax_bounds = get_binned_boundaries(e_hat_sax, bin_size=bin_size) 216 | binned_bed_bounds = get_binned_boundaries(e_hat_bed, bin_size=bin_size) 217 | binned_dis_bounds = get_binned_boundaries(e_hat_dis, bin_size=bin_size) 218 | 219 | binned_sax_log_post = get_binned_boundary_prop(e_hat_sax, sax_log_post, bin_size=bin_size) 220 | binned_bed_log_post = get_binned_boundary_prop(e_hat_bed, bed_log_post, bin_size=bin_size) 221 | binned_dis_log_post = get_binned_boundary_prop(e_hat_dis, dis_log_post, bin_size=bin_size) 222 | 223 | # pull the subject data for comparions 224 | data = pd.read_csv(human_data_path, delimiter='\t') 225 | binned_sax_subj, binned_bed_subj, binned_dis_subj = load_comparison_data(data) 226 | 227 | # remove the first three seconds of the sax video 228 | binned_sax_subj = binned_sax_subj[3:] 229 | 230 | # save the plots 231 | plot_boundaries(binned_sax_subj, binned_sax_bounds, "Cleaning Saxophone", batch=batch) 232 | plot_boundaries(binned_bed_subj, binned_bed_bounds, "Making a Bed", batch=batch) 233 | plot_boundaries(binned_dis_subj, binned_dis_bounds, 'Washing Dishes', batch=batch) 234 | 235 | # concatenate all of the data to caluclate the r2 values 236 | binned_subj_bound_freq = np.concatenate([binned_sax_subj, binned_bed_subj, binned_dis_subj]) 237 | binned_model_prob = np.concatenate([binned_sax_log_post, binned_bed_log_post, binned_dis_log_post]) 238 | r2 = np.corrcoef(binned_subj_bound_freq, binned_model_prob)[0][1] ** 2 239 | 240 | # calculate the point-biserial correlation 241 | binned_bounds = np.concatenate([binned_sax_bounds, binned_bed_bounds, binned_dis_bounds]) 242 | r_pb = get_point_biserial(binned_bounds, binned_subj_bound_freq) 243 | 244 | # pull the average duration of the events 245 | sax_duration = np.mean(get_event_duration(binned_sax_log_post)) 246 | bed_duration = np.mean(get_event_duration(binned_bed_log_post)) 247 | dis_duration = np.mean(get_event_duration(binned_dis_log_post)) 248 | 249 | # create a data frame with the model's MAP boundaries, boundary log-probabilities and 250 | # human boundary frequencies for later permutation testing 251 | comp_data = { 252 | 'MAP-Boundaries': binned_bounds, 253 | 
'Boundary-LogProb': binned_model_prob, 254 | 'Human Boundary Freq': binned_subj_bound_freq, 255 | 'Video': ['Sax'] * len(binned_sax_subj) + ['Bed'] * len(binned_bed_subj) + ['Dishes'] * len(binned_dis_subj), 256 | 't': range(len(binned_sax_subj)) + range(len(binned_bed_subj)) + range(len(binned_dis_subj)) 257 | } 258 | 259 | # and summary data as well 260 | summary_data = { 261 | 'Bin Size': bin_size, 262 | 'Event Length (Sax)': sax_duration, 263 | 'Event Length (Bed)': bed_duration, 264 | 'Event Length (Dishes)': dis_duration, 265 | 'Model r2': r2, 266 | 'Model rpb': r_pb, 267 | 'Batch': batch 268 | } 269 | 270 | return summary_data, comp_data 271 | 272 | def main(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, output_tag='', n_batch=25): 273 | 274 | args = [embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts] 275 | 276 | summary = [] 277 | comp_data = [] 278 | for batch in range(n_batch): 279 | summary_stats, _comp_data = run_batch(*args, batch=batch) 280 | summary.append(summary_stats) 281 | pd.DataFrame(summary).to_pickle('simulations/saved_simulations/EventR2_GRU_summary' + output_tag + '.pkl') 282 | 283 | _comp_data['Batch'] = [batch] * len(_comp_data['t']) 284 | comp_data.append(pd.DataFrame(_comp_data)) 285 | pd.DataFrame(comp_data).to_pickle('simulations/saved_simulations/EventR2_GRU_comp' + output_tag + '.pkl') 286 | 287 | return 288 | 289 | 290 | 291 | 292 | 293 | if __name__ == "__main__": 294 | import os 295 | 296 | os.chdir('../') 297 | 298 | embedded_data_path = 'data/videodata/video_color_Z_embedded_64_5epoch.npy' 299 | human_data_path = './data/zachs2006_data021011.dat' 300 | 301 | f_class = GRUEvent 302 | 303 | f_opts=dict( 304 | var_df0=10., 305 | var_scale0=0.06, 306 | l2_regularization=0.0, 307 | dropout=0.5, 308 | n_epochs=10, 309 | t=4 310 | ) 311 | 312 | lmda = 10**4 313 | alfa = 10**-1 314 | 315 | output_tag = '_df0_{}_scale0_{}_l2_{}_do_{}'.format( 316 | f_opts['var_df0'], f_opts['var_scale0'], f_opts['l2_regularization'], 317 | f_opts['dropout'] 318 | ) 319 | 320 | main(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, output_tag, n_batch=25) 321 | 322 | --------------------------------------------------------------------------------
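For reference, `get_point_biserial` in `video_segmentation.py` scores agreement between the model's binary MAP boundaries and the binned human boundary frequencies: it is the difference between the mean human rate in model-boundary bins and in non-boundary bins, standardized by the overall spread and weighted by the relative sizes of the two groups. A small self-contained check of that computation on toy numbers:

```python
import numpy as np

def get_point_biserial(boundaries_binned, binned_comp):
    # same computation as in video_segmentation.py
    M_1 = np.mean(binned_comp[boundaries_binned == 1])
    M_0 = np.mean(binned_comp[boundaries_binned == 0])
    n_1 = np.sum(boundaries_binned == 1)
    n_0 = np.sum(boundaries_binned == 0)
    n = n_1 + n_0
    s = np.std(binned_comp)
    return (M_1 - M_0) / s * np.sqrt(n_1 * n_0 / float(n) ** 2)

model_bounds = np.array([0, 1, 0, 0, 1, 0])            # binary MAP boundaries per time bin
human_freq = np.array([.1, .6, .2, .1, .5, .0])        # human boundary frequency per time bin
print(get_point_biserial(model_bounds, human_freq))    # ~0.96: boundaries fall on high-frequency bins
```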