├── LICENSE ├── PaperSimulations ├── Memory Simulation (Bower, 3 sentences).ipynb ├── Memory Simulation (Dubrow and Davachi, 2013; 2016) parameter sensitivity.ipynb ├── Memory Simulation (Dubrow and Davachi, 2013; 2016).ipynb ├── Memory Simulation (Pettijohn, et al, 2016).ipynb ├── Memory Simulation (Radvansky & Copeland, 2006).ipynb ├── Permutation testing of Video Segmentation.ipynb ├── README.md ├── Segmentation - Generalizing Structure (Stationary).ipynb ├── Segmentation - Generalizing Structure.ipynb ├── Segmentation - Schapiro (n250).ipynb ├── Segmentation - Video (Dishes).ipynb └── run_dubrow_parameter_sensitivity.py ├── README.md ├── Tutorials ├── Demo - HRR.ipynb ├── Demo - Motion Capture Data.ipynb ├── Demo - Segmentation and Memory Tutorial.ipynb ├── Demo - Toy Data (Segmentation).ipynb └── Readme.md ├── data ├── motion_data.pkl ├── videodata │ └── video_color_Z_embedded_64_5epoch.npy ├── zachs2006_data021011.dat ├── zachs_2006_young_unwarned.csv └── zachs_2006_young_warned.csv ├── environment.yml ├── models ├── __init__.py ├── event_models.py ├── memory.py ├── sem.py └── utils.py ├── opt ├── __init__.py ├── csw_utils.pyc ├── hrr.py └── utils.py └── simulations ├── __init__.py ├── exp_dubrow.py ├── exp_pettijohn.py ├── exp_radvansky.py ├── exp_schapiro.py ├── saved_simulations ├── Dubrow_param_sensitivity.pkl ├── Dubrow_sim_0.pkl ├── Dubrow_sim_1.pkl ├── Dubrow_sim_10.pkl ├── Dubrow_sim_11.pkl ├── Dubrow_sim_12.pkl ├── Dubrow_sim_13.pkl ├── Dubrow_sim_14.pkl ├── Dubrow_sim_15.pkl ├── Dubrow_sim_16.pkl ├── Dubrow_sim_17.pkl ├── Dubrow_sim_18.pkl ├── Dubrow_sim_19.pkl ├── Dubrow_sim_2.pkl ├── Dubrow_sim_20.pkl ├── Dubrow_sim_21.pkl ├── Dubrow_sim_22.pkl ├── Dubrow_sim_23.pkl ├── Dubrow_sim_24.pkl ├── Dubrow_sim_3.pkl ├── Dubrow_sim_4.pkl ├── Dubrow_sim_5.pkl ├── Dubrow_sim_6.pkl ├── Dubrow_sim_7.pkl ├── Dubrow_sim_8.pkl ├── Dubrow_sim_9.pkl ├── EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl ├── EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl └── radvansky_sims.pkl └── video_segmentation.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ProjectSEM 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /PaperSimulations/README.md: -------------------------------------------------------------------------------- 1 | # Simulations in the Paper 2 | 3 | 4 | 5 | There are also multiple simulations that demonstrate how the model can capture a wide range of empirical phenomena 6 | in the event cognition literature: 7 | * `Segmentation - Video (Dishes)`: shows human-like segmentation of video data, originally used in Zacks & Tversky, 2001. 8 | The dimensionality of the videos has been reduced using a variational auto-encoder, the code for which is available as 9 | a separate library [https://github.com/ProjectSEM/VAE-video](https://github.com/ProjectSEM/VAE-video) 10 | * `Segmentation - Schapiro (n250)`: a simulation of the task found in Schapiro, et al, 2013. 11 | * `Memory Simulation (Bower, 3 sentences)`: a simulation of the classic finding in Bower, 1979 12 | * `Memory Simulation (Radvansky & Copeland, 2006)`: a simulation of the findings in Radvansky & Copeland, 2006 13 | * `Memory Simulation (Pettijohn, et al, 2016)`: a simulation of the findings in Pettijohn, et al, 2016 14 | * `Memory Simulation (Dubrow and Davachi, 2013; 2016)`: a simulation of the findings in Dubrow and Davachi, 2013 and 2016 15 | 16 | There are also follow-up analyses: 17 | * `Memory Simulation (Dubrow and Davachi, 2013; 2016) parameter sensitivity`: looks at memory corruption noise and how it affects order memory 18 | * `Segmentation - Generalizing Structure (Stationary)`: looks at a reduced model that does not simulate event dynamics. 19 | 20 | -------------------------------------------------------------------------------- /PaperSimulations/run_dubrow_parameter_sensitivity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from models import * 4 | from tqdm import tnrange 5 | from simulations.exp_dubrow import run_subject, generate_experiment 6 | 7 | 8 | 9 | # SEM parameters 10 | df0 = 1. 11 | scale0 = .2 12 | 13 | mode = df0 * scale0 / (df0 + 2) 14 | print("Prior variance (mode): {}".format(mode)) 15 | 16 | lmda = 10.0 # stickiness parameter 17 | alfa = 1.
# concentration parameter 18 | 19 | f_class = GRUEvent 20 | f_opts=dict(var_scale0=scale0, var_df0=df0) 21 | 22 | # create the corrupted memory trace 23 | # noise parameters 24 | b = 2 25 | tau = 0.1 26 | print("tau: {}".format(tau)) 27 | 28 | # set the parameters for the Gibbs sampler 29 | gibbs_kwargs = dict( 30 |     memory_alpha = alfa, 31 |     memory_lambda = lmda, 32 |     memory_epsilon = np.exp(-20), 33 |     b = b, # re-defined here for completeness 34 |     tau = tau, # ibid 35 |     n_samples = 250, 36 |     n_burnin = 100, 37 |     progress_bar=False, 38 | ) 39 | sem_kwargs = dict(lmda=lmda, alfa=alfa, f_class=f_class, f_opts=f_opts) 40 | 41 | epsilon_e = 0.25 42 | 43 | x_list_items, e_tokens = generate_experiment() 44 | 45 | mode = df0 * scale0 / (df0 + 2) 46 | print("Prior variance (mode): {}".format(mode)) 47 | print("Median Feature variance: {}".format( 48 |     np.median(np.var(np.concatenate(x_list_items), axis=0)))) 49 | 50 | sem_kwargs = dict( 51 |     lmda=lmda, alfa=alfa, f_class=f_class, f_opts=f_opts 52 | ) 53 | 54 | sem = SEM(**sem_kwargs) 55 | sem.run_w_boundaries(list_events=x_list_items) 56 | print sem.results.e_hat 57 | 58 | # fig, axes = plt.subplots(2, 1) 59 | # axes[0].plot(sem.results.log_prior) 60 | # axes[1].plot(sem.results.log_like) 61 | # # plt.show() 62 | 63 | from tqdm import tnrange, tqdm 64 | 65 | n_batch = 25 66 | n_runs = 16 67 | 68 | results = [] 69 | for ii in tqdm(range(n_batch), desc='Iteration', leave=True): 70 | 71 |     for b in [1, 2, 5, 10]: 72 | 73 |         gibbs_kwargs = dict( 74 |             memory_alpha = alfa, 75 |             memory_lambda = lmda, 76 |             memory_epsilon = np.exp(-20), 77 |             b = b, # re-defined here for completeness 78 |             tau = tau, # ibid 79 |             n_samples = 250, 80 |             n_burnin = 100, 81 |             progress_bar=False, 82 |         ) 83 | 84 | 85 |         _res = run_subject( 86 |             sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=n_runs, subj_n=ii, progress_bar=False 87 |         ) 88 | 89 |         # clean up the results and run simple analyses 90 |         _res['b'] = b 91 |         _res.loc[np.isnan(_res['Transitions Pre-Boundary'].values), 'Transitions Pre-Boundary'] = 0.0 92 |         _res.loc[np.isnan(_res['Transitions Boundary'].values), 'Transitions Boundary'] = 0.0 93 |         _res['PreVsPost'] = _res['Transitions Pre-Boundary'].values - _res['Transitions Boundary'].values 94 | 95 | 96 |         results.append(_res) 97 |     pd.concat(results).to_pickle('Dubrow_param_sensitivity.pkl') 98 | 99 | print "Done!" 100 | 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EventSegmentation 2 | 3 | Open in Colab 4 | 5 | Accompanying code for the manuscript "Structured event memory: a neuro-symbolic model of event cognition", Franklin, Norman, Ranganath, Zacks, and Gershman (in press), *Psychological Review*, [preprint](https://doi.org/10.1101/541607) 6 | 7 | Contains the SEM model, a few basic demonstrations, and all of the simulations in the paper. An up-to-date version of the model (but not the simulations) can be found in the following github repository: [https://github.com/nicktfranklin/SEM2](https://github.com/nicktfranklin/SEM2) 8 | 9 | 10 | 11 | The main code is listed in the `models` module: 12 | * `models.sem`: contains the code for the SEM model 13 | * `models.event_models`: contains code for the various neural network models used by SEM. They all 14 | share a similar structure 15 | 16 | There is runnable code in Jupyter notebooks: 17 | * `Tutorials`: Contains tutorials, runnable in Google Colab.
18 | * `PaperSimulations`: Contains the simulations presented in the paper. These have been designed to run locally, with the 19 | dependencies listed in the environment.yml file and have not been tested in colab. These have been pre-run and can be 20 | opened on github without installation. 21 | 22 | #### Installation Instructions 23 | 24 | This library runs on Python 2.7 and uses the tensorflow and keras libraries for neural networks. 25 | 26 | I recommend using Anaconda python and a virtual environment. [You can find instructions to install Anaconda 27 | here](https://docs.anaconda.com/anaconda/install/). 28 | 29 | Once you have anaconda installed, you can create the virtual environment by running 30 | 31 |     conda env create --file environment.yml 32 | 33 | This will install everything you need to run the Jupyter notebooks. Note that all of the simulations were run with these 34 | package versions and may not work with more recent versions (for example, TensorFlow is under active development). 35 | 36 | You'll need to activate the virtual environment and open Jupyter to access the demonstration notebooks. To do so, run 37 | 38 |     conda activate sem 39 |     jupyter notebook 40 | 41 | 42 | To deactivate the virtual environment, run 43 | 44 |     conda deactivate 45 | 46 | 47 | Note: if these instructions do not work for some reason, the critical libraries the model uses are: 48 | 49 | * Anaconda Python 2.7 50 | * Tensorflow v1.9 51 | * Keras v2.2.0 52 | -------------------------------------------------------------------------------- /Tutorials/Demo - HRR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# ## un-comment out if running locally\n", 10 | "\n", 11 | "# import os\n", 12 | "# os.chdir('../')" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "## if running locally, comment out the following code\n", 22 | "\n", 23 | "!git clone https://github.com/nicktfranklin/SEM.git\n", 24 | "import os\n", 25 | "os.chdir('./SEM/')\n", 26 | "\n", 27 | "!pip install tensorflow==1.9\n", 28 | "!pip install keras==2.2" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%matplotlib inline\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import numpy as np\n", 40 | "import statsmodels.api as sm\n", 41 | "from opt.hrr import embed_gaussian, plate_formula, encode, decode" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "[[1. 0.59917774]\n", 54 | " [0.59917774 1. 
]]\n" 55 | ] 56 | }, 57 | { 58 | "data": { 59 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEKCAYAAADuEgmxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztnX2cHXV56L9PNpuwQWATCRYXYqLFULhAAstLjaWCYqwVWEENXGzxpdLW0la0qbFSDVSvuaa92PpSy7Uiig2vugbCNSiJ3IoNJTGEGE0K8r5wJUAWhCywyT73j5nZnJ2dl9+cmTlnzjnP9/PZz54zZ87MM3PO+T2/3/MqqophGIZhuDCl2QIYhmEYrYMpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxpSGYRiG4czUZgtQNAcffLDOnTu32WIYhmG0FJs2bXpKVWen7dd2SmPu3Lls3Lix2WIYhmG0FCLysMt+Zp4yDMMwnDGlYRiGYThjSsMwDMNwpqlKQ0TeJiI7ROR+EVkW8fpHReTnInKviNwuIq9phpyGYRiGR9OUhoh0AV8Gfg84CjhfRI4K7bYZ6FfVY4Ebgc83VkrDMAyjlmauNE4C7lfVB1T1ZeBa4OzaHVR1varu9p9uAA5rsIyGYRhGDc1UGn3AozXPH/O3xfFB4P+UKpFhGIaRSDPzNCRiW2TvWRF5L9AP/G7M6xcBFwHMmTOnKPkMwzCMEM1caTwGHF7z/DDg8fBOIvIW4JPAWar6UtSBVPVKVe1X1f7Zs1MTGg3DMIw6aabSuBs4QkTmicg04Dxgde0OIrIQ+Bc8hfFkE2Q0DMMwamia0lDVPcDFwFrgF8D1qrpNRC4XkbP83VYCrwBuEJF7RGR1zOEMwzCMBtDU2lOqeitwa2jbp2oev6XhQhmGYRixtF3BQsMwjCIY3DzEyrU7eHx4hFf39rB08XwGFiYFeHYGpjQMwzBCDG4e4hPf2crI6F4AhoZH+MR3tgJ0vOKw2lOGYRghVq7dMa4wAkZG97Jy7Y4mSVQdTGkYhmGEeHx4JNP2TsKUhmEYRohX9/Zk2t5JmNIwDMMIsXTxfHq6uyZs6+nuYuni+U2SqDqYI9wwDCNE4Oy26KnJmNIwDMOIYGBhnymJCMw8ZRiGYThjSsMwDMNwxpSGYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxsqIGIZhVJiqdRA0pWEYhlFRqthB0MxThmEYFaWKHQRNaRiGYVSUKnYQNKVhGIZRUarYQdCUhmEYRpMY3DzEohXrmLdsDYtWrGNw89CE16vYQdAc4YZhGE3AxcldxQ6CpjQMo0FULXTS2EczPpskJ3ftuavWQdCUhmE0gCqGThoezfpsqujkdiHVpyEifykiB4rHv4rIT0XkrY0QzjDahXpDJ9Ns3kZ+mhXWWkUntwsujvAPqOpzwFuB2cD7gRWlSmUYbUY9s8pgBjw0PIKybwZsiqNYmjXjr6KT2wUX85T4/98OXKWqW0REkt5gGMZEDurpZnhkNHJ7HK4272bTCr6aJBlf3dvDUISCKHvGX0UntwsuSmOTiNwGzAM+ISIHAGPlimUY7UXcNCtp+tUKNu+q+2oGNw9x2c3b2LV7n8IOy7h08fwJ1wCNm/FXzcntgot56oPAMuBEVd0NTMMzURmG4cjw7smrjKTt0Bo27yqWuQgIFNquiHtcK+PAwj4+d84x9PX2IEBfbw+fO+eYlhvMG4XLSkOBo4B3AJcD+wP7lSmUYbQb9ZhAmjkDDkgzPVV5NRSl0GqplbEVZ/zNwmWl8RXgt4Hz/ee/Br5cmkSG0YbU4/TMOwPOG3nl4oiv8mooTXFVQcZWxGWlcbKqHi8imwFUdZeITCtZLsNoK+p1etY7Ay7C1+DiiK/CaiiOuNUdVEfGVsRFaYyKSBeemQoRmY05wg0jM400gRQReeViemp2BFCS+SxKoQH09nSz/KyjzRxVJy5K45+A7wKHiMhngXcBl5YqlWEYuSjC1xAXJhw26zTLH5C2mmq2QmtXUpWGqn5bRDYBb8bL2RhQ1V+ULplhGHWTN/dgcPMQL7y8Z9L27ilSGbOOy2rKHNzF41JGZA6wG7gZWA284G/LjYi8TUR2iMj9IrIs4vVT/bIle0TkXUWc0zDakbDT+7QjZ+fKNl65dgeje3XS9lfsN7Uyg3CVI7faGRfz1Bo8f4bghdrOA3YAR+c5se8n+TJwBvAYcLeIrFbVn9fs9gjwPuCv8pzLMNqZKDPNTZuGOPeEPtZv31mXaSZu4E3KK2k0zcrk7nRczFPH1D4XkeOBPy7g3CcB96vqA/5xrwXOBsaVhqo+5L9mjnfDiCHOTLN++07uXHZ6XcdMG5CrUDqkypFb7Uzmzn2q+lPgxALO3Qc8WvP8MX+bYRgZKMNMk5RXMrh5iKU3bpmQv7H0xi0NL6RomdzNIXWlISIfrXk6BTge2FnAuaOq7kw2orocSOQi4CKAOXMKcbcYRstQhpkmHHnUO6MbVbjkunsQgbHQL3V0r3LZzdsaPmCbo7vxuKw0Dqj5m47n4zi7gHM/Bhxe8/ww4PF6DqSqV6pqv6r2z549uwDRDKN1yFtiOy5zfGBhH3cuO50rlizgxdExhkdGUSYrjICoGk9G++Hi07ispHPfDRwhIvOAIeA84L+XdC7DaFvy5CO4ZI6n1XAqiyL9JsGxhoZH6BJhryp9BfliquDfaSSxSkNEbibBXKSqZ+U5saruEZGLgbVAF/B1Vd0mIpcDG1V1tYiciJdYOBM4U0QuU9VcUVuG0Y7Ua6ZxyXVw9Y30JvQGCXAdYIssuR4+1l7V3McsQ85WIWml8fdln1xVbwVuDW37VM3ju/HMVoZhlICLEz2phlNA9xRh+VnJ87ksA2yRDaiSVkp5m1q1SqOsIolVGqp6RyMFMQyj8bg40aNCW7u7hP2nTeXZkVFnk0yWAbbIiLC090S97roi6sQEQ5foqSOAz+H11Bjvo6Gqry1RLsMwGkBcrsNpR85m0Yp144NmnkTBgCwDbJERYWkrpfAxs6yIOjHB0CV66irgn4E9wGnAN4FvlSmUYRiNISrX4dwT+rhp09CEPIybNg2xdPF8Hlzx+9y57PS6TC9Zem/kjQhLO1bSMbN0IyxSzlbBRWn0qOrtgKjqw6q6HKgvzdQwjMpRG1oLcM2GR0pp4ZplgC0qcS8wM42M7qXLb8ge/I87ZpYVUScmGLrUnnpRRKYA9/nRTkPAIeWKZRjtSxVDNMMmmSjy2umzhgbnTdyLiprq6e5KHdSzmpw6LcHQRWl8BJgB/AXwd3gmqgvLFMow2pWqhmi65GIUYadvhUZUVtMqmaQ8jXcBt/hhrwDPA+9viFSGUXHqXS1UNUQzbRXRioNmvZFN1rwpmaSVxgXAV0Tk+
8Aq4DZVbXxaqGFUjDyrhaqGaCZFGBWVOd1o8kQ2dZrJKQuxjnBVfSfwm8DteKapR0Xkn0Xk1EYJZxhVJG61cNnN2yJrONUSN2ApcOng1jLEdSLOSf2FJQvGo6XialRVjUDOoeGRSVVRW3HFVDUSfRqq+hxwNXC1iLwSrz/4F0VklqoenvRew2gGZdYYCohbFezaPTpetC9u9RFlLw+4ZsMjAHxm4JhJr5VNmkmmqr6YMGE5g+5xSvSKqYpBCVXHqZ+GiMwEzgGWALOAm8oUyjDqIRgwApNEuMZQUTNjV4dwVJhqEKIZx6q7Ho19rZlkyV0ognpXNVFyBgojnF9S+30J8lGK/J60K7FKQ0QOEJE/EJFbgV/gNV76DDBHVT/SKAENwxWXGkNFkJQsFiYutj+OQNE1mrQBtJG+mDyDeRY5G60I24Uk89SDeBVo/xn4vqpasXyj0tRTY6geokw5L7y0h+GRyT+RMstJRJlWwnIVFdXVyHIZeSLMsshZ1aCEqpOkNOao6u6GSWIYOclaYygP4eiaSwe38u0Nj0zoJVCm0zXKx7D0hi2MAXvH9pnllt64ZVzeJPt90gA6uHmI3S/vmfRaWdeXZzDPkmPRiXWjiiApesoUhtFSJJmNBDjtyHK6Og5uHuKmTUOTms9IQvfivpiBKW57mKjZ+OiYjiuM8W1+G9Y0k0/cQNk7o5tPfGfrpK58vT3dpZTLGNw8xBSJ6gTtHirrWtajE+tGFYFLRrhhtAS1ZqPwDFKBmzYN0f+aWYUPdHG+lN2jY7ERRnmzjrOYUHbtHk01+cTJo0rkte0/fWopCuMT39ka6dfJcm9ccywsia8+EqOnRKRLRFY2ShjDyEtQfC9qxl6WkzNpAB8Z3cvy1dsmbc9b6C6rCSXN5BMnz7MRfpqk4+UhTvl2iZRWBDD4vuSp3ttppOVp7BWRE0REVJsU1mEYddBIJ2eaL2V4ZJTBzUOTBqQ8WcdJ+R5henu62X/61FT7fZQ8Uau28PuKIu6zGVO1wbxCuORpbAa+54ffnhP8lS2YYeQhS++GvLiE4Ba9wknL9wgI2rCeduTszNnRg5uHeOGlxjnA6/nMWiVLvZ1wURqzgKfxemic6f+9o0yhDCMvjXRyBgP4zBndsfuUscJJm3339faw8t3HAUxy1Atw7gnxK53AvxAOI545oxwHOGT7zAY3D7Hgstv4yHX3WHJeg0l1hKuqVbY1WoJwSGkRLUpdCUw7Cy+/bVKkEeybLRdd5mTmjO7I882c0c2dy7xeaYtWrIvMkl6/fWfsceP8CzOmFe8AD3B1TCf1/qhCxeB2x6VH+GHAF4FFeN+1HwN/qaqPlSybYTgTlbdw06ahhndR+/SZR8dGRUU1BQpkraeOU9yMurtL+PSZR48/r8e/06zENxc/T1rvjyJltNpUk3EJub0K+Dfg3f7z9/rbzihLKKMatMIPpnbmHqYZs86k2XLUjL9eWeNm27093Sw/y1MYi1as4/HhEab4K5owSb6CrIlvWb8reb5baUqhKL9VqxRpbDQuSmO2ql5V8/wbImK1p9qcVvjBNKJFaT3EzZaLLHMSN9vef7r3k45a0dSS5t/JkkeS9buS97uVFK1Wu6rLO+GpasOsZuPiCH9KRN7r52x0ich78RzjRpsyuHmIj12/pfLF3LK2KG12pE3aDHiKyCSZ4mROMh8l5Tu45oRkySPJWvgvb6HAuGi1wEkPFFK91mpTReOy0vgA8CXgCjyfxk+wtq9tS1JWLlTnBzO4eSgxNwImzozrnd3GzVizbof03Iq9qhNkSpI5yXyUlO/w4IrfT7xntbjmkWQdXPMOxmkO8ygzYD0rBKtNFY2L0jhcVc+q3SAii4BHyhHJaCZps/cq/GCCwTSJcERSPaaGuEF748PPcNOmIeftMHEAvuzmbZERT2GZkmSOUkBBfa3123c2dLCLG1x7Y0KQXQbjNPNSkkIraoWQt9RLu+Jinvqi4zajDUj6YVXlB5Ok2MItSgPirmtoeCTWZBU3aK+669FM22vNLgML+5gxLXmuFsiaNPgNLOzj3BP6JiTsBfW1TjtydkML8S1dPJ/urslFBnftHo1sYRuXj3HakbNZtGIdc5et4ZIc+RdFJXbmLfXSrsR+e0Xkt4E3ALNF5KM1Lx0IuHWgMVqOuFlgmfV/XKideSbVswls2kHkUDBLjbsugfHt4ZVB3KAdZ7pzNem5Rv+kzcjXb9856V6MjO5l/fadfO6cY+qOZjqopxsRGN496vTegYV9LF+9LbKfyLc3PDKpSGSUeem0I2dPWKVFXZereanIFUKeUi/tStKUZxrwCn+fA2q2P4fXK9xoQ+J+cM1WGC51loIihVEmpePnHBQ5ACcNTkkKNEuHvfAM1yX6B9IHv7SVSL3hu7WDv6vvJ66woULkYB+WLykcOaAon4eRj1iloap3AHeIyDdU9eEGymRE0KiciSr+4FyipILBNM6ktOGBXc7nGxoeYd6yNfTO6KZ7ijBa06Oip7uLc0/omzArdpGr9vPr6Y62Cs+c0c2nzzx6/F4PLOxj48PPsOquR9mrSpfIeOmPoO9ElPI6qCe+nEkUaffXZZafpAhdBnuXfbKYl2yFUB4ujvCvici7VXUYQERmAteq6uJyRTMCGp0zkfUHV4RCq6erHHjmpdr9L7nunsj9svbeVjybfHeX0NvTzbMjo+Nmm29veITeGd1MnzqFZ0dGncxlS2/YMq58do+OTbqGC06Zw2cGJhYgDJo7BbLvVeWmTZ5dv3Z7mBde3hNZVTeOIgb1pYvnc8l190TeC5fBPq1ScFX8aYabI/zgQGEAqOou4JDyRDLC5I1rL5O0jnD1HmPpDVtYePltzFu2JraTW19vz6Q+CHEDVFfMMaK37mN0r7L/9KlcsWQBL+0ZY9fu0XGF8tKeMa5YsiCxC19g7x8di1ctcXWglq/e5uxwD8uc5bvhOqgnMbCwjwtOmZO5km5AlHM8OJY5oKuFi9IYE5E5wRMReQ2TTcFGDEUklFU5yagIhRbXujQYoLNkNMdF5px/8uGR2y84Zc54dEwccQlzteGvSdFKUQ7iqHPUMrh5KPZ9LqumLN+NtNLutSa2pO/yZwaOGVei4WijtPdGRSpdsWQBD5XcHKnZCZ+tiIt56pPAj0XkDv/5qcBF5YnUPhRlVqpyklERCs113y4RxlQTTWBxPhmAW7Y8Mf5ZhP0H4Dlj45zlSfb6IvxA4c8y7yoyfLwk819Y/qjoKYgOMKh9f/A4rSptlveWSSuUyqkiLqXRvy8ixwOn4K0YL1HVp0qXrA0oqnZNlZOMXBVa0qCVZs8OcM1oDg8+UdFXL4b8CpCtG15AcJ15BryozzLPKjJ8vEsHt/LtDY+MmweiBsc0+fNkWVe1hlNV5ao6qeYpERHgbcDxqnozMENETipdsjagKLNSlZOMXBrnpPk9XDrfQf0rK1cTWu19dsFVcfcmRDPFfZYu1xrUkurt6Wb/afvu3/Sp+37Wg5uHJiiMgKwmxLTkyCSzTp6oqjKpstm3yriYp74CjOF17rsc+DVwE3Bi
iXK1BUWalaoaQuhimkmb0UWZR154eQ+jeyeGuta7soobtKK2B/LMW7Ym1nEXjthKY/lZR0+IngKvDevKdx8X+36XVU+w8gqXVRkeGR1/vnLtjtjryDI4Jq0Gk8w6g5uHEKKdoM02rxaxSu5EXJTGyap6vIhsBi96SkSmFXFyEXkb8I94GeZfU9UVodenA98ETsCrrLtEVR8q4tyNoMpmpSJJU2guM7ook1JRP9S4ZLwuv6ps1HniBpS+3p7xjniuxGVAr1y7g0uuu2f8ebjLYJDVHTdYB4NbklJOUgxpvbfD8l73n4/GRoGNjO7lY9dvGb+e4D7GKS2BUn4HWb43Lr9P83tMRjQlEkNE7sIrJ3K3rzxmA7ep6sJcJxbpAv4Lr5nTY8DdwPmq+vOafT4MHKuqfyIi5wHvVNUlScft7+/XjRs35hGtUGyWEu9gdhmAa+9f74xuVL3s4yz3cu6yNbGv9XR3RWa/A5EDShFmQZcM99pzRe1f+3rcqihYEcWVT7nglDmR7XDjzifopByTNPnjcjcAHspQcdeFtPsU956k32ee726rISKbVLU/bT+XlcY/Ad8FDhGRz+KVELk0p3wAJwH3q+oDACJyLXA28POafc4GlvuPbwS+JCKiaZquQjTTrFQVhVXviis8CNRWhs0y4+tLKAcSN0MPBoQi719Sl8EwSeY710CCYL+oarhveN2s2Iq8cSuXLATyJ63YiiToARNeUaY5totYJXcaLtFT3xaRTcCb8b5vA6r6iwLO3Qc8WvP8MeDkuH1UdY+IPAu8ErDorRSqtKyuNyS1iPIWEK+04o4dDAhFKnzX+llhOcKK/4olCybJFOf/2P3yHoDI4oX1mrSy8PjwCFcsWVC6ibbMHjBVDndvFklVbtfg9QYfVNXtwPaCzx2VTxX+1F32QUQuws8dmTNnzqQ3dCLNCCdMywXIet6iahbF5SHEDeBlDAgu9bPC9M7ods5vACZVmt2123OIf+6cYyaZUuLKrQSfXdRAOXNGNy+Ojk26DhGIGq9f7WfEQ7m1zMrsAdMpfsksJIXcXgm8A3hIRK4TkYGiHOA+jwGH1zw/DHg8bh8RmQocBDwTPpCqXqmq/araP3v27AJFbF0avawuopxImCLKWwQMLOzjzmWnTygHEkW9A0JaZnHW+97T3YXqZMUWFyo7sLBvvD+4y/5x9613RndsGPWnzzx6QkhyMKOLUhi19zG49+GSL0VRZg+YKoe7N4tYpaGq31PV84E5wHeAC4FHROTrInJGAee+GzhCROb5yug8YHVon9X+ecHzpaxrJX9GMymqEY0recqJxA24ruUtko7hImdAvQOCi8LMet/PPaEvttx4EW1U4xonPf/iPpNW1EA5sLDPe+8UiXVwN3pgTao3VoQcZSu9VsPFpzECXAdcJyLHAlfjDeS5GjH5PoqLgbX+sb6uqttE5HJgo6quBv4V+JaI3I+3wjgvzzk7iUYvq5MGrCSzlYvvJS16Kov/Jk5OgdhomLSAAhdTYNZs85s2DXFQT3dk/amkCYGr/T0opBg+/uiYjgcCxA2OK9fuiA29bUZUURV7wLQzqUpDRF4FvAdvwD4UuAF4fxEnV9VbgVtD2z5V8/hF4N1FnKvTaIQtuZakPtFJA7pL4l+RZSqyOjZdFJJrHkoga/B5vPDSntiihCOje9mve8okh33QBzyK046czTUbHoncHqX4sq5kXF5vRlRRo7/rnU6SI/xDwPnAfDzz1F+r6p2NEszIj8uA6xKW67JP3GwvyS4/sDC+pWoRBQ/jzDJZVmAuCslVEbnUxKpl1+5RFr1uFj/55TPjpqCgD3i4hSpEl1cHWHPvE5HhtVlXMrWvpyUcNpqqVkxoR5Ic4W8AVgCHq+qfm8JoLVxs/C62eFcHd5zDMG02W4TvJcsxsjo2XRSSS/2tKAJZ4np9ABMURkCcryhO1l27RyMVnwh1yR34NMJ0d0lHRxV1CkntXgsxQRmNx9XG7zKLzmL6iZrtxSWzBQN6XPJZnAkmitOOnD2pKF/S4JdlVuqyishjHgn2iVtxZKkb5VotOGB49yhXLFmQWe6oEN+oUvP1UJWEVCMel4xwo8VwHehdZtFZzUdRNYvC/bTD4ZgbH35mwqCfZIKJOt9Nm4YmDK4C47208+JqzspjHgne95GY3IkoolZRSxfPn1QYMe0Y9codVpQzpuUfSqqUkGrE49K5z2gxXAd6F7NOFtNPlCnrmg2PICi9Pd2x5qD123fWXbo7SkHGtU91pda0t3LtDs49oW/cnNXb081+3VO45Lp7Cu30NrCwL7a0hmsL1YGFfUyb6vaTzmtKKiMvp8ptjY19uPTTmBXxF98gwGg6cQO9QmoeRHhAymKvj8uB2D06Nt5POyqUM48zPK3PQ9Y2nlGD4U2bhli6eP6kPuFpA2XWVqJR97q7S9ive9/PdP9pXUyfGq+0XnjZMetcYePDz9Td6rSMAT7te2CtWauBy5ryp3hZ2bvwJj29wBMi8iTwIVXdVKJ8Rh0k5QQk5UFE2ZCz2OuTBvmkEiZ56vskVXENtmcxc6QNhq7+nThTy8aHn4msLFsrW21eyvMv7mGkprKspxTym29GxzS1m18SZVQcSPoemOmqOriURv8q8F1VXes/fyteJ7/rgX9U1XCRwaZStdLozSKtomoZSVhxZaTD53Utxe2SnHXp4NbI3IS4c6ddc1KZcYh2TAtMakMbdy/CDYmSrtPlfsLE61pw2W2xuR8uuH4vyigZnvQ9iPsut2OJ8mbhWhrdxQDaHygMAFW9DThVVTcA03PIaJRIUPogLpizjCSstLIfwew/bNpxDYONMk9k8V24XHOSDyeLfyfuXFl8N66fUe1+y886OjIc1hXXc9YbZpxE0vfASpRXBxfz1DMi8nHgWv/5EmCX30TJrSOL0RQGNw8xJaZrXRlJWMEgf9nN2yYVBIxq+Zkl8zvOPJGlcmyc8z4t2gvghZf28I7jDp30WqAIF61Y59TjIoq4ga93RndsYcW464oyJ0ZdU94WrGVlYcd9D6xEeXVwMU8dDHwaeCPed+3HwGXAs8AcVb2/bCGzYOYpj6Rs40bU5QkPxkkDaJcI5598OJ8ZOCbyvacdOZtVdz0a27I1aruLGSjOHHLuCX2sufeJSQN28Nr67TsZGh5JPEfUseMG6pkzupkxbeqkwdfF1OT6WbqGQle1XlOcGfK9p8wZ/94Y+Sisc5+qPgX8uYgcCIyp6vM1L1dKYRj7iItkKqryZxrhGWOSfX6v6viA0P+aWZNWFEk+i72qkS1bwwP/9IhQ1Din9zUbHonM0h4Z3cv67Tu5c9npkdeT1m0vaqDu7hKef3HPuJy1Dt64bHrY18o1SxJheL/+18xqmUS6ODNkntBqoz5cChYeA3wTmOU/fwq4UFV/VrJsRg7iTB5jqoUODK4ZvC5VXlfd9Sjrt+/MZHLqq+lEVysDeAmCAcMjo86FBoHULnCuRQrTBuqoooUurVKLcP62Ur0m82lUBxefxr8AH1XV9QAi8ia8Bk1vKFEuIyeNsAFnCYOsnXknrTi
yDAKB4zVq8Fu0Yl3dhQaTCO6fy/2NU6i1ss5btibyPI1qldoqmE+jOrhET+0fKAwAVf0RsH9pEhmFUEZ0S5isCV5BRFdSgb6DetzyRtPMbPUWGkyi9v6l3V/XjOmkiKyoaKJzT/DKyXdaglsjvs+GGy5K4wER+VsRmev/XQo8WLZgRj6yVnOth3pNBueffHjsa1GVV8P0dHfxD+85LjEkd0qMYgpHGtW2L42iSyTy/qXdX1eFmjYYBor2iiUL2P3yHq7Z8EihpTtahUZ8nw03XMxTH8CLlvoOnv/tDgpqwmSUS702a1c/Rb0mg/7XzIp1bgcO4SAqqs93IMdlUdfKXGvKifJJJBUarCfBMOn+uipUl9DVpEi4pEz7dqOVfDDtjEv01C7gLxogi1EBsvgp6mkpGxw/jSAqyjWiJylabEw1NTqo6LyDrK1X484zuHmIj12/JdYxD+YMNhpLotIQkQuBv8Tr3gfwC+CfVPWbZQtmNIes/TOC97gOtHGDexRZZtFyqCt4AAAVdElEQVRJ0WLhEh9xFDmTLaJHe6BgkxQGmDPYaCxJ7V7/EPgI8FG8ooUCHA+sFBFMcbQnrmaVsAnriiULcg3uefevWnRNESsXFwVrzmCj0SStND4MvFNVH6rZtk5EzsUrKWJKow1xDSWtt+Jo1jDXKSLMW7YmddAtYmZfNHlXLmkKs7enm+Vn5e+WZxhZSIqeOjCkMADwtx1YlkBGc3EJbczTSyFrmOteVadIoSpH19TbByJuldQlwheWLOCeT7+1EtcXYP0uOoPY2lN+HZITsr7WbKz2VH7SoqfmxiSkgVt5i7Sy7RBfU6rVSmHnKfvu+t4q9NXOc51GNSii9tRvici9UccGXlu3ZEblSTOrxA3owIRVQXCsuOMnDTSXxPTLrjXZNHKwrPdcWQILwtQTjtus5kR5rtNoLRKVRsOkMFqKtGgecBswkgbFuJVIYLJp5GCZ51x5ayalKfAsg3WZStZqQ3UOsUpDVR8ObxORd6jqLeWKZFSdPkdntsuAETcopjm2XQfLIgbKPLPosqO6skS7lalkqxa9ZpSHSxmRWi4vRQqjpXB1ZucZMNIc2y6DpWv9pzTyzKKXLp4/qZNe9xQpLKrLtZtgnuAFF6w2VOfgUkaklvr7SBqlEjWjhmIynKOOHfRtfnx4hN4Z3Tz/4h5Gx/aZrYoYMJJMMy4z26Ls7HHn6p3hVlxx0q+mwF+Ra6hx2eajsjr5GdUjq9L441KkMHIRZXpYesMWEBjdq+Pb6jFHxJk1PnfOMROimBodweMyWBY1UC5dPJ+lN24Zv5cBz7+4Z7zHeRwr1+6Y9L7RvVqYg9h1sG6E+chqQ3UGSRnh58RsPwxAVb9TllBGNqJm1LWz/oB6Ztmus/VGDxjBuWr7kYe78xU1UA4s7GP56m2TmiWNjqUP/o1wELvc+7KTH6sQ9ms0hqSVxpn+/0PwGi6t85+fBvwIr+qtUQGyDEBZB6s4h3fW5kVl8eLo2PjjcHe+IgfKuNarQ8MjLFqxrvBKwEVTpvmoDCe7KaHqkhQ99X4AEbkFOEpVn/CfHwp8uTHiGS5kKc2RdbCKy8lIaqTUKNJWQUUOlEn3uOhKwGVR1mqw6ByNquSeGNG4+DTmBgrD51fA60uSx6iDqIGpe4pM8GlAfYNVXE6GS65G2dTbp7se0nqcF1kJuJZWmHEXbYKzRMFq46I0fiQia4FVeAm/5wHrk99iNJK4gSlqW9YfXVxORlK3u0aRx/STdTCuvcdxK464QTJPM6xWmHEXbYKzRMFq49KE6WIReSdwqr/pSlX9brliGVmJG5jyDi5VMq+EqVe2egfj4B4vWrEucpB07W/uSqvMuIv+jlTFD2RE45rc91NgjapeAqwVkQNKlMnIQdGVRqtcPbZe2fImukUl7AG88PKeQiu7tsqMu+jviCUKVpvUlYaIfAi4CJgFvA7oA74KvLlc0YyslGXOKMIvUJZtvh7Z6m00Fcg8sLBvQqhvQJH5F9BaM+4ineyWKFhtXHwafwacBNwFoKr3icghpUpl1EVVzRlVs80X0WhqOKQwAopcBVTZNFg2lihYXVzMUy+p6svBExGZiucQrxsRmSUiPxCR+/z/M2P2+76IDPthvy1Ds5rRVNWcUWbdo3rudRGNplxrPuWhyqZBo3NxWWncISJ/A/SIyBl4bWBvznneZcDtqrpCRJb5zz8esd9KYAYtVL6kmbPqqpozylJmeRzakGz+SJO5iFWAi8nOZtxG1XBRGsuADwJb8QbvW4Gv5Tzv2cCb/MdX42WYT1Iaqnq7iLwpvL3KNNNEVFVzRhZllsX3kbfBUdI+aTIXkX9RJZOdYbjiEnI7Bvxv/68oXhUkDKrqE+3kI2mmiaiqDkRXZZZ1IC3zXrvInGcVUFX/k2GkkVSwcCsJvgtVPTbpwCLyQ+A3Il76pLN0jojIRXgRXsyZM6fow2ei2SaiKpozXJVZ1oG0zHtdtgKuqv/JMNJIWmm8w///Z/7/b/n/LwB2px1YVd8S95qI/EpEDvVXGYcCT7oIm3CuK4ErAfr7+5ta36KqJqJm46LMsg6kZd3rsInsiiULClfEzZ5cGEa9xEZPqerDfsvXRar616q61f9bBizOed7VwIX+4wuB7+U8XmWoN+KlWRFXVSJrRFIZ0UVFdftLwxLYjFZFNKXwnIjcA1ysqj/2n78B+IqqLqj7pCKvBK4H5gCPAO9W1WdEpB/4E1X9I3+/fweOBF4BPA18UFXXJh27v79fN27cWK9oTSFsywdvAOm08Moq3Ie4EiF9vT0Tmk4VQSsUIzQ6BxHZpKr9afu5RE99EPi6iBzkPx8GPpBHOFV9moiMclXdCPxRzfPfyXOeVsGcoh5VcOQ30tdQRf+TYaThEj21CThORA7EW5k8W75YrUXeGWPcgBQ0+OmkmWizB1LzNRhGMqkZ4SJykIj8L7zOfbeLyD/UrDo6niJs4HEDkvjHK9O2bkzEfA2GkYxLGZGvA78G3uP/PQdcVaZQrUQRJTKiBiphcrxzUaU3wpTphG81B7+V7jCMZFx8Gq9T1XNrnl/mO8cNirGBR9nyszb6qZcyM5OrmPVspTsMIx8uK40REXlj8EREFgGWgeRTVOG6gYV93LnsdB5c8fvcuez02M54RdvWyywmWOax66FR4bSG0c64KI0/Bb4sIg+JyEPAl4A/KVWqFqIsG3ijbOtlRgtVLeu5akrMMFoRl+ipe9gXPYWqPle6VC1EWWGijQo/LTNaqGqRSFVTYobRirh07vsfwOdVddh/PhP4mKpeWrZwrUJZNvBG2NbLLHtStZIqVVNihtGKuJinfi9QGACqugt4e3kiGY2kzGihqkUiWTitYeTHpYzIvcCJqvqS/7wH2KiqRzdAvsxUtYyIlYyoBvY5GEY0RZYRuQYvqe8qvNSBD+A1TjIcqWLoaadi4bSGkY9U85Sqfh74DPBbwNHA3/nbDEcsascwjHbBZaUB8Atgj6r+UERmiMgBqvrrMgVrJyxqxzCMdsGl9tSHgBuBf/E39QGDZQrVbhSVAGgYht
FsXKKn/gxYhFdzClW9D2ibnt6NwKJ2DMNoF1zMUy+p6ssiAoCITCWhd7gxmSr0iagKFr1kGK2Ni9K4Q0T+BugRkTOADwM3lytW+2FROxZFZhjtgIt5ahmwE9gK/DFwK2DZ4EZmLIrMMFofl9pTYyIyCAyq6s4GyGS0KRZFZhitT+xKQzyWi8hTwHZgh4jsFJFPNU48o52wKDLDaH2SzFMfwYuaOlFVX6mqs4CTgUUicklDpDPaCosiM4zWJ0lp/CFwvqo+GGxQ1QeA9/qvGUYmqlbA0DCM7CT5NLpV9anwRlXdKSLdJcpktDEWRWYYrU3SSuPlOl8zDMMw2pSklcZxIhLVpU+A/UqSxzAMw6gwsUpDVbviXjMMwzA6E5fkPsMwDMMA3EujG1jdJMMwDFMajljdpGphCtwwmoMpDUeS6iY1a7Dq1IHTFLhhNA/zaThStbpJwcA5NDyCsm/gHNw81BR5GokVPjSM5mFKw5Gq1U3q5IGzagrcMDoJUxqOVK1uUicPnFVT4IbRSZjScKRqdZM6eeCsmgI3jE7CHOEZqFLdpKWL509wBkPnDJzWPtcwmocpjRal0wfOKilww+gkTGm0MDZwGobRaMynYRiGYThjSsMwDMNwpilKQ0RmicgPROQ+///MiH0WiMh/iMg2EblXRJY0Q1bDMAxjH81aaSwDblfVI4Db/edhdgN/qKpHA28DviAivQ2U0TAMwwjRLKVxNnC1//hqYCC8g6r+l6re5z9+HHgSmN0wCQ3DMIxJNEtpvEpVnwDw/x+StLOInARMA34Z8/pFIrJRRDbu3LmzcGENwzAMj9JCbkXkh8BvRLz0yYzHORT4FnChqo5F7aOqVwJXAvT392tGUStDp1atNQyjdShNaajqW+JeE5FficihqvqErxSejNnvQGANcKmqbihJ1Epg5b4Nw2gFmmWeWg1c6D++EPheeAcRmQZ8F/imqt7QQNmaQidXrTUMo3VoltJYAZwhIvcBZ/jPEZF+Efmav897gFOB94nIPf7fguaIWz6dXLXWMIzWoSllRFT1aeDNEds3An/kP74GuKbBojWNV/f2MBShIDqhaq1hGK2DZYRXBCv3bRhGK2AFCytCp1etNQyjNTClUSGsaq1hGFXHzFOGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnDGlYRiGYThjSsMwDMNwxpSGYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnBHVlm2pHYmI7AQeLuBQBwNPFXCcZmPXUT3a5VrsOqpHnmt5jarOTtup7ZRGUYjIRlXtb7YcebHrqB7tci12HdWjEddi5inDMAzDGVMahmEYhjOmNOK5stkCFIRdR/Vol2ux66gepV+L+TQMwzAMZ2ylYRiGYTjTsUpDRGaJyA9E5D7//8yY/b4vIsMickto+zdE5EERucf/W9AYySNlzHst80TkLv/914nItMZIPkk+1+u40N/nPhG5sGb7j0RkR81nckjjpAcReZt//vtFZFnE69P9+3u/f7/n1rz2CX/7DhFZ3Ei5w9R7HSIyV0RGau7/VxstexiHazlVRH4qIntE5F2h1yK/Z80g53XsrflMVucWRlU78g/4PLDMf7wM+J8x+70ZOBO4JbT9G8C7mn0dBV3L9cB5/uOvAn9a1esAZgEP+P9n+o9n+q/9COhvkuxdwC+B1wLTgC3AUaF9Pgx81X98HnCd//gof//pwDz/OF0teB1zgZ81Q+4c1zIXOBb4Zu3vOel71krX4b/2fJHydOxKAzgbuNp/fDUwELWTqt4O/LpRQtVJ3dciIgKcDtyY9v4G4HIdi4EfqOozqroL+AHwtgbJl8RJwP2q+oCqvgxci3c9tdRe343Am/37fzZwraq+pKoPAvf7x2sGea6jaqRei6o+pKr3AmOh91bpe5bnOgqnk5XGq1T1CQD/fz2mjM+KyL0icoWITC9WvEzkuZZXAsOqusd//hjQV7B8rrhcRx/waM3zsLxX+cvwv23wQJYm14R9/Pv9LN79d3lvo8hzHQDzRGSziNwhIr9TtrAp5LmvrfaZJLGfiGwUkQ0ikntCODXvAaqMiPwQ+I2Ilz5ZwOE/Afw/vOXilcDHgcsLOG4kJV5L1MBaWkhdAdeRJO8FqjokIgcANwF/gLdcbwQu9zFun4Z+BinkuY4ngDmq+rSInAAMisjRqvpc0UI6kue+ttpnksQcVX1cRF4LrBORrar6y3qFaWuloapviXtNRH4lIoeq6hMicijwZMZjP+E/fElErgL+KoeoLucr61qeAnpFZKo/azwMeDynuLEUcB2PAW+qeX4Yni8DVR3y//9aRP4Nb1nfKKXxGHB4SK7wfQz2eUxEpgIHAc84vrdR1H0d6hnQXwJQ1U0i8kvg9cDG0qWOJs99jf2eNYFc3w9Vfdz//4CI/AhYiOcjqYtONk+tBoKIiAuB72V5sz+oBT6BAeBnhUqXjbqvxf+hrweCiIvM96JAXK5jLfBWEZnpR1e9FVgrIlNF5GAAEekG3kFjP5O7gSP8SLRpeA7icKRK7fW9C1jn3//VwHl+VNI84AjgPxskd5i6r0NEZotIF4A/qz0Cz4HcLFyuJY7I71lJcqZR93X48k/3Hx8MLAJ+nkuaZkQDVOEPzwZ7O3Cf/3+Wv70f+FrNfv8O7ARG8DT+Yn/7OmAr3sB0DfCKFr6W1+INUvcDNwDTK34dH/BlvR94v79tf2ATcC+wDfhHGhyBBLwd+C+8Wdwn/W2XA2f5j/fz7+/9/v1+bc17P+m/bwfwe836LuW5DuBc/95vAX4KnNnM63C8lhP938ILwNPAtqTvWatdB/AGf5za4v//YF5ZLCPcMAzDcKaTzVOGYRhGRkxpGIZhGM6Y0jAMwzCcMaVhGIZhOGNKwzAMw3DGlIbRstRU79wmIltE5KMikvs7LSLLReRzoW0LROQXdRxrgYi8Pac8D4rI/NC2L4jIXye8Z66INDN3yGhTTGkYrcyIqi5Q1aOBM/Bi2T9dwHFXAUtC284D/q2OYy3Ak8sZP8u6lmv98wevT8FLqruuDnkMIxemNIy2QFWfBC4CLhaP/UTkKhHZ6hfQOw1ARGaIyPV+ocnrxOsH0R861g5gWEROrtn8HrzBGxF5q4j8h9+/4AYReYW//UQR+Ym/6vlPETkILwFrib8iWiJez5BB//wbRORY/73LReRKEbmNyaVPVlGjNIBTgYdU9WF/RfHvviw/FZE3hO+NiLxPRL5U8/wWEXlTyrWsEJGf+3L+fcaPw2hj2rr2lNFZqFdbZwpeddz3+tuOEZEjgdtE5PV4vSB2qeqxIvLfgHtiDhcM1HeJyCnA06p6n1+K4VLgLar6goh8HPioiKzAm/kvUdW7ReRAYDfwKbweHxcDiMgXgc2qOiAip+MpiKCB1wnAG1V1JHRd94rImIgcp6pbfLlW+S8/CZyhqi+KyBH+9glKMI6Ea/kS8E7gSFVVEel1OZ7RGZjSMNqNoCLoG4EvAqjqdhF5GK943hvxSoygqj8TkXtjjnMt8BMR+RgTB+lT8
Jom3emVHWMa8B/AfOAJVb3bP/ZzADK5Ovsb8cptoKrrROSV/ooEYHVYYdSwCq8+1Ta8Xgqf8rd3A18Sr3PkXv8aXYm7lueAF4Gvicga4JbYIxgdhykNo23wi+TtxZt9x/XScOqxoaqPishDwO/iDfK/XfP+H6jq+aFzH4tbueqkMtcvJLxvFXAbcAdwr2+OA7gE+BVwHJ65+cWI9+5hoil6vxpZJl0LgIichNfp8TzgYrxGXYZhPg2jPRCR2Xitar+kXkG1/wtc4L/2emAOXjHAH+P5JxCRo4BjEg67CrgC+KWqPuZv2wAsEpHf9I8xwz/+duDVInKiv/0A36H9a+CAmmPWyvUm4Cl16DehXv+Dp4EV7Fv1gFeW/AlVHcPrH9IV8faHgAUiMkVEDmdfV8DIa/H9Ggep6q3AR9hnPjMMUxpGS9MThNwCP8SbiV/mv/YVoEtEtuL5Gt6nqi/522f7ZqmP41XFfTbm+DcAR+M7wAFUdSfwPmCVf4wNeLb/l/Eirr4oIlvw2oPuh1d2/qjAEQ4sB/r9965gX4lxF1YBRwLfrdn2FeBCEdmAZ5qKWq3cCTyIV+X07/Eq0MZeC56Su8XfdgfeasYwAKzKrdFZiNfvodt3HL8OrwT76/1B3zCMFMynYXQaM4D14jVqEuBPTWEYhju20jAMwzCcMZ+GYRiG4YwpDcMwDMMZUxqGYRiGM6Y0DMMwDGdMaRiGYRjOmNIwDMMwnPn/Ha/6DktjRNcAAAAASUVORK5CYII=\n", 60 | "text/plain": [ 61 | "
" 62 | ] 63 | }, 64 | "metadata": { 65 | "needs_background": "light" 66 | }, 67 | "output_type": "display_data" 68 | } 69 | ], 70 | "source": [ 71 | "# figure out how many dimensions we need\n", 72 | "n = 10; # vocabulary size\n", 73 | "k = 5; # maximum number of terms to be combined\n", 74 | "err = 0.01; # error probability\n", 75 | "d = plate_formula(n, k, err);\n", 76 | "\n", 77 | "dog = embed_gaussian(d, n=1)\n", 78 | "agent = embed_gaussian(d, n=1)\n", 79 | "chase = embed_gaussian(d, n=1)\n", 80 | "verb = embed_gaussian(d, n=1)\n", 81 | "cat = embed_gaussian(d, n=1)\n", 82 | "patient = embed_gaussian(d, n=1)\n", 83 | "\n", 84 | "\n", 85 | "sentance = (encode(dog, agent) + encode(chase, verb)) / np.sqrt(2)\n", 86 | "# devided by sqrt to keep expected lengh = 1\n", 87 | "dog_decoded = decode(sentance, agent)\n", 88 | "dog_decoded /= np.linalg.norm(dog_decoded) # normalize the decoded vector for clarity\n", 89 | "\n", 90 | "plt.scatter(dog, dog_decoded)\n", 91 | "print np.corrcoef(dog, dog_decoded)\n", 92 | "plt.gca().set_xlabel('Dog Vector Values')\n", 93 | "plt.gca().set_ylabel('Decoded-Dog Vector Values')\n", 94 | "plt.show()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "# Compositonality\n", 102 | "Circular convolution preserves the simliarity structure of the underlying vectors. That is, if two vectors are more are similar to each other in vector space, then their convolutions with a third vector will retain that similarity. We can show this buy approximating a circular convolution with a tensor product (Plate, 1995; Doumas and Hummel, 2005). \n", 103 | "\n", 104 | "Formally, this stems from the observation is that if $\\mathbf{a}$, $\\mathbf{b}$, and $\\mathbf{c}$ are $D$-dimensional random vectors drawn from $\\mathcal{N}(0, \\sigma \\text{I})$ then typically\n", 105 | "\n", 106 | "$$\\cos(\\theta_{\\mathbf{a} + \\mathbf{c}, \\mathbf{b} + \\mathbf{c}}) > \\cos(\\theta_{\\mathbf{a}, \\mathbf{b}})$$\n", 107 | "\n", 108 | "or\n", 109 | "\n", 110 | "$$\\frac{(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c})}{||(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c}) ||} > \\frac{\\mathbf{a}^{\\text{T}}\\mathbf{b}}{||\\mathbf{a}^{\\text{T}}\\mathbf{b}||}$$\n", 111 | "\n", 112 | "meaning that the random vectors that share a common (linearly additive) factor are more to each other than the would be if you were to subtract thier common factor.\n", 113 | "\n", 114 | "We can see that this generally the case by noting that $(\\mathbf{a} + \\mathbf{c})^{\\text{T}}(\\mathbf{b} + \\mathbf{c}) = \\mathbf{a}^{\\text{T}}\\mathbf{b} + (\\mathbf{a} + \\mathbf{b})^\\text{T}\\mathbf{c} + \\mathbf{c}^\\text{T}\\mathbf{c}$, hense we can re-arange our claim to that typically $\\mathbf{c}^\\text{T}\\mathbf{c} > (\\mathbf{a} + \\mathbf{b})^\\text{T}\\mathbf{c}$, which is true as long as $\\mathbf{c}$ is not strongly anti-correlated to $\\textbf{a}$ and $\\textbf{b}$. Asymptoically, this will be the case as \n", 115 | "\n", 116 | "$$\\mathbb{E}[(\\mathbf{a}+\\mathbf{b})^\\text{T}\\mathbf{c}] =\\sum_{i=1}^{D}\\mathbb{E}[a_i]\\mathbb{E}[c_i]+\\sum_{i=1}^{D}\\mathbb{E}[b_i]\\mathbb{E}[c_i] + (r_{ac} + r_{bc})\\sigma^2 = 0$$\n", 117 | "\n", 118 | "where $r_{ac}$ and $r_{ac}$ are the correlations between vectors $\\mathbf{a}$ and $\\mathbf{c}$ and vectors $\\mathbf{b}$ and $\\mathbf{c}$, respectively, and is zero for both when $\\mathbf{a}$, $\\mathbf{b}$, $\\mathbf{c}\\sim\\mathcal{N}(0, \\sigma\\text{I})$. 
Thus, we would expect the presense of a common factor to increase the simliarity of two random vectors.\n", 119 | "\n", 120 | "\n", 121 | "We can be more rigorus with this proof but it's easiest to just to show it is the case empirically, that as we increase the dimensionality of the vecotrs $D$, $\\Pr\\left (\\cos(\\theta_{\\mathbf{a} + \\mathbf{c}, \\mathbf{b} + \\mathbf{c}}) > \\cos(\\theta_{\\mathbf{a}, \\mathbf{b}})\\right )$ approches 1:\n", 122 | "\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 4, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "Text(0.5,1,'$\\\\Pr\\\\left ( \\\\cos\\\\ \\\\theta_{a+c, b+c} > \\\\cos\\\\ \\\\theta_{a, b} \\\\right )$')" 134 | ] 135 | }, 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | }, 140 | { 141 | "data": { 142 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAACsCAYAAABVXDjrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFspJREFUeJzt3XmUHWWZx/HvL53uhHRn7yRm7wRiyAYEkgADjmE1LBLBBTI6CjpwFIOCzigcPcjgKDKiKAgoamDgABllUDNMWBQTOCImJBACIQSy0FkhnT3dnfT6zB9VjZeb2923k6pb3fc+n3Pu6VtLVz337bpPv/VW1fvKzHDOuah0SzoA51x+8aTinIuUJxXnXKQ8qTjnIuVJxTkXKU8qzrlIeVJxzkXKk4pzLlKeVHJI0i2Srk06Dtd1SFoqaVLScXSEJ5XDJOltSQckVUt6V9J9ksraWH8Q8FngFzmMcaqk5yXVhgfnqFztuxDFVN63ATdHsJ2c8aRyZD5qZmXAicB04NvpK0jqHr69HFhoZgdyEZikEcBC4FZgILA+U3xdkaQhSceQLsbyXgCcIWloBNvKCU8qETCzLcATwGR4rxbzTUkrgZowsZwHPNvyO5JGSnpMUpWknZJ+lrJsgqTFkvZIWiXpopRl35S0RdJ+SWskndVKWD8CfmlmC8JENp8g8eWD+8OawJck9cvmF1or74jKGmIqbzM7CCwHzj3SbeWKJ5UISBoJnA+8nDJ7DnAB0M/MGoEpwJpw/SLgcaASqACGExyESCoG/hd4GhgMXAM8JGm8pPHAXGC6mfUGPgK8nSGePsBs4Fcps7sBByP5wMm7CPg+wRetUtLDks6RlPF4bq28oyjrcPtxl/dq4PiIthU/M/PXYbwIDrBqYA/BwXo3cFTKss+nrd8AHBu+PxWoArpn2O6HgHeAbinzHgFuAo4BtgNnA8VtxHYx0BTG1vI6AMzPYfn8kOALGfd+yoGvAC8BG4G5GdbJWN5RlHVU5d1WeQHfA+Yldax39OU1lSPzMTPrZ2ajzexqe397yaa0dXcDvcP3I4FKC2ow6YYBm8ysOWVeJTDczNYC1xIc9NslzZc0LMM2KoAFYWz9zKwfsAh4siMfTtLijqyfZjLBf9i4tt9iJ7ASWAH0B8ZkWKe18o6irCGa8m6rvHoTJKouwZNKfNI7qlkJfDB8vwkYldKIm2orMDKtKj8K2AJgZg+b2enA6HAft2bYRg+gtmVC0hhgGkGjX8u874ZtCW9IOr0jH0zSpZKeC690fKqV1YYBt0l6SdLVHdl+NvuQNE7Sd4ENwE+BV4GxZvb1DJtrrbyjKGtop7yzLOu2ymsC8Eorv9f5JF1V6qovglOcs7NdBnwNuDd8X0RwkNwGlAI9gdPCZSXAOuB6oBiYCewHjgXGA2cSHMQlwDzg/gz7PwvYTHCgjgReAK5MW6c0/DkduL2Vz7E4w7wTCBqli8PpQ04NCE5JNgEDws+2PNvtZ7OP8HPvAO4ETsrib5WxvKMo62zKu72ybqu8wv3vAoYlfcxn/d1IOoCu+jqMpFIeHngt7S6jgN8TVN93AHekrDuJ4ErRXuB14OJw/nHA0vDA30XQ+HjIwQYIuIegzacSuDpt+WDgQYIq+grgaynLRgGLw9eelPejwuXfBs5pp2zOBG5JmX4i2+1nsw9gBlDSwb9XxvI+0rJur7zbKussy+uTwGNJH+8dKuukAyikF8EVi2s7QRx3ArPC9/Na+wKTuabyI+Aj4fvuKfNHpLy/FviP8P1s4JvZbr+tfXTFV2tlnW15AUuAyUl/jo68FAbuCoikywhqAy8S/Kf+qJm9m2G9xWY2M23eBOA+oA5Yamb/FrZVLDKzD4Xr/JqgjWEIwX/vL5pZfTbbb20fR/BxE5WprAlqSx0ur67Ck4o7YpJmAMeb2S+TjqUryPfy8qTinIuUX1J2zkXKk4pzLlKZbr7q1MrLy62ioiLpMJwrOMuXL99hZoPaWy+2pCJpHnAhsN3MJmdYLoI7Ic8naPm+3Mxeam+7FRUVLFu2LOpwnXPtkFSZzXpxnv7cD8xqY/l5wLjwdRXBzUPOuS4utqRiZs8R3InYmtnAAxb4G9CvK3VE45zLLMk2leG8/0nezeG8bcmEE4+Gpmaamo2exUUANDUbq7ftY8OOGhqbmznt6HIGlJawYtMeXtuy95CnEAHqG5up3FXLlt0HaDajqJs4cVR/Th4zgNXb9vHXdTs50NCU2w/m8sbA0hJ+ctnUyLaXZFJRhnkZb5qRdBXBKRKjRnWdblZXbt7DVx55mYMNzcy7fDof6NuTKx9YxvLK3e9br7SkiJr6tpNCv17FjOzfi+IiUVvfxO1/epOWW4xGD+zFwNKSuD6Gy3NHhf/wopJkUtlM8ERnixEEj6IfwszuBe4FmDZtWqe9W+/ljbu5ZeEbbNlzgFEDerGscheDynoA8KlfvMCA0hLe3XeQ786exLSKATQ1G8++WcXm3Qc47ZiBzKgYQEn3Q89Ii7qJ3j2L3zdvR3UdL1Xu5oNDelNRXpqTz+dcNpJMKguAuZLmAycDe82sS536HGxo4vsLV7Pmnf3UNzXz8sY9DO7dg1PGDqRyZw0fPX4YN144kYMNzVxx/4u8u+
8gD195CieN7v/eNiYP73tY+y4v68G5kz4Q1UdxLjJxXlJ+hKB/inJJm4HvEPRZgZn9nKDn8fOBtQSXlK+IK5Y47Kyu418eWMaKTXuYNro/JUXdmHvGMXxx5tGU9Ti0WBfMPY36xmZKMyxzLp/EdoSb2Zx2lhvw5bj2H6cVm/ZwzSMvsX1fHfd8+kRmTW7/olVxUTeKi/wGZpf//N9mB+w/2MB9z7/NHc+8xZA+PZl/1SlMHdW//V90roB4UslCc7Nx29NreOCFSqrrGrlgylC+f/EU+vYqbv+XnSswnlSy8MALb3P34nVcMGUoV/3jWI4fmdX4Vc4VJE8q7Vi7vZpbnniDM8YP4mf/NJXgkSXnXGs8qbSitr6R59fu5Md/fJOjSoq49ePHeUJxLgueVDLYvLuWC+/8C3tqGyjr0Z3bLz2BwX16Jh2Wc12CJ5UM7l68jtq6Ju6/YjqnHj2QHt2jvY3ZuXzmSSXN1j0H+O2yTVw6fSQzxw9OOhznuhy/GyvNz59dB8CXZh6TcCTOdU2eVFJs2XOA+Us38YmTRjC831FJh+Ncl+RJJWRmfOcPr1HUTXz5DK+lOHe4PKmEnlr1Ln9avZ3rzhnHiP69kg7HuS7Lkwqwt7aBmxasYsLQPlxx2pikw3GuSyv4pLL3QAP/PG8Ju2rq+cElU/xJYueOUEF/g2rrG/ncvKWs3raPez5zoj/T41wECvo+lSdfe4cVm/Zw55ypnDVhSNLhOJcXCrqm8rf1O+l7VDEXTPGRQZyLSkEnlSUbdjG9YgDduvmDgs5FpWCTyjt7D1K5s5ZTxg5IOhTn8krBJpUlG3YCcPKYgQlH4lx+KeCksouyHt2ZOKxP0qE4l1diTSqSZklaI2mtpOszLB8t6RlJKyUtljQiznhSLVm/k2kV/Sny9hTnIhVbUpFUBNwFnAdMBOZImpi22m0Eg7QfB9wM3BJXPKmq9texrqrGT32ci0GcNZUZwFozW29m9cB8YHbaOhOBZ8L3izIsj8WiN7YDeCOtczGIM6kMBzalTG8O56V6Bfh4+P5ioLekWKsPTc3GPc+uY8LQPpzgd9A6F7k4k0qmxor0wdX/FfiwpJeBDwNbgMZDNiRdJWmZpGVVVVVHFNTjK7eyYUcNXz3rGO/I2rkYxJlUNgMjU6ZHAFtTVzCzrWZ2iZlNBb4VztubviEzu9fMppnZtEGDBh12QE3Nxp1/Xsv4Ib05d6IPbu5cHOJMKi8C4ySNkVQCXAYsSF1BUrmklhhuAObFGA+L3tjO2u3VXHPWMX4XrXMxiS2pmFkjMBd4ClgN/MbMVkm6WdJF4WozgTWS3gSGAN+LKx6AZZW7KSnq5rUU52IU61PKZrYQWJg278aU948Cj8YZQ6rXt+3jmMFllHQv2Hv+nItdQX27Vm/b53fQOhezgkkq2/cfpGp/HROHelJxLk4Fk1RWb9sPwARPKs7FqmCSyutb9wF4TcW5mBVMUlm9bR/D+x1F317FSYfiXF4rmKTy+rZ9furjXA4URFI52NDE+qpqv/LjXA4URFJZ885+mg0mDu2ddCjO5b2CSCqrtwWNtH7641z8CiKprKuqpkf3boz0MZKdi11BJJUNO2qoGFjqDxE6lwMFk1TGlJcmHYZzBSHvk0pjUzMbd9VS4UnFuZzI6illST2Bq4HTCXpv+wtwj5kdjDG2SGzdc5CGJmOsJxXnciLbrg8eAPYDd4bTc4AHgU/GEVSU1u+oBvCainM5km1SGW9mx6dML5L0ShwBRe3tHTUA3qbiXI5k26bysqRTWiYknQw8H09I0dqwo4ayHt0pLytJOhTnCkK2NZWTgc9K2hhOjwJWS3oVsHAwsE5pw85axpSXes/5zuVItkllVqxRxGjDjmpOGNk/6TCcKxhZJRUzq4w7kDjUNTaxZfcBLp6asyGanSt4eX2fyqZdtTQbjCn32/Ody5VYk4qkWZLWSFor6foMy0dJWiTpZUkrJZ0f5f437KgFYEx5WZSbdc61IbakIqkIuAs4j2Ag9jmSJqat9m2C8YCmEgw2dneUMVTuDC8nD/TLyc7lSpw1lRnAWjNbb2b1wHxgdto6BrT0R9CXtGFRj9SumnqKuok+R8U6vJFzLkWc37bhwKaU6c0El6ZT3QQ8LekaoBQ4O8oAauubKC0p8svJzuVQnDWVTN9kS5ueA9xvZiOA84EHU8ZW/vuGpKskLZO0rKqqKusAqusaKevhtRTncinOpLIZGJkyPYJDT2++APwGwMxeAHoC5ekbMrN7zWyamU0bNGhQ1gHU1DVS6knFuZyKM6m8CIyTNEZSCUFD7IK0dTYCZwFImkCQVLKvirSjuq6RXp5UnMup2JKKmTUCc4GngNUEV3lWSbpZ0kXhal8HrgwfTnwEuNzM0k+RDlttfRNlPYqi2pxzLgux/hs3s4XAwrR5N6a8fx04La7919Q1MrDUb3xzLpfy+o5ab6h1LvfyOqnU1DXSy09/nMup/E4q9U1+9ce5HMvbpNLQ1Ex9YzNlJZ5UnMulvE0qNXWNAF5TcS7H8japVL+XVLxNxblcytukUlPXBHhNxblcy9+kUu+nP84lIX+TSnj64/epOJdbeZ9UepV4m4pzuZS3SaU6bFPxmopzuZW3SaXW21ScS0TeJpVqb1NxLhF5m1Rq6hrpJujRPW8/onOdUt5+42rqgud+vH9a53Irj5OKd3vgXBLyN6nUe/+0ziUhb5NKdV0wPIdzLrfyNql4T/rOJcOTinMuUvmbVOq9oda5JMSaVCTNkrRG0lpJ12dYfrukFeHrTUl7otp3cEnZ21Scy7XY/pVLKgLuAs4hGK3wRUkLwmE5ADCz61LWvwaYGtX+q+saKfWuJJ3LuThrKjOAtWa23szqgfnA7DbWn0MwoNgRa+mf1ttUnMu9OJPKcGBTyvTmcN4hJI0GxgB/bmV5hwZor/Ve35xLTJxJJdP98a0NaXoZ8KiZNWVa2NEB2qvrWx4m9DYV53ItzqSyGRiZMj0C2NrKupcR0akPpHbQ5DUV53ItzqTyIjBO0hhJJQSJY0H6SpLGA/2BF6LasXd74FxyYksqZtYIzAWeAlYDvzGzVZJulnRRyqpzgPlm1tqpUYd5m4pzyYn1W2dmC4GFafNuTJu+Ker9+pg/ziUnL++ofW90Qm9TcS7n8jOpeP+0ziUmP5OK96TvXGLyNKkE/dP2LM7Lj+dcp5aX37pJw/pw2YxR3j+tcwnIy/OD86YM5bwpQ5MOw7mClJc1FedccjypOOci5UnFORcpRXh3fE5IqgIq21ilHNiRo3AOl8cYDY8xGtnGONrM2u0moMsllfZIWmZm05KOoy0eYzQ8xmhEHaOf/jjnIuVJxTkXqXxMKvcmHUAWPMZoeIzRiDTGvGtTcc4lKx9rKs65BOVVUmlv8LIkSBopaZGk1ZJWSfpqOH+ApD9Keiv82T/hOIskvSzp8XB6jKQlYXz/HXYJmihJ/SQ9KumNsDxP7YTleF34d35N0iOSeiZdlpLmSdou6bWUeRnLTYE7wu/QSkkndnR/eZNUUgYvOw+YCMyRNDHZqABoB
L5uZhOAU4Avh3FdDzxjZuOAZ8LpJH2VoNvPFrcCt4fx7Qa+kEhU7/dT4EkzOxY4niDeTlOOkoYDXwGmmdlkoIigb+aky/J+YFbavNbK7TxgXPi6Crinw3szs7x4AacCT6VM3wDckHRcGeL8A8GojWuAoeG8ocCaBGMaER5YZwKPEwyvsgPonqlsE4qxD7CBsB0wZX5nKseWsa4GEDys+zjwkc5QlkAF8Fp75Qb8ApiTab1sX3lTU6EDg5clRVIFwdCuS4AhZrYNIPw5OLnI+AnwDaA5nB4I7LGg83LoHGU5FqgC7gtP034lqZROVI5mtgW4DdgIbAP2AsvpfGUJrZfbEX+P8impdGTwspyTVAb8D3Ctme1LOp4Wki4EtpvZ8tTZGVZNuiy7AycC95jZVKCG5E8Z3ydsl5hNMNrmMKCU4HQiXdJl2ZYj/tvnU1LpyOBlOSWpmCChPGRmj4Wz35U0NFw+FNieUHinARdJeptgvOszCWou/SS19LfTGcpyM7DZzJaE048SJJnOUo4AZwMbzKzKzBqAx4B/oPOVJbRebkf8PcqnpJLV4GW5pqD7uV8Dq83sxymLFgCfC99/jqCtJefM7AYzG2FmFQRl9mcz+zSwCPhE0vG1MLN3gE3h4HMAZwGv00nKMbQROEVSr/Dv3hJjpyrLUGvltgD4bHgV6BRgb8tpUtaSatSKqTHqfOBNYB3wraTjCWM6naD6uBJYEb7OJ2i3eAZ4K/w5oBPEOhN4PHw/FlgKrAV+C/ToBPGdACwLy/L3BCNbdqpyBP4deAN4DXgQ6JF0WRIMKbwNaCCoiXyhtXIjOP25K/wOvUpwJatD+/M7ap1zkcqn0x/nXCfgScU5FylPKs65SHlScc5FypOKcy5SnlQKmKQmSSvCp2pfkfQ1Sd3CZdMk3ZFQXH9NYr8uGn5JuYBJqjazsvD9YOBh4Hkz+06ykbmuzGsqDgAz207wqPvc8G7KmSl9q9wk6b8kPS3pbUmXSPpPSa9KejJ8DAFJJ0l6VtJySU+l3Aa+WNKtkpZKelPSh8L5k8J5K8K+O8aF86vDn5L0w7BvklclXRrOnxlus6VvlYfCO1iR9ANJr4fbuy3X5ejydCxld3jMbH14+pPpSd+jgTMI+qp5Afi4mX1D0u+ACyT9H3AnMNvMqsIE8D3g8+HvdzezGZLOB75D8JzMF4GfmtlD4aMVRWn7vITgLtrjCcameVHSc+GyqcAkgudSngdOk/Q6cDFwrJmZpH5HXCiuwzypuHSZnlIFeMLMGiS9SvDlfzKc/ypBXx3jgcnAH8NKQxHBreEtWh6kXB6uD0Fy+pakEcBjZvZW2j5PBx4xsyaCB+CeBaYD+4ClZrYZQNKKcJt/Aw4CvwqT3OMd+uQuEn76494jaSzQROYnfesAzKwZaLC/N8Y1E/xzErDKzE4IX1PM7Nz03w+33z3c1sPARcAB4ClJZ6aH1Ea4dSnvmwhqQo3ADIInwj/G3xOfyyFPKg4ASYOAnwM/s8NrvV8DDJJ0ari9YkmT2tnnWGC9md1B8HTscWmrPAdcqqD/3EHAPxI8mNfa9sqAvma2ELiW4NTJ5Zif/hS2o8JTh2KCvnQfBH7c9q9kZmb1kj4B3CGpL8Gx9RNgVRu/dinwGUkNwDvAzWnLf0fQ/eIrBE96f8PM3pF0bCvb6w38QVJPglrOdYfzWdyR8UvKzrlI+emPcy5SnlScc5HypOKci5QnFedcpDypOOci5UnFORcpTyrOuUh5UnHORer/AXFIk8CSR8YOAAAAAElFTkSuQmCC\n", 143 | "text/plain": [ 144 | "
" 145 | ] 146 | }, 147 | "metadata": { 148 | "needs_background": "light" 149 | }, 150 | "output_type": "display_data" 151 | } 152 | ], 153 | "source": [ 154 | "N = 1000\n", 155 | "dot = [None]* 100\n", 156 | "for d in range(1, 101):\n", 157 | " a = (np.random.randn(N, d))\n", 158 | " b = (np.random.randn(N, d))\n", 159 | " c = (np.random.randn(N, d))\n", 160 | " f = np.array([np.dot(a[ii, :] + c[ii, :], b[ii, :] + c[ii, :]) - np.dot(a[ii, :], b[ii, :])\n", 161 | " for ii in range(N)]) \n", 162 | " dot[d-1] = np.mean(f >= 0)\n", 163 | "\n", 164 | "plt.figure(figsize=(4, 2)) \n", 165 | "plt.plot(range(1, 101), dot)\n", 166 | "plt.xlabel('Dimensions')\n", 167 | "plt.ylabel('p')\n", 168 | "plt.title(r'$\\Pr\\left ( \\cos\\ \\theta_{a+c, b+c} > \\cos\\ \\theta_{a, b} \\right )$')" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "We can extend this arguements to tensor products by first noting that tensor products are distributive, so:\n", 176 | "\n", 177 | "$$(\\mathbf{x} + \\mathbf{y}) \\otimes\\mathbf{z} = \\mathbf{x}\\otimes\\mathbf{z} + \\mathbf{y}\\otimes\\mathbf{z}$$\n", 178 | "\n", 179 | "Thus, if we make two random vectors $\\mathbf{a}$ and $\\mathbf{b}$ similar to eachother by adding to each a common factor $\\mathbf{d}$, then taking the tensor product of each of those two vectors with a third random vector $\\mathbf{c}$, we can decompose both tensor products into the sum of two seperate tensors:\n", 180 | "\n", 181 | "$$(\\mathbf{a} + \\mathbf{d}) \\otimes\\mathbf{c} = \\mathbf{a}\\otimes\\mathbf{c} + \\mathbf{d}\\otimes\\mathbf{c}$$\n", 182 | "$$(\\mathbf{b} + \\mathbf{d}) \\otimes\\mathbf{c} = \\mathbf{b}\\otimes\\mathbf{c} + \\mathbf{d}\\otimes\\mathbf{c}$$\n", 183 | "\n", 184 | "Thus, both tensors share a common tensor. Then, by the arguments above we can show that:\n", 185 | "\n", 186 | "$$\\cos \\theta_{(\\mathbf{a} + \\mathbf{d}) \\otimes\\mathbf{c}, (\\mathbf{b} + \\mathbf{d}) \\otimes\\mathbf{c}} > \\cos \\theta_{\\mathbf{a} \\otimes\\mathbf{c}, \\mathbf{b}\\otimes\\mathbf{c} }$$\n", 187 | "\n", 188 | "will be true with probabilty approaching 1 as the dimensionality of the vectors goes to infinity. Thus, taking the tensor product of two similar vectors and a third random vector will result in two similar tensor products. Because circular convolution resembles a tensor product opperation (Plate, 1995; Doumas and Hummel, 2005) this argumemt will hold for it as well. 
Without getting into a rigorous proof of this, we can demonstrate this empirically:\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 5, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Dot Product:\n", 201 | "\n", 202 | "dot(Olivia, William) = 0.590\n", 203 | "dot(Olivia, Coffee) = 0.036\n", 204 | "dot(Coffee, William) = 0.045\n", 205 | "\n", 206 | "dot(Olivia(*)Agent, William(*)Agent) = 0.549\n", 207 | "dot(Olivia(*)Agent, Coffee(*)Agent) = 0.075\n", 208 | "dot(Coffee(*)Agent, William(*)Agent) = 0.056\n", 209 | "\n", 210 | "Euclidean Distance:\n", 211 | "\n", 212 | "||Olivia - William|| = 0.952\n", 213 | "||Olivia - Coffee || = 1.362\n", 214 | "||Coffee - William|| = 1.332\n", 215 | "\n", 216 | "||Olivia(*)Agent - William(*)Agent|| = 0.987\n", 217 | "||Olivia(*)Agent - Coffee(*)Agent || = 1.440\n", 218 | "||Coffee(*)Agent - William(*)Agent|| = 1.497\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "from sklearn.preprocessing import normalize\n", 224 | "\n", 225 | "# both Olivia and William will share the property isPerson\n", 226 | "isPerson = embed_gaussian(d)\n", 227 | "\n", 228 | "Olivia = (embed_gaussian(d) + isPerson) / np.sqrt(2)\n", 229 | "William = (embed_gaussian(d) + isPerson) / np.sqrt(2)\n", 230 | "Agent = embed_gaussian(d)\n", 231 | "Coffee = embed_gaussian(d)\n", 232 | "\n", 233 | "\n", 234 | "OliviaAgent = encode(Olivia, Agent)\n", 235 | "WilliamAgent = encode(William, Agent)\n", 236 | "CoffeeAgent = encode(Coffee, Agent)\n", 237 | "\n", 238 | "print \"Dot Product:\"\n", 239 | "print \n", 240 | "print \"dot(Olivia, William) = %.3f\" % np.dot(OliviaAgent, WilliamAgent.T)[0][0]\n", 241 | "print \"dot(Olivia, Coffee) = %.3f\" % np.dot(CoffeeAgent, WilliamAgent.T)[0][0]\n", 242 | "print \"dot(Coffee, William) = %.3f\" % np.dot(OliviaAgent, CoffeeAgent.T)[0][0]\n", 243 | "\n", 244 | "\n", 245 | "print \n", 246 | "print \"dot(Olivia(*)Agent, William(*)Agent) = %.3f\" % np.dot(Olivia, William.T)[0][0]\n", 247 | "print \"dot(Olivia(*)Agent, Coffee(*)Agent) = %.3f\" % np.dot(Coffee, William.T)[0][0]\n", 248 | "print \"dot(Coffee(*)Agent, William(*)Agent) = %.3f\" % np.dot(Olivia, Coffee.T)[0][0]\n", 249 | "\n", 250 | "\n", 251 | "print\n", 252 | "print \"Euclidean Distance:\"\n", 253 | "print \n", 254 | "print \"||Olivia - William|| = %.3f\" % np.linalg.norm(Olivia - William)\n", 255 | "print \"||Olivia - Coffee || = %.3f\" % np.linalg.norm(Olivia - Coffee)\n", 256 | "print \"||Coffee - William|| = %.3f\" % np.linalg.norm(Coffee - William)\n", 257 | "\n", 258 | "print\n", 259 | "print \"||Olivia(*)Agent - William(*)Agent|| = %.3f\" % np.linalg.norm(OliviaAgent - WilliamAgent)\n", 260 | "print \"||Olivia(*)Agent - Coffee(*)Agent || = %.3f\" % np.linalg.norm(CoffeeAgent - WilliamAgent)\n", 261 | "print \"||Coffee(*)Agent - William(*)Agent|| = %.3f\" % np.linalg.norm(OliviaAgent - CoffeeAgent)\n", 262 | "\n" 263 | ] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 2", 269 | "language": "python", 270 | "name": "python2" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 2 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython2", 282 | "version": "2.7.15" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 1 287 | } 288 | 
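The HRR demo above relies on `encode` and `decode` from `opt/hrr.py`, whose source is not shown in this listing. For reference, binding and unbinding in Holographic Reduced Representations are conventionally implemented as circular convolution and circular correlation, which can be computed with the FFT. The sketch below is a minimal, self-contained illustration of those two operations under that assumption; the function names and the round-trip example are illustrative and are not taken from the repository's code.

    import numpy as np

    def circular_convolution(a, b):
        # HRR-style binding: elementwise product in the Fourier domain
        return np.real(np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)))

    def circular_correlation(trace, cue):
        # HRR-style unbinding: convolve the trace with the approximate inverse of the cue
        return np.real(np.fft.ifft(np.fft.fft(trace) * np.conj(np.fft.fft(cue))))

    d = 128
    dog = np.random.randn(d) / np.sqrt(d)    # N(0, I/d) vectors, the usual HRR scaling
    agent = np.random.randn(d) / np.sqrt(d)
    trace = circular_convolution(dog, agent)
    dog_hat = circular_correlation(trace, agent)
    print(np.corrcoef(dog, dog_hat)[0, 1])   # a noisy but clearly positive reconstruction

The scatter plot in the notebook's first figure is this same kind of noisy round trip, produced with the repository's own `encode` and `decode`.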
-------------------------------------------------------------------------------- /Tutorials/Readme.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | 4 | There are a few prepackaged tutorials in Jupyter notebooks meant to demonstrate basic functions of the model. They can 5 | all be run in Google colab (the accompanying jupyter notebooks have been pre-run and can be viewed in GitHub) 6 | 7 | * Demo - Segmentation and Memory Tutorial 8 | 9 | This brief tutorial walks through some basic functions of segmentation and the memory model in a toy 2-d world. This also includes a comparison between SEM and an HMM in Memory. 10 | 11 | Open in Colab 12 | 13 |   14 | 15 | 16 | 17 | * Demo - Toy Data (Segmentation) 18 | 19 | These simulations demonstrate how SEM can segement simple, 2D dynamical systems with 20 | various different methods of estimating the event dynamics of the system. 21 | 22 | Open in Colab 23 | 24 |   25 | 26 | * Demo - HRR 27 | 28 | Demonstration of the Holographic reduced representation 29 | 30 | Open in Colab 31 | 32 |   33 | 34 | * Demo - Motion Capture Data.ipynb 35 | 36 | Simulations of the SEM model on the 3D motion capture data. 37 | 38 | Open in Colab 39 | 40 |   41 | -------------------------------------------------------------------------------- /data/motion_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/data/motion_data.pkl -------------------------------------------------------------------------------- /data/videodata/video_color_Z_embedded_64_5epoch.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/data/videodata/video_color_Z_embedded_64_5epoch.npy -------------------------------------------------------------------------------- /data/zachs_2006_young_unwarned.csv: -------------------------------------------------------------------------------- 1 | 1.0582010582010568, 0 2 | 1.5873015873015888, 0 3 | 2.6455026455026456, 0.0842105263157894 4 | 3.9682539682539684, 0 5 | 4.761904761904766, 0 6 | 5.820105820105823, 0.24736842105263168 7 | 6.87830687830688, 0 8 | 7.936507936507937, 0.16842105263157903 9 | 8.730158730158735, 0.16842105263157903 10 | 9.788359788359784, 0 11 | 10.846560846560848, 0 12 | 11.904761904761905, 0.16842105263157903 13 | 12.962962962962962, 0.0842105263157894 14 | 13.75661375661376, 0.0842105263157894 15 | 14.814814814814817, 0.0052631578947368585 16 | 15.873015873015873, 0.24736842105263168 17 | 16.931216931216937, 0.16842105263157903 18 | 17.724867724867728, 0.24736842105263168 19 | 19.841269841269842, 0.0052631578947368585 20 | 21.957671957671955, 0.16315789473684206 21 | 23.80952380952381, 0.010526315789473717 22 | 25.925925925925924, 0.3263157894736841 23 | 26.984126984126988, 0.0842105263157894 24 | 28.835978835978835, 0.0842105263157894 25 | 29.8941798941799, 0.16315789473684206 26 | 30.952380952380963, 0.0052631578947368585 27 | 32.01058201058201, 0.0842105263157894 28 | 32.80423280423281, 0.0052631578947368585 29 | 34.920634920634924, 0.16315789473684206 30 | 36.77248677248677, 0.0052631578947368585 31 | 37.830687830687836, 0.16842105263157903 32 | 38.8888888888889, 0.09473684210526312 33 | 39.682539682539684, 0.24736842105263168 34 | 41.005291005291014, 0.0052631578947368585 35 | 41.79894179894181, 0.0842105263157894 36 | 
42.85714285714286, 0.0842105263157894 37 | 43.915343915343925, 0.0052631578947368585 38 | 44.973544973544975, 0.0842105263157894 39 | 46.82539682539684, 0.0842105263157894 40 | 47.883597883597886, 0.24736842105263168 41 | 50, 0.0052631578947368585 42 | 69.84126984126985, 0 43 | 70.89947089947091, 0.2578947368421052 44 | 71.95767195767196, 0.3315789473684211 45 | 73.01587301587303, 0.17368421052631577 46 | 74.07407407407408, 0.2421052631578947 47 | 74.86772486772486, 0 48 | 79.89417989417991, 0 49 | 80.95238095238095, 0.1578947368421053 50 | 82.01058201058203, 0.0052631578947368585 51 | 82.80423280423281, 0.0842105263157894 52 | 83.86243386243387, 0 53 | 85.97883597883599, 0 54 | 87.03703703703704, 0.3315789473684211 55 | 87.83068783068785, 0.07894736842105265 56 | 88.88888888888889, 0 57 | 89.94708994708996, 0.0842105263157894 58 | 91.00529100529101, 0 59 | 93.12169312169314, 0 60 | 93.9153439153439, 0.0842105263157894 61 | 96.03174603174602, 0.0842105263157894 62 | 97.08994708994709, 0 63 | 98.14814814814815, 0 64 | 98.94179894179896, 0.0842105263157894 65 | 100, 0.0052631578947368585 66 | 101.05820105820106, 0.16315789473684206 67 | 102.11640211640213, 0.0842105263157894 68 | 102.91005291005291, 0.0842105263157894 69 | 105.02645502645504, 0.3315789473684211 70 | 106.08465608465607, 0.16842105263157903 71 | 106.87830687830689, 0.24736842105263168 72 | 107.93650793650795, 0.0052631578947368585 73 | 108.99470899470901, 0.0842105263157894 74 | 110.05291005291005, 0.0052631578947368585 75 | 111.11111111111111, 0.0842105263157894 76 | 111.9047619047619, 0.0052631578947368585 77 | 112.96296296296299, 0.0842105263157894 78 | 114.02116402116403, 0 79 | 115.07936507936509, 0 80 | 116.13756613756615, 0.0842105263157894 81 | 116.93121693121694, 0.0842105263157894 82 | 117.989417989418, 0.0052631578947368585 83 | 119.04761904761907, 0.0842105263157894 84 | 120.1058201058201, 0.0052631578947368585 85 | 120.89947089947091, 0.5 86 | 121.95767195767198, 0 87 | 123.01587301587301, 0 88 | 124.07407407407408, 0.0842105263157894 89 | 125.13227513227514, 0.0842105263157894 90 | 125.92592592592592, 0.0052631578947368585 91 | 126.98412698412699, 0.0842105263157894 92 | 128.04232804232805, 0.0052631578947368585 93 | 129.1005291005291, 0.0842105263157894 94 | 130.15873015873018, 0.0842105263157894 95 | 131.21693121693121, 0 96 | 135.978835978836, 0 97 | 138.0952380952381, 0.4157894736842105 98 | 140.21164021164023, 0 99 | 146.03174603174605, 0 100 | 147.0899470899471, 0.16842105263157903 101 | 148.14814814814818, 0 102 | 155.02645502645504, 0 103 | 156.08465608465607, 0.0842105263157894 104 | 157.14285714285717, 0 105 | 161.11111111111111, -0.0052631578947368585 106 | 162.16931216931218, 0.42105263157894735 107 | 163.22751322751324, 0.0052631578947368585 108 | 165.0793650793651, 0.16842105263157903 109 | 166.13756613756615, 0.08947368421052637 110 | 167.19576719576722, 0.24736842105263168 111 | 168.1216931216931, 0.0842105263157894 112 | 169.1798941798942, 0.1657894736842105 113 | 170.1058201058201, 0.0052631578947368585 114 | 173.01587301587304, 0.3315789473684211 115 | 174.07407407407408, 0.16842105263157903 116 | 175.13227513227514, 0.16842105263157903 117 | 176.1904761904762, 0 118 | 184.12698412698413, 0 119 | -------------------------------------------------------------------------------- /data/zachs_2006_young_warned.csv: -------------------------------------------------------------------------------- 1 | 0.9043209876543195, 0 2 | 4.515432098765427, 0 3 | 5.444444444444439, 0.3277777777777775 4 | 
6.638888888888889, 0.1777777777777776 5 | 7.712962962962958, 0.24444444444444424 6 | 9.787037037037042, 0.011111111111111072 7 | 10.58024691358025, 0.08333333333333304 8 | 11.69135802469135, 0.08333333333333304 9 | 12.481481481481485, 0.16111111111111098 10 | 13.67901234567901, 0.005555555555555314 11 | 14.512345679012345, 0.005555555555555314 12 | 15.537037037037035, 0.16111111111111098 13 | 16.685185185185187, 0.09444444444444433 14 | 17.296296296296294, 0.49444444444444424 15 | 18.682098765432098, 0 16 | 19.37962962962963, 0.24444444444444424 17 | 20.62654320987654, 0 18 | 21.737654320987655, 0 19 | 22.524691358024693, 0.08333333333333304 20 | 23.635802469135808, 0.08333333333333304 21 | 24.65740740740741, 0.24444444444444424 22 | 27.57098765432099, 0 23 | 28.682098765432098, 0 24 | 29.42592592592592, 0.16111111111111098 25 | 31.456790123456788, 0.005555555555555314 26 | 32.52777777777777, 0.07777777777777772 27 | 33.67901234567901, 0.005555555555555314 28 | 34.422839506172835, 0.16666666666666652 29 | 35.53395061728395, 0.16666666666666652 30 | 37.5679012345679, 0.005555555555555314 31 | 38.67901234567901, 0.005555555555555314 32 | 39.42283950617285, 0.16666666666666652 33 | 40.580246913580254, 0.08333333333333304 34 | 41.41358024691358, 0.08333333333333304 35 | 42.478395061728406, 0.16666666666666652 36 | 43.404320987654316, 0 37 | 45.62654320987653, 0 38 | 48.26543209876543, 0.24999999999999978 39 | 49.51543209876543, 0 40 | 65.34876543209876, 0 41 | 66.41666666666667, 0.07777777777777772 42 | 67.57098765432097, 0 43 | 69.51543209876543, 0 44 | 71.22839506172838, 0.4166666666666665 45 | 72.5246913580247, 0.08333333333333304 46 | 74.19135802469135, 0.08333333333333304 47 | 77.29320987654322, 0 48 | 75.62654320987654, 0 49 | 78.36111111111111, 0.07777777777777772 50 | 79.23765432098764, 0 51 | 81.1820987654321, 0 52 | 82.1574074074074, 0.24444444444444424 53 | 83.4043209876543, 0 54 | 84.23765432098766, 0 55 | 86, 0.3277777777777775 56 | 88.34876543209877, 0.09999999999999987 57 | 89.14814814814815, 0.16111111111111098 58 | 90.34876543209876, 0 59 | 94.23765432098764, 0 60 | 95.25925925925927, 0.16111111111111098 61 | 96.45987654320987, 0 62 | 100.070987654321, 0 63 | 101.0925925925926, 0.16111111111111098 64 | 102.2932098765432, 0 65 | 103.26851851851852, 0.24444444444444424 66 | 104.10185185185186, 0.24444444444444424 67 | 106.45987654320989, 0 68 | 107.2932098765432, 0 69 | 108.26543209876543, 0.24999999999999978 70 | 109.46913580246915, 0.08333333333333304 71 | 111.96913580246915, 0.08333333333333304 72 | 113.12654320987654, 0 73 | 118.95987654320986, 0 74 | 120.02777777777779, 0.5777777777777777 75 | 122.01234567901234, 0.005555555555555314 76 | 121.13888888888887, 0.07777777777777772 77 | 123.08333333333333, 0.07777777777777772 78 | 124.23765432098766, 0 79 | 125.06790123456788, 0.005555555555555314 80 | 126.08950617283953, 0.16666666666666652 81 | 127.57098765432097, 0 82 | 128.9567901234568, 0.005555555555555314 83 | 129.9814814814815, 0.16111111111111098 84 | 131.17592592592592, 0.011111111111111072 85 | 131.97530864197532, 0.07222222222222197 86 | 133.12345679012347, 0.005555555555555314 87 | 134.19444444444443, 0.07777777777777772 88 | 135.070987654321, 0 89 | 136.1820987654321, 0 90 | 136.87962962962965, 0.24444444444444424 91 | 138.12654320987656, 0 92 | 138.9598765432099, 0 93 | 140.0277777777778, 0.07777777777777772 94 | 141.1820987654321, 0 95 | 146.1820987654321, 0 96 | 146.96913580246914, 0.08333333333333304 97 | 148.08024691358028, 0.08333333333333304 98 | 
149.23765432098767, 0 99 | 150.90432098765433, 0 100 | 151.97222222222226, 0.07777777777777772 101 | 153.12654320987656, 0 102 | 154.79320987654322, 0 103 | 155.81172839506175, 0.16666666666666652 104 | 157.01543209876544, 0 105 | 158.1265432098766, 0 106 | 158.91666666666669, 0.07777777777777772 107 | 160.070987654321, 0 108 | 161.78703703703707, 0.411111111111111 109 | 162.8456790123457, 0.005555555555555314 110 | 163.91358024691363, 0.08333333333333304 111 | 165.0246913580247, 0.08333333333333304 112 | 165.67283950617286, 0.4166666666666665 113 | 166.96913580246917, 0.08333333333333304 114 | 168.08024691358028, 0.08333333333333304 115 | 168.9598765432099, 0 116 | 169.79320987654322, 0 117 | 170.76234567901238, 0.2555555555555553 118 | 171.83641975308643, 0.3222222222222222 119 | 172.7037037037037, 0.26111111111111107 120 | 173.7808641975309, 0.3222222222222222 121 | 175.07098765432102, 0 122 | 182.8487654320988, 0 123 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: sem 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | # - appnope=0.1.0=py27_0 7 | - backports=1.0=py27_1 8 | - backports.functools_lru_cache=1.5=py_1 9 | - backports.shutil_get_terminal_size=1.0.0=py_3 10 | - backports_abc=0.5=py27_0 11 | - blas=1.1=openblas 12 | - bleach=2.1.3=py_0 13 | - ca-certificates=2018.4.16=0 14 | - certifi=2018.4.16=py27_0 15 | - configparser=3.5.0=py27_0 16 | - cycler=0.10.0=py_1 17 | - decorator=4.3.0=py_0 18 | - entrypoints=0.2.3=py27_1 19 | - enum34=1.1.6=py27_1 20 | - freetype=2.8.1=0 21 | - functools32=3.2.3.2=py27_2 22 | - futures=3.2.0=py27_0 23 | - html5lib=1.0.1=py_0 24 | - ipykernel=4.8.2=py27_0 25 | - ipython=5.7.0=py27_0 26 | - ipython_genutils=0.2.0=py_1 27 | - ipywidgets=7.2.1=py27_1 28 | - jinja2=2.10=py_1 29 | - jsonschema=2.6.0=py27_1 30 | - jupyter_client=5.2.3=py_1 31 | - jupyter_core=4.4.0=py_0 32 | - kiwisolver=1.0.1=py27_1 33 | - libgfortran # =3.0.0=0 34 | - libpng=1.6.34=ha92aebf_1 35 | - libsodium=1.0.16=0 36 | - markupsafe=1.0=py27_0 37 | - matplotlib=2.2.2=py27_1 38 | - mistune=0.8.3=py27_1 39 | - nbconvert=5.3.1=py_1 40 | - nbformat=4.4.0=py27_0 41 | - ncurses=5.9=10 42 | - notebook=5.5.0=py27_0 43 | - numpy=1.14.5=py27_blas_openblashd3ea46f_201 44 | - openblas=0.2.20=8 45 | - openssl=1.0.2o=0 46 | - pandas=0.23.3=py27_0 47 | # - pandoc=2.2.1=hde52d81_0 48 | - pandocfilters=1.4.2=py27_0 49 | - pathlib2=2.3.2=py27_0 50 | - patsy=0.5.0=py_1 51 | - pexpect=4.6.0=py27_0 52 | - pickleshare=0.7.4=py27_0 53 | - pip=9.0.3=py27_0 54 | - prompt_toolkit=1.0.15=py27_0 55 | - ptyprocess=0.6.0=py27_0 56 | - pygments=2.2.0=py_1 57 | - pyparsing=2.2.0=py_1 58 | - python=2.7.15=0 59 | - python-dateutil=2.7.3=py_0 60 | - pytz=2018.5=py_0 61 | - pyzmq=17.0.0=py27_4 62 | - readline=7.0=0 63 | - scandir=1.7=py27_0 64 | - scikit-learn # =0.19.1=py27_blas_openblas_201 65 | - scipy=1.1.0=py27_blas_openblas_200 66 | - seaborn=0.8.1=py_1 67 | - send2trash=1.5.0=py_0 68 | - setuptools=40.0.0=py27_0 69 | - simplegeneric=0.8.1=py_1 70 | - singledispatch=3.4.0.3=py27_0 71 | - six=1.11.0=py27_1 72 | - sqlite=3.20.1=2 73 | - statsmodels=0.9.0=py27_0 74 | - subprocess32=3.5.2=py27_0 75 | - terminado=0.8.1=py27_0 76 | - testpath=0.3.1=py27_0 77 | - tk=8.6.7=0 78 | - tornado=5.0.2=py27_0 79 | - tqdm=4.23.4=py_0 80 | - traitlets=4.3.2=py27_0 81 | - wcwidth=0.1.7=py_1 82 | # - webencodings=0.5.1=py27_0 83 | - wheel=0.31.1=py27_0 84 | - 
widgetsnbextension=3.2.1=py27_0 85 | - zeromq=4.2.5=hfc679d8_3 86 | - zlib=1.2.11=h470a237_3 87 | # - anaconda=custom=py27h2cfa9e9_0 88 | - pip: 89 | - absl-py==0.2.2 90 | - astor==0.7.1 91 | - backports.weakref==1.0.post1 92 | - edward==1.3.5 93 | - funcsigs==1.0.2 94 | - gast==0.2.0 95 | - grpcio==1.13.0 96 | - h5py==2.8.0 97 | - keras==2.2.0 98 | - keras-applications==1.0.2 99 | - keras-preprocessing==1.0.1 100 | - markdown==2.6.11 101 | - mock==2.0.0 102 | - pbr==4.1.0 103 | - protobuf==3.6.0 104 | - pyyaml==3.13 105 | - tensorboard==1.9.0 106 | - tensorflow==1.9.0 107 | - termcolor==1.1.0 108 | - werkzeug==0.14.1 109 | #prefix: /anaconda3/envs/sem 110 | 111 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from event_models import * 2 | from sem import * 3 | from memory import * -------------------------------------------------------------------------------- /models/event_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from utils import unroll_data 4 | import keras 5 | from keras.models import Sequential 6 | from keras.layers import Dense, Activation, SimpleRNN, GRU, Dropout, LSTM, LeakyReLU, Lambda 7 | from keras.initializers import glorot_uniform # Or your initializer of choice 8 | from keras import regularizers 9 | from keras.optimizers import * 10 | from models.utils import fast_mvnorm_diagonal_logprob 11 | 12 | print("TensorFlow Version: {}".format(tf.__version__)) 13 | print("Keras Version: {}".format(keras.__version__)) 14 | 15 | config = tf.ConfigProto() 16 | config.intra_op_parallelism_threads = 4 17 | config.inter_op_parallelism_threads = 4 18 | tf.Session(config=config) 19 | 20 | 21 | # run a check that tensorflow works on import 22 | def check_tf(): 23 | a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') 24 | b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') 25 | c = tf.matmul(a, b) 26 | 27 | with tf.Session() as sess: 28 | sess.run(c) 29 | print "TensorFlow Check Passed" 30 | check_tf() 31 | 32 | 33 | 34 | def reset_weights(session, model): 35 | for layer in model.layers: 36 | if hasattr(layer, "kernel_initializer"): 37 | layer.kernel.initializer.run(session=session) 38 | 39 | 40 | def map_variance(samples, df0, scale0): 41 | """ 42 | This estimator assumes an scaled inverse-chi squared prior over the 43 | variance and a Gaussian likelihood. The parameters d and scale 44 | of the internal function parameterize the posterior of the variance. 
45 | Taken from Bayesian Data Analysis, ch2 (Gelman) 46 | 47 | samples: N length array or NxD array 48 | df0: prior degrees of freedom 49 | scale0: prior scale parameter 50 | mu: (optional) mean function 51 | 52 | returns: float or d-length array, mode of the posterior 53 | """ 54 | if np.ndim(samples) > 1: 55 | n, d = np.shape(samples) 56 | else: 57 | n = np.shape(samples)[0] 58 | d = 1 59 | 60 | v = np.var(samples, axis=0) 61 | df = df0 + n 62 | scale = (df0 * scale0 + n * v) / df 63 | return df * scale / (df + 2)  # mode of the scaled-inverse-chi-squared posterior 64 | 65 | 66 | class LinearEvent(object): 67 | """ this is the base class of the event model """ 68 | 69 | def __init__(self, d, var_df0, var_scale0, optimizer=None, n_epochs=10, init_model=False, 70 | kernel_initializer='glorot_uniform', l2_regularization=0.00, batch_size=32, prior_log_prob=0.0, 71 | reset_weights=False, batch_update=True, optimizer_kwargs=None): 72 | """ 73 | 74 | :param d: dimensions of the input space 75 | """ 76 | self.d = d 77 | self.f_is_trained = False 78 | self.f0_is_trained = False 79 | self.f0 = np.zeros(d) 80 | 81 | self.x_history = [np.zeros((0, self.d))] 82 | self.prior_probability = prior_log_prob 83 | 84 | if (optimizer is None) and (optimizer_kwargs is None): 85 | optimizer = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0, amsgrad=False) 86 | elif (optimizer is None) and not (optimizer_kwargs is None): 87 | optimizer = Adam(**optimizer_kwargs) 88 | elif (optimizer is not None) and (type(optimizer) != str): 89 | optimizer = optimizer() 90 | 91 | self.compile_opts = dict(optimizer=optimizer, loss='mean_squared_error') 92 | self.kernel_initializer = kernel_initializer 93 | self.kernel_regularizer = regularizers.l2(l2_regularization) 94 | self.n_epochs = n_epochs 95 | self.batch_size = batch_size 96 | self.var_df0 = var_df0 97 | self.var_scale0 = var_scale0 98 | self.d = d 99 | self.reset_weights = reset_weights 100 | self.batch_update = batch_update 101 | self.training_pairs = [] 102 | self.prediction_errors = np.zeros((0, self.d), dtype=np.float) 103 | self.model_weights = None 104 | 105 | # initialize the covariance with the mode of the prior distribution 106 | self.Sigma = np.ones(d) * var_df0 * var_scale0 / (var_df0 + 2) 107 | 108 | self.is_visited = False # governs the special case of model's first prediction (i.e.
with no experience) 109 | 110 | # switch for inheritance -- don't want to init the model for sub-classes 111 | if init_model: 112 | self.init_model() 113 | 114 | def init_model(self): 115 | self._compile_model() 116 | self.model_weights = self.model.get_weights() 117 | return self.model 118 | 119 | def _compile_model(self): 120 | self.model = Sequential([ 121 | Dense(self.d, input_shape=(self.d,), use_bias=True, kernel_initializer=self.kernel_initializer, 122 | kernel_regularizer=self.kernel_regularizer), 123 | Activation('linear') 124 | ]) 125 | self.model.compile(**self.compile_opts) 126 | 127 | def set_model(self, sess, model): 128 | self.sess = sess 129 | self.model = model 130 | self.do_reset_weights() 131 | 132 | def reestimate(self): 133 | self.do_reset_weights() 134 | self.estimate() 135 | 136 | def do_reset_weights(self): 137 | # self._compile_model() 138 | reset_weights(self.sess, self.model) 139 | self.model_weights = self.model.get_weights() 140 | 141 | def update(self, X, Xp, update_estimate=True): 142 | """ 143 | Parameters 144 | ---------- 145 | X: NxD array-like data of inputs 146 | 147 | y: NxD array-like data of outputs 148 | 149 | Returns 150 | ------- 151 | None 152 | 153 | """ 154 | if X.ndim > 1: 155 | X = X[-1, :] # only consider last example 156 | assert X.ndim == 1 157 | assert X.shape[0] == self.d 158 | assert Xp.ndim == 1 159 | assert Xp.shape[0] == self.d 160 | 161 | x_example = X.reshape((1, self.d)) 162 | xp_example = Xp.reshape((1, self.d)) 163 | 164 | # concatenate the training example to the active event token 165 | self.x_history[-1] = np.concatenate([self.x_history[-1], x_example], axis=0) 166 | 167 | # also, create a list of training pairs (x, y) for efficient sampling 168 | # picks random time-point in the history 169 | self.training_pairs.append(tuple([x_example, xp_example])) 170 | 171 | if update_estimate: 172 | self.estimate() 173 | self.f_is_trained = True 174 | 175 | def update_f0(self, Xp, update_estimate=True): 176 | self.update(np.zeros(self.d), Xp, update_estimate=update_estimate) 177 | self.f0_is_trained = True 178 | 179 | # precompute f0 for speed 180 | self.f0 = self._predict_f0() 181 | 182 | def get_variance(self): 183 | # Sigma is stored as a vector corresponding to the entries of the diagonal covariance matrix 184 | return self.Sigma 185 | 186 | def predict_next(self, X): 187 | """ 188 | wrapper for the prediction function that changes the prediction to the identity function 189 | for untrained models (this is an initialization technique) 190 | 191 | """ 192 | if not self.f_is_trained: 193 | if np.ndim(X) > 1: 194 | return np.copy(X[-1, :]).reshape(1, -1) 195 | return np.copy(X).reshape(1, -1) 196 | 197 | return self._predict_next(X) 198 | 199 | def _predict_next(self, X): 200 | """ 201 | Parameters 202 | ---------- 203 | X: 1xD array-like data of inputs 204 | 205 | Returns 206 | ------- 207 | y: 1xD array of prediction vectors 208 | 209 | """ 210 | if X.ndim > 1: 211 | X0 = X[-1, :] 212 | else: 213 | X0 = X 214 | 215 | self.model.set_weights(self.model_weights) 216 | return self.model.predict(np.reshape(X0, newshape=(1, self.d))) 217 | 218 | def predict_f0(self): 219 | """ 220 | wrapper for the prediction function that changes the prediction to the identity function 221 | for untrained models (this is an initialization technique) 222 | 223 | N.B. 
This answer is cached for speed 224 | 225 | """ 226 | return self.f0 227 | 228 | def _predict_f0(self): 229 | return self._predict_next(np.zeros(self.d)) 230 | 231 | def log_likelihood_f0(self, Xp): 232 | 233 | if not self.f0_is_trained: 234 | return self.prior_probability 235 | 236 | # predict the initial point 237 | Xp_hat = self.predict_f0() 238 | 239 | # return the probability 240 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 241 | 242 | def log_likelihood_next(self, X, Xp): 243 | if not self.f_is_trained: 244 | return self.prior_probability 245 | 246 | Xp_hat = self.predict_next(X) 247 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 248 | 249 | def log_likelihood_sequence(self, X, Xp): 250 | if not self.f_is_trained: 251 | return self.prior_probability 252 | 253 | Xp_hat = self.predict_next_generative(X) 254 | return fast_mvnorm_diagonal_logprob(Xp.reshape(-1) - Xp_hat.reshape(-1), self.Sigma) 255 | 256 | # create a new cluster of scenes 257 | def new_token(self): 258 | if len(self.x_history) == 1 and self.x_history[0].shape[0] == 0: 259 | # special case for the first cluster which is already created 260 | return 261 | self.x_history.append(np.zeros((0, self.d))) 262 | 263 | def predict_next_generative(self, X): 264 | self.model.set_weights(self.model_weights) 265 | # the LDS is a markov model, so these functions are the same 266 | return self.predict_next(X) 267 | 268 | def run_generative(self, n_steps, initial_point=None): 269 | self.model.set_weights(self.model_weights) 270 | if initial_point is None: 271 | x_gen = self._predict_f0() 272 | else: 273 | x_gen = np.reshape(initial_point, (1, self.d)) 274 | for ii in range(1, n_steps): 275 | x_gen = np.concatenate([x_gen, self.predict_next_generative(x_gen[:ii, :])]) 276 | return x_gen 277 | 278 | def estimate(self): 279 | if self.reset_weights: 280 | self.do_reset_weights() 281 | else: 282 | self.model.set_weights(self.model_weights) 283 | 284 | n_pairs = len(self.training_pairs) 285 | 286 | if self.batch_update: 287 | def draw_sample_pair(): 288 | # draw a random cluster for the history 289 | idx = np.random.randint(n_pairs) 290 | return self.training_pairs[idx] 291 | else: 292 | # for online sampling, just use the last training sample 293 | def draw_sample_pair(): 294 | return self.training_pairs[-1] 295 | 296 | # run batch gradient descent on all of the past events! 
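# (Illustrative note on the loop below; the names used here are the ones defined above.)
# Each of self.n_epochs iterations assembles a minibatch of self.batch_size (x, x') pairs
# drawn with replacement from this event's history via draw_sample_pair() -- or the single
# most recent pair when batch_update=False -- reshapes them into (batch_size, d) arrays,
# and takes one gradient step with self.model.train_on_batch. Roughly:
#     pairs = [draw_sample_pair() for _ in range(self.batch_size)]
#     x_batch = np.reshape([p[0] for p in pairs], (self.batch_size, self.d))
#     xp_batch = np.reshape([p[1] for p in pairs], (self.batch_size, self.d))
#     self.model.train_on_batch(x_batch, xp_batch)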
297 | for _ in range(self.n_epochs): 298 | 299 | # draw a set of training examples from the history 300 | x_batch = [] 301 | xp_batch = [] 302 | for _ in range(self.batch_size): 303 | 304 | x_sample, xp_sample = draw_sample_pair() 305 | 306 | # these data aren't 307 | x_batch.append(x_sample) 308 | xp_batch.append(xp_sample) 309 | 310 | x_batch = np.reshape(x_batch, (self.batch_size, self.d)) 311 | xp_batch = np.reshape(xp_batch, (self.batch_size, self.d)) 312 | self.model.train_on_batch(x_batch, xp_batch) 313 | 314 | # cache the model weights 315 | self.model_weights = self.model.get_weights() 316 | 317 | # Update Sigma 318 | x_train_0, xp_train_0 = self.training_pairs[-1] 319 | xp_hat = self.model.predict(x_train_0) 320 | self.prediction_errors = np.concatenate([self.prediction_errors, xp_train_0 - xp_hat], axis=0) 321 | if np.shape(self.prediction_errors)[0] > 1: 322 | self.Sigma = map_variance(self.prediction_errors, self.var_df0, self.var_scale0) 323 | 324 | 325 | class NonLinearEvent(LinearEvent): 326 | 327 | def __init__(self, d, var_df0, var_scale0, n_hidden=None, hidden_act='tanh', batch_size=32, 328 | optimizer=None, n_epochs=10, init_model=False, kernel_initializer='glorot_uniform', 329 | l2_regularization=0.00, dropout=0.50, prior_log_prob=0.0, reset_weights=False, 330 | batch_update=True, 331 | optimizer_kwargs=None): 332 | LinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 333 | init_model=False, kernel_initializer=kernel_initializer, batch_size=batch_size, 334 | l2_regularization=l2_regularization, prior_log_prob=prior_log_prob, 335 | reset_weights=reset_weights, batch_update=batch_update, 336 | optimizer_kwargs=optimizer_kwargs) 337 | 338 | if n_hidden is None: 339 | n_hidden = d 340 | self.n_hidden = n_hidden 341 | self.hidden_act = hidden_act 342 | self.dropout = dropout 343 | 344 | if init_model: 345 | self.init_model() 346 | 347 | def _compile_model(self): 348 | self.model = Sequential() 349 | self.model.add(Dense(self.n_hidden, input_shape=(self.d,), activation=self.hidden_act, 350 | kernel_regularizer=self.kernel_regularizer, 351 | kernel_initializer=self.kernel_initializer)) 352 | self.model.add(Dropout(self.dropout)) 353 | self.model.add(Dense(self.d, activation='linear', 354 | kernel_regularizer=self.kernel_regularizer, 355 | kernel_initializer=self.kernel_initializer)) 356 | self.model.compile(**self.compile_opts) 357 | 358 | 359 | class NonLinearEvent_normed(NonLinearEvent): 360 | 361 | def __init__(self, d, var_df0, var_scale0, n_hidden=None, hidden_act='tanh', 362 | optimizer=None, n_epochs=10, init_model=False, kernel_initializer='glorot_uniform', 363 | l2_regularization=0.00, dropout=0.50, prior_log_prob=0.0, reset_weights=False, batch_size=32, 364 | batch_update=True, optimizer_kwargs=None): 365 | 366 | NonLinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 367 | l2_regularization=l2_regularization,batch_size=batch_size, 368 | kernel_initializer=kernel_initializer, init_model=False, 369 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 370 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 371 | 372 | if n_hidden is None: 373 | n_hidden = d 374 | self.n_hidden = n_hidden 375 | self.hidden_act = hidden_act 376 | self.dropout = dropout 377 | 378 | if init_model: 379 | self.init_model() 380 | 381 | def _compile_model(self): 382 | self.model = Sequential() 383 | self.model.add(Dense(self.n_hidden, input_shape=(self.d,), activation=self.hidden_act, 384 | 
kernel_regularizer=self.kernel_regularizer, 385 | kernel_initializer=self.kernel_initializer)) 386 | self.model.add(Dropout(self.dropout)) 387 | self.model.add(Dense(self.d, activation='linear', 388 | kernel_regularizer=self.kernel_regularizer, 389 | kernel_initializer=self.kernel_initializer)) 390 | self.model.add(Lambda(lambda x: K.l2_normalize(x, axis=-1))) 391 | self.model.compile(**self.compile_opts) 392 | 393 | 394 | class StationaryEvent(LinearEvent): 395 | 396 | def _predict_next(self, X): 397 | """ 398 | Parameters 399 | ---------- 400 | X: 1xD array-like data of inputs 401 | 402 | Returns 403 | ------- 404 | y: 1xD array of prediction vectors 405 | 406 | """ 407 | 408 | return self.model.predict(np.zeros((1, self.d))) 409 | 410 | 411 | 412 | class RecurentLinearEvent(LinearEvent): 413 | 414 | # RNN which is initialized once and then trained using stochastic gradient descent 415 | # i.e. each new scene is a single example batch of size 1 416 | 417 | def __init__(self, d, var_df0, var_scale0, t=3, 418 | optimizer=None, n_epochs=10, l2_regularization=0.00, batch_size=32, 419 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 420 | batch_update=True, optimizer_kwargs=None): 421 | # 422 | # D = dimension of single input / output example 423 | # t = number of time steps to unroll back in time for the recurrent layer 424 | # n_hidden1 = # of nodes in first hidden layer 425 | # n_hidden2 = # of nodes in second hidden layer 426 | # hidden_act1 = activation f'n of first hidden layer 427 | # hidden_act2 = activation f'n of second hidden layer 428 | # sgd_kwargs = arguments for the stochastic gradient descent algorithm 429 | # n_epochs = how many gradient descent steps to perform for each training batch 430 | # dropout = what fraction of nodes to drop out during training (to prevent overfitting) 431 | 432 | LinearEvent.__init__(self, d, var_df0, var_scale0, optimizer=optimizer, n_epochs=n_epochs, 433 | init_model=False, kernel_initializer=kernel_initializer, 434 | l2_regularization=l2_regularization, prior_log_prob=prior_log_prob, 435 | reset_weights=reset_weights, batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 436 | 437 | self.t = t 438 | self.n_epochs = n_epochs 439 | 440 | # list of clusters of scenes: 441 | # each element of list = history of scenes for given cluster 442 | # history = N x D tensor, N = # of scenes in cluster, D = dimension of single scene 443 | # 444 | self.x_history = [np.zeros((0, self.d))] 445 | self.batch_size = batch_size 446 | 447 | if init_model: 448 | self.init_model() 449 | 450 | # cache the initial weights for retraining speed 451 | self.init_weights = None 452 | 453 | def do_reset_weights(self): 454 | # # self._compile_model() 455 | if self.init_weights is None: 456 | for layer in self.model.layers: 457 | new_weights = [glorot_uniform()(w.shape).eval(session=self.sess) for w in layer.get_weights()] 458 | layer.set_weights(new_weights) 459 | self.model_weights = self.model.get_weights() 460 | self.init_weights = self.model.get_weights() 461 | else: 462 | self.model.set_weights(self.init_weights) 463 | 464 | # initialize model once so we can then update it online 465 | def _compile_model(self): 466 | self.model = Sequential() 467 | self.model.add(SimpleRNN(self.d, input_shape=(self.t, self.d), 468 | activation=None, kernel_initializer=self.kernel_initializer, 469 | kernel_regularizer=self.kernel_regularizer)) 470 | self.model.compile(**self.compile_opts) 471 | 472 | # concatenate current example with 
the history of the last t-1 examples 473 | # this is for the recurrent layer 474 | # 475 | def _unroll(self, x_example): 476 | x_train = np.concatenate([self.x_history[-1][-(self.t - 1):, :], x_example], axis=0) 477 | x_train = np.concatenate([np.zeros((self.t - x_train.shape[0], self.d)), x_train], axis=0) 478 | x_train = x_train.reshape((1, self.t, self.d)) 479 | return x_train 480 | 481 | # predict a single example 482 | def _predict_next(self, X): 483 | self.model.set_weights(self.model_weights) 484 | # Note: this function predicts the next conditioned on the training data the model has seen 485 | 486 | if X.ndim > 1: 487 | X = X[-1, :] # only consider last example 488 | assert np.ndim(X) == 1 489 | assert X.shape[0] == self.d 490 | 491 | x_test = X.reshape((1, self.d)) 492 | 493 | # concatenate current example with history of last t-1 examples 494 | # this is for the recurrent part of the network 495 | x_test = self._unroll(x_test) 496 | return self.model.predict(x_test) 497 | 498 | def _predict_f0(self): 499 | return self.predict_next_generative(np.zeros(self.d)) 500 | 501 | def _update_variance(self): 502 | if np.shape(self.prediction_errors)[0] > 1: 503 | self.Sigma = map_variance(self.prediction_errors, self.var_df0, self.var_scale0) 504 | 505 | def update(self, X, Xp, update_estimate=True): 506 | if X.ndim > 1: 507 | X = X[-1, :] # only consider last example 508 | assert X.ndim == 1 509 | assert X.shape[0] == self.d 510 | assert Xp.ndim == 1 511 | assert Xp.shape[0] == self.d 512 | 513 | x_example = X.reshape((1, self.d)) 514 | xp_example = Xp.reshape((1, self.d)) 515 | 516 | # concatenate the training example to the active event token 517 | self.x_history[-1] = np.concatenate([self.x_history[-1], x_example], axis=0) 518 | 519 | # also, create a list of training pairs (x, y) for efficient sampling 520 | # picks random time-point in the history 521 | _n = np.shape(self.x_history[-1])[0] 522 | x_train_example = np.reshape( 523 | unroll_data(self.x_history[-1][max(_n - self.t, 0):, :], self.t)[-1, :, :], (1, self.t, self.d) 524 | ) 525 | self.training_pairs.append(tuple([x_train_example, xp_example])) 526 | 527 | if update_estimate: 528 | self.estimate() 529 | self.f_is_trained = True 530 | 531 | def predict_next_generative(self, X): 532 | self.model.set_weights(self.model_weights) 533 | X0 = np.reshape(unroll_data(X, self.t)[-1, :, :], (1, self.t, self.d)) 534 | return self.model.predict(X0) 535 | 536 | # optional: run batch gradient descent on all past event clusters 537 | def estimate(self): 538 | if self.reset_weights: 539 | self.do_reset_weights() 540 | else: 541 | self.model.set_weights(self.model_weights) 542 | 543 | n_pairs = len(self.training_pairs) 544 | 545 | if self.batch_update: 546 | def draw_sample_pair(): 547 | # draw a random cluster for the history 548 | idx = np.random.randint(n_pairs) 549 | return self.training_pairs[idx] 550 | else: 551 | # for online sampling, just use the last training sample 552 | def draw_sample_pair(): 553 | return self.training_pairs[-1] 554 | 555 | # run batch gradient descent on all of the past events! 
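# (Illustrative note on the recurrent variant below.) Here each stored training pair is
# (x_window, x_next): x_window has shape (1, self.t, self.d) -- the last t scenes of the
# event, left-padded with zeros when fewer than t scenes have been seen, as in _unroll
# above -- and x_next has shape (1, self.d). The loop below stacks self.batch_size such
# pairs into a (batch_size, t, d) input tensor and a (batch_size, d) target array before
# each call to self.model.train_on_batch. E.g. with t=3 and only two scenes observed,
# x_window is [[0, ..., 0], [x_1], [x_2]].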
556 | for _ in range(self.n_epochs): 557 | 558 | # draw a set of training examples from the history 559 | x_batch = np.zeros((0, self.t, self.d)) 560 | xp_batch = np.zeros((0, self.d)) 561 | for _ in range(self.batch_size): 562 | 563 | x_sample, xp_sample = draw_sample_pair() 564 | 565 | x_batch = np.concatenate([x_batch, x_sample], axis=0) 566 | xp_batch = np.concatenate([xp_batch, xp_sample], axis=0) 567 | 568 | self.model.train_on_batch(x_batch, xp_batch) 569 | self.model_weights = self.model.get_weights() 570 | 571 | # Update Sigma 572 | x_train_0, xp_train_0 = self.training_pairs[-1] 573 | xp_hat = self.model.predict(x_train_0) 574 | self.prediction_errors = np.concatenate([self.prediction_errors, xp_train_0 - xp_hat], axis=0) 575 | self._update_variance() 576 | 577 | 578 | class RecurrentEvent(RecurentLinearEvent): 579 | 580 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 581 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 582 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 583 | batch_update=True, optimizer_kwargs=None): 584 | 585 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 586 | l2_regularization=l2_regularization, batch_size=batch_size, 587 | kernel_initializer=kernel_initializer, init_model=False, prior_log_prob=prior_log_prob, 588 | reset_weights=reset_weights, batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 589 | 590 | if n_hidden is None: 591 | self.n_hidden = d 592 | else: 593 | self.n_hidden = n_hidden 594 | self.dropout = dropout 595 | 596 | if init_model: 597 | self.init_model() 598 | 599 | def _compile_model(self): 600 | self.model = Sequential() 601 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 602 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 603 | self.model.add(SimpleRNN(self.n_hidden, input_shape=(self.t, self.d), 604 | kernel_regularizer=self.kernel_regularizer, 605 | kernel_initializer=self.kernel_initializer)) 606 | self.model.add(LeakyReLU(alpha=0.3)) 607 | self.model.add(Dropout(self.dropout)) 608 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 609 | kernel_initializer=self.kernel_initializer)) 610 | self.model.compile(**self.compile_opts) 611 | 612 | 613 | class GRUEvent(RecurentLinearEvent): 614 | 615 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 616 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 617 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 618 | batch_update=True, optimizer_kwargs=None): 619 | 620 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 621 | l2_regularization=l2_regularization, batch_size=batch_size, 622 | kernel_initializer=kernel_initializer, init_model=False, 623 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 624 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 625 | 626 | if n_hidden is None: 627 | self.n_hidden = d 628 | else: 629 | self.n_hidden = n_hidden 630 | self.dropout = dropout 631 | 632 | if init_model: 633 | self.init_model() 634 | 635 | def _compile_model(self): 636 | self.model = Sequential() 637 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 638 | # input_shape[1] = input_dim; each example is a 
self.d-dimensional vector 639 | self.model.add(GRU(self.n_hidden, input_shape=(self.t, self.d), 640 | kernel_regularizer=self.kernel_regularizer, 641 | kernel_initializer=self.kernel_initializer)) 642 | self.model.add(LeakyReLU(alpha=0.3)) 643 | self.model.add(Dropout(self.dropout)) 644 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 645 | kernel_initializer=self.kernel_initializer)) 646 | self.model.compile(**self.compile_opts) 647 | 648 | 649 | class GRUEvent_normed(RecurentLinearEvent): 650 | 651 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 652 | n_epochs=10, dropout=0.50, l2_regularization=0.00, batch_size=32, 653 | kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, reset_weights=False, 654 | batch_update=True, optimizer_kwargs=None): 655 | 656 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 657 | l2_regularization=l2_regularization, batch_size=batch_size, 658 | kernel_initializer=kernel_initializer, init_model=False, 659 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 660 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 661 | 662 | if n_hidden is None: 663 | self.n_hidden = d 664 | else: 665 | self.n_hidden = n_hidden 666 | self.dropout = dropout 667 | 668 | if init_model: 669 | self.init_model() 670 | 671 | def _compile_model(self): 672 | self.model = Sequential() 673 | # input_shape[0] = timesteps; we pass the last self.t examples for train the hidden layer 674 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 675 | self.model.add(GRU(self.n_hidden, input_shape=(self.t, self.d), 676 | kernel_regularizer=self.kernel_regularizer, 677 | kernel_initializer=self.kernel_initializer)) 678 | self.model.add(LeakyReLU(alpha=0.3)) 679 | self.model.add(Dropout(self.dropout)) 680 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 681 | kernel_initializer=self.kernel_initializer)) 682 | self.model.add(Lambda(lambda x: K.l2_normalize(x, axis=-1))) 683 | self.model.compile(**self.compile_opts) 684 | 685 | 686 | 687 | class GRUEvent_spherical_noise(GRUEvent): 688 | 689 | def _update_variance(self): 690 | if np.shape(self.prediction_errors)[0] > 1: 691 | var = map_variance(self.prediction_errors.reshape(-1), self.var_df0, self.var_scale0) 692 | self.Sigma = var * np.ones(self.d) 693 | 694 | 695 | 696 | class LSTMEvent(RecurentLinearEvent): 697 | 698 | def __init__(self, d, var_df0, var_scale0, t=3, n_hidden=None, optimizer=None, 699 | n_epochs=10, dropout=0.50, l2_regularization=0.00, 700 | batch_size=32, kernel_initializer='glorot_uniform', init_model=False, prior_log_prob=0.0, 701 | reset_weights=False, batch_update=True, optimizer_kwargs=None): 702 | 703 | RecurentLinearEvent.__init__(self, d, var_df0, var_scale0, t=t, optimizer=optimizer, n_epochs=n_epochs, 704 | l2_regularization=l2_regularization, batch_size=batch_size, 705 | kernel_initializer=kernel_initializer, init_model=False, 706 | prior_log_prob=prior_log_prob, reset_weights=reset_weights, 707 | batch_update=batch_update, optimizer_kwargs=optimizer_kwargs) 708 | 709 | if n_hidden is None: 710 | self.n_hidden = d 711 | else: 712 | self.n_hidden = n_hidden 713 | self.dropout = dropout 714 | 715 | if init_model: 716 | self.init_model() 717 | 718 | def _compile_model(self): 719 | self.model = Sequential() 720 | # input_shape[0] = time-steps; we pass the last self.t examples for train the 
hidden layer 721 | # input_shape[1] = input_dim; each example is a self.d-dimensional vector 722 | self.model.add(LSTM(self.n_hidden, input_shape=(self.t, self.d), 723 | kernel_regularizer=self.kernel_regularizer, 724 | kernel_initializer=self.kernel_initializer)) 725 | self.model.add(LeakyReLU(alpha=0.3)) 726 | self.model.add(Dropout(self.dropout)) 727 | self.model.add(Dense(self.d, activation=None, kernel_regularizer=self.kernel_regularizer, 728 | kernel_initializer=self.kernel_initializer)) 729 | self.model.compile(**self.compile_opts) 730 | -------------------------------------------------------------------------------- /models/memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from scipy.stats import multivariate_normal as mvnorm 4 | from scipy.special import logsumexp 5 | from models.utils import fast_mvnorm_diagonal_logprob 6 | np.seterr(divide = 'ignore') 7 | import os 8 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 9 | import multiprocessing 10 | 11 | def sample_pmf(pmf): 12 | return np.sum(np.cumsum(pmf) < np.random.uniform(0, 1)) 13 | 14 | def get_scrp_prob(e, lmda, alfa): 15 | """ 16 | this function isn't used 17 | 18 | :param e: list event labels 19 | :param lmda: float, sCRP lambda 20 | :param alpha: float, sCRP alpha 21 | 22 | :return: total log likelihood of sequence under sCRP 23 | """ 24 | c = {e0: 0 for e0 in set(e)} 25 | log_prob = 0 26 | e0_prev = None 27 | 28 | Z = alfa 29 | log_alfa = np.log(alfa) 30 | for e0 in e: 31 | 32 | l = lmda * (e0 == e0_prev) 33 | 34 | if c[e0] == 0: 35 | log_prob += log_alfa - np.log(Z + l) 36 | else: 37 | log_prob += np.log(c[e0] + l) - np.log(Z + l) 38 | 39 | 40 | # update the counts 41 | c[e0] += 1 42 | Z += 1 43 | e0_prev = e0 44 | 45 | return log_prob 46 | 47 | def reconstruction_accuracy(y_samples, y_mem): 48 | """ 49 | 50 | :param: y_samples - list of y_samples 51 | :param: y_mem - original corrupted memory trace 52 | 53 | :return: item_accuracy, list of probabilities each item in original memory 54 | is in the final reconstruction 55 | 56 | # checked this function on 5/20/19, this function is correct if unintuitive 57 | """ 58 | 59 | 60 | acc = [] 61 | n_orig = len(y_mem) 62 | 63 | for y_sample in y_samples: 64 | 65 | def item_acc(t): 66 | # loop through all of the items in the reconstruction trace, and compare them to 67 | # item t in the corrupted trace. 
Return 1.0 if there is a match, zero otherwise 68 | return np.float(any( 69 | [np.array_equal(yt_samp[0], y_mem[t][0]) for yt_samp in y_sample if yt_samp != None] 70 | )) 71 | 72 | # evaluate the accuracy for all of the items in the set 73 | acc.append([item_acc(t) for t in range(n_orig)]) 74 | 75 | # return the vector of accuracy 76 | return np.mean(acc, axis=0) 77 | 78 | def evaluate_seg(e_samples, e_true): 79 | acc = [] 80 | for e in e_samples: 81 | acc.append(np.mean(np.array(e) == e_true)) 82 | return np.mean(acc) 83 | 84 | def create_corrupted_trace(x, e, tau, epsilon_e, b, return_random_draws_of_p_e=False): 85 | """ 86 | create a corrupted memory trace from feature vectors and event labels 87 | 88 | :param x: np.array of size nXd, featur vectors 89 | :param e: np.array of length n, event labels 90 | :param tau: float, feature corruption 91 | :param epsilon_e: float, event label precision 92 | :param b: int, time index corruption 93 | 94 | :return y_mem: list of corrupted memory tuples: 95 | """ 96 | 97 | n, d = x.shape 98 | 99 | # create the corrupted memory trace 100 | y_mem = list() # these are list, not sets, for hashability 101 | 102 | # pre-draw the uniform random numbers to determine the event-label corruption noise so that 103 | # we can return them as needed. 104 | e_noise_draws = [np.random.uniform(0, 1) for _ in range(n)] 105 | 106 | for t in range(n): 107 | x_mem = x[t, :] + np.random.normal(scale=tau ** 0.5, size=d) # note, built in function uses stdev, not variance 108 | e_mem = [None, e[t]][e_noise_draws[t] < epsilon_e] 109 | t_mem = t + np.random.randint(-b, b + 1) 110 | y_mem.append([x_mem, e_mem, t_mem]) 111 | 112 | if return_random_draws_of_p_e: 113 | return y_mem, e_noise_draws 114 | 115 | return y_mem 116 | 117 | def init_y_sample(y_mem, b, epsilon): 118 | """ 119 | :param y_mem: list of corrupted memory traces 120 | :param b: time corruption noise 121 | :param epsilon: "forgetting" parameter 122 | :returns: sample of y_mem 123 | """ 124 | n_t = len(y_mem) 125 | y_sample = [None] * n_t 126 | 127 | # create a copy of y_mem for sampling without replacement 128 | y_mem_copy = [[x_i.copy(), e_i, t_mem] for (x_i, e_i, t_mem) in y_mem] 129 | 130 | # loop through timepoints in a random order 131 | for t in np.random.permutation(range(n_t)): 132 | 133 | # create a probability function over the sample sets 134 | log_p = np.zeros(len(y_mem_copy) + 1) - np.inf 135 | for ii, (x_i, e_i, t_mem) in enumerate(y_mem_copy): 136 | if np.abs(t_mem - t) <= b: 137 | log_p[ii] = 0 138 | # draw a sample 139 | log_p[-1] = np.log(epsilon) 140 | p = np.exp(log_p - logsumexp(log_p)) # normalize and exponentiate 141 | 142 | ii = sample_pmf(p) 143 | 144 | if ii < len(y_mem_copy): 145 | # only create a sample for none-None events 146 | y_sample[t] = y_mem_copy[ii] 147 | y_mem_copy = y_mem_copy[:ii] + y_mem_copy[ii + 1:] # remove the item from the list of available 148 | return y_sample 149 | 150 | 151 | def init_x_sample_cond_y(y_sample, n, d, tau): 152 | x_sample = np.random.randn(n, d) * tau 153 | 154 | for ii, y_ii in enumerate(y_sample): 155 | if y_ii is not None: 156 | x_sample[ii, :] = y_ii[0] 157 | return x_sample 158 | 159 | 160 | def sample_y_given_x_e(y_mem, x, e, b, tau, epsilon): 161 | # total number of samples 162 | n, d = np.shape(x) 163 | 164 | # 165 | y_sample = [None] * n 166 | 167 | # create a copy of y_mem for sampling without replacement 168 | y_mem_copy = [[x_i.copy(), e_i, t_mem] for (x_i, e_i, t_mem) in y_mem] 169 | 170 | _ones = np.ones(d) 171 | 172 | for t in 
np.random.permutation(range(n)): 173 | 174 | # create a probability function over the sample sets 175 | log_p = np.zeros(len(y_mem_copy) + 1) - np.inf 176 | for ii, (x_i, e_i, t_mem) in enumerate(y_mem_copy): 177 | if np.abs(t_mem - t) <= b: 178 | # because we alwasy assume the covariance function is diagonal, we can use the 179 | # univariate normal to speed up the calculations 180 | log_p[ii] = fast_mvnorm_diagonal_logprob(x_i.reshape(-1) - x[t, :].reshape(-1), _ones * tau) 181 | 182 | # set probability to zero if event token doesn't match 183 | if e_i is not None: 184 | if e_i != e[ii]: 185 | log_p[ii] -= np.inf 186 | 187 | # the last token is always the null token 188 | log_p[-1] = np.log(epsilon) 189 | p = np.exp(log_p - logsumexp(log_p)) # normalize and exponentiate 190 | 191 | # draw a sample 192 | ii = sample_pmf(p) 193 | 194 | if ii < len(y_mem_copy): 195 | # only create a sample for none-None events 196 | y_sample[t] = y_mem_copy[ii] 197 | y_mem_copy = y_mem_copy[:ii] + y_mem_copy[ii + 1:] # remove the item from the list of available 198 | 199 | return y_sample 200 | 201 | 202 | def sample_e_given_x_y(x, y, event_models, alpha, lmda): 203 | n, d = np.shape(x) 204 | 205 | # define a special case of the sCRP that caps the number 206 | # of clusters at k, the number of event models 207 | k = len(event_models) 208 | c = np.zeros(k) 209 | 210 | e_prev = None 211 | e_sample = [None] * n 212 | 213 | # keep a list of all the previous scenes within the sampled event 214 | x_current = np.zeros((1, d)) 215 | 216 | # do this as a filtering operation, just via a forward sweep 217 | for t in range(n): 218 | 219 | # first see if there is a valid memory token with a event label 220 | if (y[t] is not None) and (y[t][1] is not None): 221 | e_sample[t] = y[t][1] 222 | e_prev = e_sample[t] 223 | c[e_sample[t]] += 1 224 | else: 225 | 226 | # calculate the CRP prior 227 | p_sCRP = c.copy() 228 | if e_prev is not None: 229 | p_sCRP[e_prev] += lmda 230 | 231 | # add the alpha value to the unvisited clusters 232 | if any(p_sCRP == 0): 233 | p_sCRP[p_sCRP == 0] = alpha / np.sum(p_sCRP == 0) 234 | # no need to normalize yet 235 | 236 | # calculate the probability of x_t|x_{1:t-1} 237 | p_model = np.zeros(k) - np.inf 238 | for idx, e_model in event_models.iteritems(): 239 | if idx != e_prev: 240 | x_t_hat = e_model.predict_next_generative(x_current) 241 | else: 242 | x_t_hat = e_model.predict_f0() 243 | # because we alwasy assume the covariance function is diagonal, we can use the 244 | # univariate normal to speed up the calculations 245 | p_model[idx] = fast_mvnorm_diagonal_logprob(x[t, :] - x_t_hat.reshape(-1), e_model.Sigma) 246 | 247 | log_p = p_model + np.log(p_sCRP) 248 | log_p -= logsumexp(log_p) 249 | 250 | # draw from the model 251 | e_sample[t] = sample_pmf(np.exp(log_p)) 252 | 253 | # update counters 254 | if e_prev == e_sample[t]: 255 | x_current = np.concatenate([x_current, x[t, :].reshape(1, -1)]) 256 | else: 257 | x_current = x[t, :].reshape(1, -1) 258 | e_prev = e_sample[t] 259 | 260 | # update the counts! 261 | c[e_sample[t]] += 1 262 | 263 | return e_sample 264 | 265 | 266 | def sample_x_given_y_e(x_hat, y, e, event_models, tau): 267 | """ 268 | x_hat: n x d np.array 269 | the previous sample, to be updated and returned 270 | 271 | y: list 272 | the sequence of ordered memory traces. 
Each element is 273 | either a list of [x_y_mem, t_mem] or None 274 | 275 | e: np.array of length n 276 | the sequence of event tokens 277 | 278 | event_models: dict {token: model} 279 | trained event models 280 | 281 | tau: 282 | memory corruption noise 283 | 284 | """ 285 | 286 | # total number of samples 287 | n, d = np.shape(x_hat) 288 | 289 | x_hat = x_hat.copy() # don't want to overwrite the thing outside the loop... 290 | 291 | # Note: this a filtering operation as the backwards pass is computationally difficult. 292 | # (by this, we mean that sampling from Pr(x_t| x_{t+1:n}, x_{1:t-1}, theta, e, y_mem) is intractable 293 | # and we thus only sample from Pr(x_t|, x_{1:t-1}, theta, e, y_mem), which is is Gaussian) 294 | for t in np.random.permutation(range(n)): 295 | # pull the active event model 296 | e_model = event_models[e[t]] 297 | 298 | # pull all preceding scenes within the event 299 | x_idx = np.arange(len(e))[(e == e[t]) & (np.arange(len(e)) < t)] 300 | x_prev = np.concatenate([ 301 | np.zeros((1, d)), x_hat[x_idx, :] 302 | ]) 303 | 304 | # pull the prediction of the event model given the previous estimates of x 305 | f_x = e_model.predict_next_generative(x_prev) 306 | 307 | # is y_t a null tag? 308 | if y[t] is None: 309 | x_bar = f_x 310 | sigmas = e_model.Sigma 311 | else: 312 | # calculate noise lambda for each event model 313 | u_weight = (1. / e_model.Sigma) / (1. / e_model.Sigma + 1. / tau) 314 | 315 | x_bar = u_weight * f_x + (1 - u_weight) * y[t][0] 316 | sigmas = 1. / (1. / e_model.Sigma + 1. / tau) 317 | 318 | # draw a new sample of x_t 319 | # N.B. Handcoding a function to draw random variables introduced error into the algorithm 320 | # and didn't save _any_ time. 321 | x_hat[t, :] = mvnorm.rvs(mean=x_bar.reshape(-1), cov=np.diag(sigmas)) 322 | 323 | return x_hat 324 | 325 | 326 | def gibbs_memory_sampler(y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, b, tau, 327 | n_samples=250, n_burnin=100, progress_bar=True, leave_progress_bar=True): 328 | """ 329 | 330 | :param y_mem: list of 3-tuples (x_mem, e_mem, t_mem), corrupted memory trace 331 | :param sem_mdoel: trained SEM instance 332 | :param memory_alpha: SEM alpha parameter to use in reconstruction 333 | :param memory_labmda: SEM lmbda parameter to use in reconstruction 334 | :param memory_epsilon: (float) parameter controlling propensity to include null trace in reconstruction 335 | :param b: (int) time index corruption noise 336 | :param tau: (float, greater than zero) feature vector corruption noise 337 | :param n_burnin: (int, default 100) number of Gibbs sampling itterations to burn in 338 | :param n_samples: (int, default 250) number of Gibbs sampling itterations to collect 339 | :param progress_bar: (bool) use progress bar for sampling? 340 | :param leave_progress_bar: (bool, default=True) leave the progress bar at the end? 
341 | 342 | :return: y_samples, e_samples, x_samples - Gibbs samples 343 | """ 344 | 345 | event_models = { 346 | k: v for k, v in sem_model.event_models.iteritems() if v.f_is_trained 347 | } 348 | 349 | d = np.shape(y_mem[0][0])[0] 350 | n = len(y_mem) 351 | 352 | # 353 | e_samples = [None] * n_samples 354 | y_samples = [None] * n_samples 355 | x_samples = [None] * n_samples 356 | 357 | y_sample = init_y_sample(y_mem, b, memory_epsilon) 358 | x_sample = init_x_sample_cond_y(y_sample, n, d, tau) 359 | e_sample = sample_e_given_x_y(x_sample, y_sample, event_models, memory_alpha, memory_lambda) 360 | 361 | # loop through the other events in the list 362 | if progress_bar: 363 | def my_it(iterator): 364 | return tqdm(iterator, desc='Gibbs Sampler', leave=leave_progress_bar) 365 | else: 366 | def my_it(iterator): 367 | return iterator 368 | 369 | for ii in my_it(range(n_burnin + n_samples)): 370 | 371 | # sample the memory features 372 | x_sample = sample_x_given_y_e(x_sample, y_sample, e_sample, event_models, tau) 373 | 374 | # sample the event models 375 | e_sample = sample_e_given_x_y(x_sample, y_sample, event_models, memory_alpha, memory_lambda) 376 | 377 | # sample the memory traces 378 | y_sample = sample_y_given_x_e(y_mem, x_sample, e_sample, b, tau, memory_epsilon) 379 | 380 | if ii >= n_burnin: 381 | e_samples[ii - n_burnin] = e_sample 382 | y_samples[ii - n_burnin] = y_sample 383 | x_samples[ii - n_burnin] = x_sample 384 | 385 | return y_samples, e_samples, x_samples 386 | 387 | ## there appears to be something wrong with this function! do not use for now 388 | # def multichain_gibbs(y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, b, tau, n_chains=2, 389 | # n_samples=250, n_burnin=50, progress_bar=True, leave_progress_bar=True): 390 | 391 | # """ 392 | 393 | # :param y_mem: list of 3-tuples (x_mem, e_mem, t_mem), corrupted memory trace 394 | # :param sem_mdoel: trained SEM instance 395 | # :param memory_alpha: SEM alpha parameter to use in reconstruction 396 | # :param memory_labmda: SEM lmbda parameter to use in reconstruction 397 | # :param memory_epsilon: (float) parameter controlling propensity to include null trace in reconstruction 398 | # :param b: (int) time index corruption noise 399 | # :param tau: (float, greater than zero) feature vector corruption noise 400 | # :param n_burnin: (int, default 100) number of Gibbs sampling itterations to burn in 401 | # :param n_samples: (int, default 250) number of Gibbs sampling itterations to collect 402 | # :param progress_bar: (bool) use progress bar for sampling? 403 | # :param leave_progress_bar: (bool, default=True) leave the progress bar at the end? 
404 | 405 | # :return: y_samples, e_samples, x_samples - Gibbs samples 406 | # """ 407 | 408 | # y_samples, e_samples, x_samples = [], [], [] 409 | # for _ in range(n_chains): 410 | # _y0, _e0, _x0 = gibbs_memory_sampler( 411 | # y_mem, sem_model, memory_alpha, memory_lambda, memory_epsilon, 412 | # b, tau, n_samples, progress_bar, False, leave_progress_bar 413 | # ) 414 | # y_samples += _y0 415 | # e_samples += _e0 416 | # x_samples += _x0 417 | # return y_samples, e_samples, x_samples -------------------------------------------------------------------------------- /models/sem.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from scipy.misc import logsumexp 4 | from tqdm import tqdm 5 | from keras import backend as K 6 | from event_models import GRUEvent 7 | 8 | 9 | class Results(object): 10 | """ placeholder object to store results """ 11 | pass 12 | 13 | 14 | class SEM(object): 15 | """ 16 | This port of SAM's code (done with a different programming logic) 17 | in python. More documentation to come! 18 | """ 19 | 20 | def __init__(self, lmda=1., alfa=10.0, f_class=GRUEvent, f_opts=None): 21 | """ 22 | Parameters 23 | ---------- 24 | 25 | lmda: float 26 | sCRP stickiness parameter 27 | 28 | alfa: float 29 | sCRP concentration parameter 30 | 31 | f_class: class 32 | object class that has the functions "predict" and "update". 33 | used as the event model 34 | 35 | f_opts: dictionary 36 | kwargs for initializing f_class 37 | """ 38 | self.lmda = lmda 39 | self.alfa = alfa 40 | # self.beta = beta 41 | 42 | if f_class is None: 43 | raise ValueError("f_model must be specified!") 44 | 45 | self.f_class = f_class 46 | self.f_opts = f_opts 47 | 48 | # SEM internal state 49 | # 50 | self.k = 0 # maximum number of clusters (event types) 51 | self.c = np.array([]) # used by the sCRP prior -> running count of the clustering process 52 | self.d = None # dimension of scenes 53 | self.event_models = dict() # event model for each event type 54 | 55 | self.x_prev = None # last scene 56 | self.k_prev = None # last event type 57 | 58 | # instead of dumping the results, store them to the object 59 | self.results = None 60 | 61 | def pretrain(self, x, event_types, event_boundaries, progress_bar=True, leave_progress_bar=True): 62 | """ 63 | Pretrain a bunch of event models on sequence of scenes X 64 | with corresponding event labels y, assumed to be between 0 and K-1 65 | where K = total # of distinct event types 66 | """ 67 | assert x.shape[0] == event_types.size 68 | 69 | # update internal state 70 | k = np.max(event_types) + 1 71 | self._update_state(x, k) 72 | del k # use self.k 73 | 74 | n = x.shape[0] 75 | 76 | # loop over all scenes 77 | if progress_bar: 78 | def my_it(l): 79 | return tqdm(range(l), desc='Pretraining', leave=leave_progress_bar) 80 | else: 81 | def my_it(l): 82 | return range(l) 83 | 84 | # store a compiled version of the model and session for reuse 85 | self.session = tf.Session() 86 | K.set_session(self.session) 87 | self.model = None 88 | 89 | for ii in my_it(n): 90 | 91 | x_curr = x[ii, :].copy() # current scene 92 | k = event_types[ii] # current event 93 | 94 | if k not in self.event_models.keys(): 95 | # initialize new event model 96 | new_model = self.f_class(self.d, **self.f_opts) 97 | if self.model is None: 98 | self.model = new_model.init_model() 99 | else: 100 | new_model.set_model(self.session, self.model) 101 | self.event_models[k] = new_model 102 | 103 | # update event model 104 | if 
not event_boundaries[ii]: 105 | # we're in the same event -> update using previous scene 106 | assert self.x_prev is not None 107 | self.event_models[k].update(self.x_prev, x_curr, update_estimate=True) 108 | else: 109 | # we're in a new event -> update the initialization point only 110 | self.event_models[k].new_token() 111 | self.event_models[k].update_f0(x_curr, update_estimate=True) 112 | 113 | self.c[k] += 1 # update counts 114 | 115 | self.x_prev = x_curr # store the current scene for next trial 116 | self.k_prev = k # store the current event for the next trial 117 | 118 | self.x_prev = None # Clear this for future use 119 | self.k_prev = None # 120 | 121 | def _update_state(self, x, k=None): 122 | """ 123 | Update internal state based on input data X and max # of event types (clusters) K 124 | """ 125 | # get dimensions of data 126 | [n, d] = np.shape(x) 127 | if self.d is None: 128 | self.d = d 129 | else: 130 | assert self.d == d # scenes must be of same dimension 131 | 132 | # get max # of clusters / event types 133 | if k is None: 134 | k = n 135 | self.k = max(self.k, k) 136 | 137 | # initialize CRP prior = running count of the clustering process 138 | if self.c.size < self.k: 139 | self.c = np.concatenate((self.c, np.zeros(self.k - self.c.size)), axis=0) 140 | assert self.c.size == self.k 141 | 142 | def _calculate_unnormed_sCRP(self, prev_cluster=None): 143 | # internal function for consistency across "run" methods 144 | 145 | # calculate sCRP prior 146 | prior = self.c.copy() 147 | idx = len(np.nonzero(self.c)[0]) # get number of visited clusters 148 | 149 | if idx <= self.k: 150 | prior[idx] += self.alfa # set new cluster probability to alpha 151 | 152 | # add stickiness parameter for n>0, only for the previously chosen event 153 | if prev_cluster is not None: 154 | prior[prev_cluster] += self.lmda 155 | 156 | # prior /= np.sum(prior) 157 | return prior 158 | 159 | def run(self, x, k=None, progress_bar=True, leave_progress_bar=True, minimize_memory=False, compile_model=True): 160 | """ 161 | Parameters 162 | ---------- 163 | x: N x D array of 164 | 165 | k: int 166 | maximum number of clusters 167 | 168 | progress_bar: bool 169 | use a tqdm progress bar? 170 | 171 | leave_progress_bar: bool 172 | leave the progress bar after completing? 
173 | 174 | minimize_memory: bool 175 | function to minimize memory storage during running --> only returns the log_probability of each 176 | cluster and nothing else 177 | 178 | Return 179 | ------ 180 | post: n by k array of posterior probabilities 181 | 182 | """ 183 | 184 | # update internal state 185 | self._update_state(x, k) 186 | del k # use self.k and self.d 187 | 188 | n = x.shape[0] 189 | 190 | # initialize arrays 191 | if not minimize_memory: 192 | post = np.zeros((n, self.k)) 193 | pe = np.zeros(np.shape(x)[0]) 194 | x_hat = np.zeros(np.shape(x)) 195 | log_boundary_probability = np.zeros(np.shape(x)[0]) 196 | 197 | # these are special case variables to deal with the possibility the current event is restarted 198 | lik_restart_event = -np.inf 199 | repeat_prob = -np.inf 200 | restart_prob = 0 201 | 202 | # 203 | log_like = np.zeros((n, self.k)) - np.inf 204 | log_prior = np.zeros((n, self.k)) - np.inf 205 | 206 | # this code just controls the presence/absence of a progress bar -- it isn't important 207 | if progress_bar: 208 | def my_it(l): 209 | return tqdm(range(l), desc='Run SEM', leave=leave_progress_bar) 210 | else: 211 | def my_it(l): 212 | return range(l) 213 | 214 | # store a compiled version of the model and session for reuse 215 | if compile_model: 216 | self.session = tf.Session() 217 | K.set_session(self.session) 218 | self.model = None 219 | 220 | for ii in my_it(n): 221 | 222 | x_curr = x[ii, :].copy() 223 | 224 | # calculate sCRP prior 225 | prior = self._calculate_unnormed_sCRP(self.k_prev) 226 | # N.B. k_prev should be none for the first event if there wasn't pre-training 227 | 228 | # likelihood 229 | active = np.nonzero(prior)[0] 230 | lik = np.zeros(len(active)) 231 | 232 | for k0 in active: 233 | if k0 not in self.event_models.keys(): 234 | new_model = self.f_class(self.d, **self.f_opts) 235 | if self.model is None: 236 | self.model = new_model.init_model() 237 | else: 238 | new_model.set_model(self.session, self.model) 239 | self.event_models[k0] = new_model 240 | new_model = None # clear the new model variable from memory 241 | 242 | # get the log likelihood for each event model 243 | model = self.event_models[k0] 244 | 245 | # detect when there is a change in event types (not the same thing as boundaries) 246 | current_event = (k0 == self.k_prev) 247 | 248 | if current_event: 249 | assert self.x_prev is not None 250 | lik[k0] = model.log_likelihood_next(self.x_prev, x_curr) 251 | 252 | # special case for the possibility of returning to the start of the current event 253 | lik_restart_event = model.log_likelihood_f0(x_curr) 254 | 255 | else: 256 | lik[k0] = model.log_likelihood_f0(x_curr) 257 | 258 | # determine the event identity (without worrying about event breaks for now) 259 | _post = np.log(prior[:len(active)]) + lik 260 | if ii > 0: 261 | # the probability that the current event is repeated is the OR probability -- but b/c 262 | # we are using a MAP approximation over all possibilities, it is a max of the repeated/restarted 263 | 264 | # is restart higher under the current event 265 | restart_prob = lik_restart_event + np.log(prior[self.k_prev] - self.lmda) 266 | repeat_prob = _post[self.k_prev] 267 | _post[self.k_prev] = np.max([repeat_prob, restart_prob]) 268 | 269 | # get the MAP cluster and only update it 270 | k = np.argmax(_post) # MAP cluster 271 | 272 | # determine whether there was a boundary 273 | event_boundary = (k != self.k_prev) or ((k == self.k_prev) and (restart_prob > repeat_prob)) 274 | 275 | # calculate the event boundary 
probability 276 | _post[self.k_prev] = restart_prob 277 | if not minimize_memory: 278 | log_boundary_probability[ii] = logsumexp(_post) - logsumexp(np.concatenate([_post, [repeat_prob]])) 279 | 280 | # calculate the probability of an event label, ignoring the event boundaries 281 | if self.k_prev is not None: 282 | _post[self.k_prev] = logsumexp([restart_prob, repeat_prob]) 283 | prior[self.k_prev] -= self.lmda / 2. 284 | lik[self.k_prev] = logsumexp(np.array([lik[self.k_prev], lik_restart_event])) 285 | 286 | # now, the normalized posterior 287 | if not minimize_memory: 288 | p = np.log(prior[:len(active)]) + lik - np.max(lik) # subtracting the max doesn't change proportionality 289 | post[ii, :len(active)] = np.exp(p - logsumexp(p)) 290 | 291 | # this is a diagnostic readout and does not effect the model 292 | log_like[ii, :len(active)] = lik 293 | log_prior[ii, :len(active)] = np.log(prior[:len(active)]) 294 | 295 | # These aren't used again, remove from memory 296 | _post = None 297 | lik = None 298 | prior = None 299 | 300 | else: 301 | log_like[ii, 0] = 0.0 302 | log_prior[ii, 0] = self.alfa 303 | if not minimize_memory: 304 | post[ii, 0] = 1.0 305 | 306 | if not minimize_memory: 307 | # prediction error: euclidean distance of the last model and the current scene vector 308 | if ii > 0: 309 | model = self.event_models[self.k_prev] 310 | x_hat[ii, :] = model.predict_next(self.x_prev) 311 | pe[ii] = np.linalg.norm(x_curr - x_hat[ii, :]) 312 | # surprise[ii] = log_like[ii, self.k_prev] 313 | 314 | self.c[k] += 1 # update counts 315 | # update event model 316 | if not event_boundary: 317 | # we're in the same event -> update using previous scene 318 | assert self.x_prev is not None 319 | self.event_models[k].update(self.x_prev, x_curr) 320 | else: 321 | # we're in a new event token -> update the initialization point only 322 | self.event_models[k].new_token() 323 | self.event_models[k].update_f0(x_curr) 324 | 325 | self.x_prev = x_curr # store the current scene for next trial 326 | self.k_prev = k # store the current event for the next trial 327 | 328 | if minimize_memory: 329 | self.clear_event_models() 330 | self.results = Results() 331 | self.results.log_post = log_like + log_prior 332 | return 333 | 334 | # calculate Bayesian Surprise 335 | log_post = log_like[:-1, :] + log_prior[:-1, :] 336 | log_post -= np.tile(logsumexp(log_post, axis=1), (np.shape(log_post)[1], 1)).T 337 | surprise = np.concatenate([[0], logsumexp(log_post + log_like[1:, :], axis=1)]) 338 | 339 | self.results = Results() 340 | self.results.post = post 341 | self.results.pe = pe 342 | self.results.surprise = surprise 343 | self.results.log_like = log_like 344 | self.results.log_prior = log_prior 345 | self.results.e_hat = np.argmax(log_like + log_prior, axis=1) 346 | self.results.x_hat = x_hat 347 | self.results.log_loss = logsumexp(log_like + log_prior, axis=1) 348 | self.results.log_boundary_probability = log_boundary_probability 349 | # # this is a debugging thing 350 | self.results.restart_prob = restart_prob 351 | self.results.repeat_prob = repeat_prob 352 | 353 | return post 354 | 355 | def update_single_event(self, x, update=True, save_x_hat=False, generative_predicitons=False): 356 | """ 357 | 358 | :param x: this is an n x d array of the n scenes in an event 359 | :param update: boolean (default True) update the prior and posterior of the event model 360 | :param save_x_hat: boolean (default False) normally, we don't save this as the interpretation can be tricky 361 | N.b: unlike the posterior 
calculation, this is done at the level of individual scenes within the 362 | events (and not one per event) 363 | :return: 364 | """ 365 | if update: 366 | self.k += 1 367 | self._update_state(x, self.k) 368 | 369 | n_scene = np.shape(x)[0] 370 | 371 | # pull the relevant items from the results 372 | if self.results is None: 373 | self.results = Results() 374 | post = np.zeros((1, self.k)) 375 | log_like = np.zeros((1, self.k)) - np.inf 376 | log_prior = np.zeros((1, self.k)) - np.inf 377 | if save_x_hat: 378 | x_hat = np.zeros((n_scene, self.d)) 379 | sigma = np.zeros((n_scene, self.d)) 380 | if generative_predicitons: 381 | x_hat_gen = np.zeros((n_scene, self.d)) 382 | 383 | else: 384 | post = self.results.post 385 | log_like = self.results.log_like 386 | log_prior = self.results.log_prior 387 | if save_x_hat: 388 | x_hat = self.results.x_hat 389 | sigma = self.results.sigma 390 | if generative_predicitons: 391 | x_hat_gen = self.results.x_hat_gen 392 | 393 | # extend the size of the posterior, etc 394 | 395 | n, k0 = np.shape(post) 396 | while k0 < self.k: 397 | post = np.concatenate([post, np.zeros((n, 1))], axis=1) 398 | log_like = np.concatenate([log_like, np.zeros((n, 1)) - np.inf], axis=1) 399 | log_prior = np.concatenate([log_prior, np.zeros((n, 1)) - np.inf], axis=1) 400 | n, k0 = np.shape(post) 401 | 402 | # extend the size of the posterior, etc 403 | post = np.concatenate([post, np.zeros((1, self.k))], axis=0) 404 | log_like = np.concatenate([log_like, np.zeros((1, self.k)) - np.inf], axis=0) 405 | log_prior = np.concatenate([log_prior, np.zeros((1, self.k)) - np.inf], axis=0) 406 | if save_x_hat: 407 | x_hat = np.concatenate([x_hat, np.zeros((n_scene, self.d))], axis=0) 408 | sigma = np.concatenate([sigma, np.zeros((n_scene, self.d))], axis=0) 409 | 410 | if generative_predicitons: 411 | x_hat_gen = np.concatenate([x_hat_gen, np.zeros((n_scene, self.d))], axis=0) 412 | else: 413 | log_like = np.zeros((1, self.k)) - np.inf 414 | log_prior = np.zeros((1, self.k)) - np.inf 415 | 416 | # calculate sCRP prior 417 | prior = self._calculate_unnormed_sCRP(self.k_prev) 418 | 419 | # likelihood 420 | active = np.nonzero(prior)[0] 421 | lik = np.zeros((n_scene, len(active))) 422 | 423 | # again, this is a readout of the model only and not used for updating, 424 | # but also keep track of the within event posterior 425 | map_prediction = np.zeros(np.shape(x)) 426 | k_within_event = np.argmax(prior) # prior to the first scene within an event having been observed, the 427 | # prior determines what the event type will be 428 | 429 | if save_x_hat: 430 | _x_hat = np.zeros((n_scene, self.d)) # temporary storre 431 | _sigma = np.zeros((n_scene, self.d)) 432 | 433 | if generative_predicitons: 434 | _x_hat_gen = np.zeros((n_scene, self.d)) 435 | 436 | for ii, x_curr in enumerate(x): 437 | 438 | # we need to maintain a distribution over possible event types for the current events -- 439 | # this gets locked down after termination of the event. 
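Both `SEM.run` and `SEM.update_single_event` lean on `self._calculate_unnormed_sCRP(self.k_prev)`, whose definition sits earlier in `models/sem.py` and is not reproduced in this excerpt. For orientation, here is a minimal sketch of the sticky Chinese-restaurant-process prior that the surrounding code appears to assume: existing events weighted by their visit counts (`self.c`), the previously active event boosted by the stickiness `lmda`, and one unvisited slot given the concentration `alfa`. This is an illustration only; the repository's actual implementation may differ in detail.

```python
import numpy as np

def sticky_crp_prior(counts, k_prev, alfa, lmda):
    """Illustrative unnormalized sticky-CRP prior (not the repository's implementation)."""
    k_active = int(np.sum(counts > 0))   # events visited so far
    prior = np.zeros(k_active + 1)       # one extra slot for a brand-new event
    prior[:k_active] = counts[:k_active]
    if k_prev is not None:
        prior[k_prev] += lmda            # stickiness: bonus for repeating the last event
    prior[k_active] = alfa               # concentration: mass reserved for a new event
    return prior

# e.g. counts=[8, 4], k_prev=0, alfa=1.0, lmda=10.0  ->  [18., 4., 1.]
print(sticky_crp_prior(np.array([8., 4.]), 0, 1.0, 10.0))
```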
440 | # Also: none of the event models can be updated until *after* the event has been observed 441 | 442 | # special case the first scene within the event 443 | if ii == 0: 444 | event_boundary = True 445 | else: 446 | event_boundary = False 447 | 448 | # loop through each potentially active event model 449 | for k0 in active: 450 | if k0 not in self.event_models.keys(): 451 | new_model = self.f_class(self.d, **self.f_opts) 452 | if self.model is None: 453 | self.model = new_model.init_model() 454 | else: 455 | new_model.set_model(self.session, self.model) 456 | self.event_models[k0] = new_model 457 | 458 | # get the log likelihood for each event model 459 | model = self.event_models[k0] 460 | 461 | if not event_boundary: 462 | lik[ii, k0] = model.log_likelihood_sequence(x[:ii, :].reshape(-1, self.d), x_curr) 463 | else: 464 | lik[ii, k0] = model.log_likelihood_f0(x_curr) 465 | 466 | if event_boundary: 467 | map_prediction[ii, :] = self.event_models[k_within_event].predict_f0() 468 | else: 469 | map_prediction[ii, :] = self.event_models[k_within_event].predict_next_generative(x[:ii, :]) 470 | 471 | # for the purpose of calculating a prediction error and a prediction error only, calculate 472 | # a within event estimate of the event type (the real estimate is at the end of the event, 473 | # taking into account the accumulated evidence 474 | k_within_event = np.argmax(np.sum(lik[:ii+1, :len(active)], axis=0) + np.log(prior[:len(active)])) 475 | if save_x_hat: 476 | model = self.event_models[k_within_event] 477 | _sigma[ii, :] = model.get_variance() 478 | if ii > 0: 479 | _x_hat[ii, :] = model.predict_next_generative(x[:ii, :]) 480 | else: 481 | _x_hat[ii, :] = model.predict_f0() 482 | 483 | if ii == 1 and generative_predicitons: 484 | # create a generative prediction of the model, conditioned on the first experienced scene 485 | # for now, this is code specific to silvy's simluations 486 | model = self.event_models[k_within_event] 487 | _x_hat_gen[0, :] = x[0, :] 488 | _x_hat_gen[1, :] = x[1, :] 489 | for jj in range(2, n_scene): 490 | _x_hat_gen[jj, :] = model.predict_next_generative(x[:jj, :]) 491 | 492 | 493 | # cache the diagnostic measures 494 | log_like[-1, :len(active)] = np.sum(lik, axis=0) 495 | log_prior[-1, :len(active)+1] = np.log(prior[:len(active)+1]) 496 | 497 | # calculate surprise 498 | bayesian_surprise = logsumexp(lik + np.tile(log_prior[-1, :len(active)], (np.shape(lik)[0], 1)), axis=1) 499 | 500 | if update: 501 | 502 | # at the end of the event, find the winning model! 
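At this point the code has accumulated, for every candidate event type, the sum of the per-scene log-likelihoods (`np.sum(lik, axis=0)`, cached into `log_like[-1]`) plus the log of the unnormalized sCRP prior; the block that follows normalizes that quantity into a posterior and takes the argmax to pick the winning event type for the whole pre-segmented event. A toy illustration of that selection step, with invented numbers:

```python
import numpy as np
from scipy.special import logsumexp

log_like_event = np.array([-42.0, -40.5, -44.0])   # summed scene log-likelihoods per event type
log_prior = np.log([10.0, 2.0, 1.0])               # log of the unnormalized sticky-CRP prior

log_post = log_prior + log_like_event
post = np.exp(log_post - logsumexp(log_post))      # normalized posterior over event types
k = np.argmax(log_post)                            # MAP event type -> 0 in this toy case
```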
503 | log_post = log_prior[-1, :len(active)] + log_like[-1, :len(active)] 504 | post[-1, :len(active)] = np.exp(log_post - logsumexp(log_post)) 505 | k = np.argmax(log_post) 506 | 507 | # update the prior 508 | self.c[k] += n_scene 509 | # cache for next event 510 | self.k_prev = k 511 | 512 | # update the winning model's estimate 513 | self.event_models[k].update_f0(x[0]) 514 | x_prev = x[0] 515 | for X0 in x[1:]: 516 | self.event_models[k].update(x_prev, X0) 517 | x_prev = X0 518 | 519 | self.results.post = post 520 | self.results.log_like = log_like 521 | self.results.log_prior = log_prior 522 | self.results.e_hat = np.argmax(post, axis=1) 523 | self.results.log_loss = logsumexp(log_like + log_prior, axis=1) 524 | 525 | if save_x_hat: 526 | x_hat[-n_scene:, :] = _x_hat 527 | sigma[-n_scene:, :] = _sigma 528 | self.results.x_hat = x_hat 529 | self.results.sigma = sigma 530 | 531 | if generative_predicitons: 532 | x_hat_gen[-n_scene:, :] = _x_hat_gen 533 | self.results.x_hat_gen = x_hat_gen 534 | 535 | return bayesian_surprise, map_prediction 536 | 537 | def init_for_boundaries(self, list_events): 538 | # update internal state 539 | 540 | k = 0 541 | self._update_state(np.concatenate(list_events, axis=0), k) 542 | del k # use self.k and self.d 543 | 544 | # store a compiled version of the model and session for reuse 545 | if self.k_prev is None: 546 | self.session = tf.Session() 547 | K.set_session(self.session) 548 | 549 | # initialize the first event model 550 | new_model = self.f_class(self.d, **self.f_opts) 551 | self.model = new_model.init_model() 552 | 553 | self.event_models[0] = new_model 554 | 555 | def run_w_boundaries(self, list_events, progress_bar=True, leave_progress_bar=True, save_x_hat=False, 556 | generative_predicitons=False): 557 | """ 558 | This method is the same as the above except the event boundaries are pre-specified by the experimenter 559 | as a list of event tokens (the event/schema type is still inferred). 560 | 561 | One difference is that the event token-type association is bound at the last scene of an event type. 562 | N.B. ! also, all of the updating is done at the event-token level. There is no updating within an event! 563 | 564 | evaluate the probability of each event over the whole token 565 | 566 | 567 | Parameters 568 | ---------- 569 | list_events: list of n x d arrays -- each an event 570 | 571 | 572 | progress_bar: bool 573 | use a tqdm progress bar? 574 | 575 | leave_progress_bar: bool 576 | leave the progress bar after completing? 577 | 578 | save_x_hat: bool 579 | save the MAP scene predictions? 
580 | 581 | Return 582 | ------ 583 | post: n_e by k array of posterior probabilities 584 | 585 | """ 586 | 587 | # loop through the other events in the list 588 | if progress_bar: 589 | def my_it(iterator): 590 | return tqdm(iterator, desc='Run SEM', leave=leave_progress_bar) 591 | else: 592 | def my_it(iterator): 593 | return iterator 594 | 595 | self.init_for_boundaries(list_events) 596 | 597 | for x in my_it(list_events): 598 | self.update_single_event(x, save_x_hat=save_x_hat, generative_predicitons=generative_predicitons) 599 | 600 | def clear_event_models(self): 601 | for e in self.event_models.itervalues(): 602 | e.model = None 603 | self.event_models = None 604 | tf.reset_default_graph() # for being sure 605 | K.clear_session() 606 | 607 | 608 | 609 | def clear_sem(sem_model): 610 | """ This function deletes sem from memory""" 611 | assert type(sem_model) == SEM 612 | sem_model.clear_event_models() 613 | sem_model.results = None 614 | return None 615 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unroll_data(x, t=1): 5 | """ 6 | This function is used by recurrent neural nets to do back-prop through time. 7 | 8 | Unrolls a data_set for with time-steps, truncated for t time-steps 9 | appends t-1 D-dimensional zero vectors at the beginning. 10 | 11 | Parameters: 12 | x: array, shape (N, D) or shape (D,) 13 | 14 | t: int 15 | time-steps to truncate the unroll 16 | 17 | output 18 | ------ 19 | 20 | X_unrolled: array, shape (N-1, t, D) 21 | 22 | """ 23 | if np.ndim(x) == 2: 24 | n, d = np.shape(x) 25 | elif np.ndim(x): 26 | n, d = 1, np.shape(x)[0] 27 | x = np.reshape(x, (1, d)) 28 | 29 | x_unrolled = np.zeros((n, t, d)) 30 | 31 | # append a t-1 blank (zero) input patterns to the beginning 32 | data_set = np.concatenate([np.zeros((t - 1, d)), x]) 33 | 34 | for ii in range(n): 35 | x_unrolled[ii, :, :] = data_set[ii: ii + t, :] 36 | 37 | return x_unrolled 38 | 39 | # precompute for speed (doesn't really help but whatever) 40 | log_2pi = np.log(2.0 * np.pi) 41 | 42 | def fast_mvnorm_diagonal_logprob(x, variances): 43 | """ 44 | Assumes a zero-mean mulitivariate normal with a diagonal covariance function 45 | 46 | Parameters: 47 | 48 | x: array, shape (D,) 49 | observations 50 | 51 | variances: array, shape (D,) 52 | Diagonal values of the covariance function 53 | 54 | output 55 | ------ 56 | 57 | log-probability: float 58 | 59 | """ 60 | return -0.5 * (log_2pi * np.shape(x)[0] + np.sum(np.log(variances) + (x**2) / variances )) 61 | -------------------------------------------------------------------------------- /opt/__init__.py: -------------------------------------------------------------------------------- 1 | from hrr import * -------------------------------------------------------------------------------- /opt/csw_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/opt/csw_utils.pyc -------------------------------------------------------------------------------- /opt/hrr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import normalize 3 | 4 | 5 | def embed_gaussian(d, n=1): 6 | """ 7 | returns n normal vectors with variance = 1/n, inline with Plate's caluclations 8 | 9 | :param d: (int), 
dimensions of the embedding 10 | :param n: (int, default=1), number of embeddings to return 11 | 12 | :return: d-length np.array 13 | """ 14 | return np.random.normal(loc=0., scale=1./np.sqrt(d), size=(n, d)) 15 | 16 | 17 | def conv_circ(signal, kernal, n=None): 18 | ''' 19 | Parameters 20 | ---------- 21 | 22 | signal: array of length D 23 | 24 | ker: array of length D 25 | 26 | Returns 27 | ------- 28 | 29 | array of length D 30 | 31 | ''' 32 | if n == None: 33 | n = len(signal) + len(kernal) - 1 34 | 35 | return np.real(np.fft.ifft(np.fft.fft(signal, n) * np.fft.fft(kernal, n))) 36 | 37 | 38 | def plate_formula(n, k, err): 39 | ''' 40 | Determine the number of dimensions needed according to Plate's (2003) 41 | formula: 42 | D = 3.16(K-0.25)ln(N/err^3) 43 | where D is the number of dimensions, K is the maximum number of terms 44 | to be combined, N is the number of atomic values in the language, and 45 | err is the probability of error. 46 | 47 | USAGE: D = plate_formula(n, k, err) 48 | ''' 49 | return int(round(3.16 * (k - 0.25) * (np.log(n) - 3 * np.log(err)))) 50 | 51 | 52 | def embed(n, d, distr='spikeslab_gaussian', param=None): 53 | # Embed symbols in a vector space. 54 | # 55 | # USAGE: X = embed(n, d, distr='spikeslab_gaussian', param=None) 56 | # 57 | # INPUTS: 58 | # n - number of symbols 59 | # d - number of dimensions 60 | # distr - string specifying the distribution on the vector space: 61 | # 'spikeslab_gaussian' - mixture of Gaussian "slab" and Bernoulli "spike" 62 | # 'spikeslab_uniform' - mixture of uniform "slab" and Bernoulli "spike" 63 | # 64 | # param (optional) - parameters of the distribution: 65 | # 'spikeslab_gaussian' - param = [variance, spike probability] (default: [1 1]) 66 | # 'spikeslab_uniform' - param = [bound around 0, spike probability] (default: [1 1]) 67 | # OUTPUTS; 68 | # X - [N x D] matrix 69 | # 70 | # Sam Gershman, Jan 2013 71 | 72 | if param is None: 73 | param = [1, 1] 74 | spike = np.round(np.random.rand(n, d) < param[1]) 75 | 76 | if distr == 'spikeslab_gaussian': 77 | slab = np.random.randn(n, d) * param[1] 78 | elif distr == 'spikeslab_uniform': 79 | slab = np.random.uniform(-param[1], param[1], (n, d)) 80 | else: 81 | raise (Exception) 82 | 83 | return spike * slab 84 | 85 | 86 | def encode(a, b): 87 | return conv_circ(a, b, np.size(a)) 88 | 89 | 90 | def embed_onehot(n, d): 91 | v = np.zeros((n, d)) 92 | for ii in range(n): 93 | v[ii][np.random.randint(d)] = 1 94 | return v 95 | 96 | 97 | def decode(a, b): 98 | c = np.real(np.fft.ifft(np.fft.fft(a, np.size(a)) * np.conj(np.fft.fft(b, np.size(a))))) 99 | return c / np.size(a) -------------------------------------------------------------------------------- /opt/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import models 4 | import cPickle as pickle 5 | from sklearn.metrics import adjusted_rand_score 6 | 7 | 8 | def generate_random_events(n_events, data_file=None): 9 | """ 10 | 11 | Parameters 12 | ---------- 13 | n_events: int 14 | 15 | data_file: str 16 | full file path of the Reynolds, Braver, & Zachs data. 
17 | contains pandas dataframe (pickled) with 13 events of 18 | 8-12 time-points and 54 dimensions 19 | 20 | :return: 21 | """ 22 | 23 | if data_file is None: 24 | data_file = './datasets/motion_data.pkl' 25 | motion_data = pd.read_pickle(data_file) 26 | n_patterns = len(set(motion_data.EventNumber)) 27 | 28 | z = np.mean(np.linalg.norm(motion_data.values[:, :-1], axis=1)) 29 | 30 | X = [] 31 | y = [] 32 | p_prev = -1 33 | for _ in range(n_events): 34 | while True: 35 | p = np.random.randint(n_patterns) 36 | if p != p_prev: 37 | p_prev = p 38 | break 39 | e = motion_data.loc[motion_data.EventNumber == p, :].values[:, :-1] 40 | X.append(e / z) 41 | y.append([p] * e.shape[0]) 42 | return np.concatenate(X), np.concatenate(y) 43 | 44 | 45 | def evaluate(x, y, omega, k=None, number=0, save=False, list_event_boundaries=None): 46 | """ 47 | 48 | Parameters 49 | ---------- 50 | x: NxD array 51 | scene vectors 52 | 53 | y: array of length N 54 | true class labels 55 | 56 | omega: dict 57 | dictionary of kwargs for the SEM model 58 | 59 | k: int 60 | maximum number of clusters 61 | 62 | 63 | Return 64 | ------ 65 | r: int, adjusted rand score 66 | """ 67 | 68 | sem = models.SEM(**omega) 69 | 70 | if k is None: 71 | k = x.shape[0] / 2 72 | 73 | sem.run(x, k=k) 74 | 75 | y_hat = np.argmax(sem.results.post, axis=1) 76 | 77 | r = adjusted_rand_score(y, y_hat) 78 | 79 | if save: 80 | f = open('SEM_sample_%d.save' % number, 'wb') 81 | 82 | pickle.dump({'AdjRandScore': r, 'Omega': omega}, f) 83 | f.close() 84 | return 85 | 86 | return sem, r 87 | 88 | 89 | # generate random string 90 | # 91 | def randstr(N=10): 92 | import string 93 | import random 94 | return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N)) 95 | 96 | 97 | if __name__ == '__main__': 98 | pass 99 | -------------------------------------------------------------------------------- /simulations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/__init__.py -------------------------------------------------------------------------------- /simulations/exp_dubrow.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from models import SEM, clear_sem 4 | from models.memory import evaluate_seg 5 | from models.memory import gibbs_memory_sampler 6 | from tqdm import tqdm 7 | from sklearn.metrics import adjusted_rand_score 8 | import sys 9 | 10 | def generate_experiment(seed=None, scaling_factor=1.0, event_duration=5, n_events=5, d=25): 11 | 12 | n = event_duration * n_events 13 | 14 | if seed: 15 | np.random.seed(seed) 16 | 17 | x = np.random.randn(n, d) 18 | e = np.zeros(n, dtype=int) 19 | 20 | # embed a similarity structure within the items of each category 21 | # by adding the same random vector to all of the items within the 22 | # category 23 | categ_one = (np.random.randn(1, d)) * scaling_factor 24 | categ_two = (np.random.randn(1, d)) * scaling_factor 25 | 26 | for ii in range(n_events): 27 | if ii % 2 == 0: 28 | x[ii * event_duration:ii * event_duration + event_duration, :] += categ_one 29 | e[ii * event_duration:ii * event_duration + event_duration] = 0 30 | else: 31 | x[ii * event_duration:ii * event_duration + event_duration, :] += categ_two 32 | e[ii * event_duration:ii * event_duration + event_duration] = 1 33 | 34 | x /= np.sqrt(d) 35 | 36 | # give the model boundaries.... 
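The line that follows converts the per-scene event-type labels `e` into event *tokens*: a change-point indicator is cumulatively summed, so every return to a previously seen category still opens a new token. A worked toy example of that one-liner:

```python
import numpy as np

e = np.array([0, 0, 1, 1, 0, 0])   # event types per scene (the two categories alternate)
e_tokens = np.concatenate([[False], e[1:] != e[:-1]]).cumsum()
print(e_tokens)                     # -> [0 0 1 1 2 2]: three tokens, two types
```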
37 | e_tokens = np.concatenate([[False], e[1:] != e[:-1]]).cumsum() 38 | x_list_items = [] 39 | for e0 in set(e_tokens): 40 | x_list_items.append(x[e0 == e_tokens, :]) 41 | 42 | return x_list_items, e_tokens 43 | 44 | 45 | # diagnostics functions 46 | 47 | def hash_y(y): 48 | if y is not None: 49 | return np.concatenate([y[0], [y[1]], [y[2]]]) 50 | else: 51 | return y 52 | 53 | 54 | def eval_acc(y_samples, y_mem): 55 | acc = [] 56 | for y_sample in y_samples: 57 | def item_acc(t0): 58 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 59 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 60 | acc.append(np.mean([item_acc(t) for t in range(20)])) 61 | return np.mean(acc) 62 | 63 | 64 | def evaluate_item_position_acc(y_samples, y_mem, t): 65 | acc = [] 66 | for y_sample in y_samples: 67 | def item_acc(t0): 68 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 69 | acc.append(item_acc(t)) 70 | return np.mean(acc) 71 | 72 | 73 | def eval_item_acc(y_samples, y_mem, times): 74 | acc = [] 75 | for y_sample in y_samples: 76 | def item_acc(t0): 77 | return np.float(any([all(hash_y(yt) == hash_y(y_mem[t0])) for yt in y_sample])) 78 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 79 | acc.append(np.mean([item_acc(t) for t in times])) 80 | return np.mean(acc) 81 | 82 | 83 | def score_transitions(y_samples, y_mem, t): 84 | acc = [] 85 | idx = np.arange(len(y_mem)) 86 | for y_sample in y_samples: 87 | y_t = [all(hash_y(y0) == hash_y(y_mem[t])) for y0 in y_sample] 88 | y_t1 = [all(hash_y(y0) == hash_y(y_mem[t - 1])) for y0 in y_sample] 89 | # position accuracy is conditioned on recall 90 | if any(y_t): 91 | if any(y_t1): 92 | acc.append(idx[y_t][0] == (idx[y_t1][0] + 1)) 93 | else: 94 | acc.append(False) 95 | return np.mean(acc) 96 | 97 | def run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=0): 98 | 99 | # generate an experiment 100 | x_list_items, e_tokens = generate_experiment() 101 | n, d = np.concatenate(x_list_items).shape 102 | 103 | pre_locs = [ii for ii in range(len(e_tokens) - 1) if e_tokens[ii] != e_tokens[ii + 1]] 104 | pst_locs = [ii for ii in range(1, len(e_tokens)) if e_tokens[ii] != e_tokens[ii - 1]] 105 | 106 | # Train SEM on the stimuli 107 | sem = SEM(**sem_kwargs) 108 | sem.run_w_boundaries(list_events=x_list_items, progress_bar=False) 109 | 110 | e_seg = np.reshape([[ii] * np.sum(e_tokens == t, dtype=int) for t, ii in enumerate(sem.results.e_hat)], -1) 111 | 112 | # create the corrupted memory trace 113 | y_mem = list() # these are list, not sets, for hashability 114 | 115 | for t in range(n): 116 | # n.b. 
python uses stdev, not var 117 | x_mem = np.concatenate(x_list_items)[t, :] + np.random.normal(scale= gibbs_kwargs['tau'] ** 0.5, size=d) 118 | e_mem = [None, e_seg[t]][np.random.rand() < epsilon_e] 119 | t_mem = t + np.random.randint(-gibbs_kwargs['b'], gibbs_kwargs['b'] + 1) 120 | y_mem.append([x_mem, e_mem, t_mem]) 121 | 122 | # add the models to the kwargs 123 | y_samples, e_samples, x_samples = gibbs_memory_sampler(y_mem, sem, **gibbs_kwargs) 124 | 125 | results = pd.DataFrame({ 126 | 'Block': [block_number], 127 | 'Adj-r2': [adjusted_rand_score(sem.results.e_hat, np.array([0, 1, 0, 1, 0]))], 128 | 'Recon Segment': evaluate_seg(e_samples, e_seg), 129 | 'Overall Acc': eval_acc(y_samples, y_mem), 130 | 'Pre-Boundary': np.mean([evaluate_item_position_acc(y_samples, y_mem, t) for t in pre_locs]), 131 | 'Boundary': np.mean([evaluate_item_position_acc(y_samples, y_mem, t) for t in pst_locs]), 132 | 'Transitions Pre-Boundary': np.mean([score_transitions(y_samples, y_mem, t) for t in pre_locs]), 133 | 'Transitions Boundary': np.mean([score_transitions(y_samples, y_mem, t) for t in pst_locs]), 134 | 'Pre-boundary Acc': eval_item_acc(y_samples, y_mem, pre_locs), 135 | 'Boundary Acc': eval_item_acc(y_samples, y_mem, pst_locs), 136 | }) 137 | clear_sem(sem) 138 | sem = None 139 | return results 140 | 141 | def run_subject(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=16, subj_n=0, progress_bar=True): 142 | subject_results = [] 143 | 144 | if progress_bar: 145 | for ii in tqdm(range(n_runs), desc='Running Subject'): 146 | subject_results.append(run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=ii)) 147 | else: 148 | for ii in range(n_runs): 149 | subject_results.append(run_block(sem_kwargs, gibbs_kwargs, epsilon_e, block_number=ii)) 150 | 151 | subject_results = pd.concat(subject_results) 152 | subject_results['Subject'] = [subj_n] * len(subject_results) 153 | return subject_results 154 | 155 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=16, n_batch=8): 156 | batch_results = [] 157 | for ii in range(n_batch): 158 | sys.stdout.write("Beginning batch {} of {}\n".format(ii, n_batch)) 159 | batch_results.append(run_subject(sem_kwargs, gibbs_kwargs, epsilon_e, n_runs=n_runs, subj_n=ii)) 160 | return pd.concat(batch_results) 161 | 162 | -------------------------------------------------------------------------------- /simulations/exp_pettijohn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from models.sem import SEM, clear_sem 4 | from sklearn.metrics import adjusted_rand_score 5 | from models.memory import reconstruction_accuracy, evaluate_seg 6 | from models.memory import gibbs_memory_sampler 7 | 8 | 9 | def generate_task(n=20, d=25): 10 | items = np.random.randn(n, d) 11 | 12 | no_switch_context = np.tile(np.random.randn(1, d), (n, 1)) 13 | switch_context = np.concatenate([np.tile(np.random.randn(1, d), (n / 2, 1)), 14 | np.tile(np.random.randn(1, d), (n / 2, 1))], axis=0) 15 | 16 | x_noswitch = items + no_switch_context 17 | x_switch = items + switch_context 18 | 19 | x_noswitch /= np.sqrt(d) 20 | x_switch /= np.sqrt(d) 21 | 22 | # break the stimuli into two lists for one set of stim and one list for the other 23 | x_list_no_switch = [x_noswitch] 24 | x_list_switch = [x_switch[:n / 2, :], x_switch[n / 2:, :]] 25 | return x_list_no_switch, x_list_switch 26 | 27 | 28 | def evaluate_bound_acc(y_samples, y_mem): 29 | acc = [] 30 | for y_sample in y_samples: 31 | # 32 | def item_acc(t): 33 | return 
np.float(any([all(yt[0] == y_mem[t][0]) for yt in y_sample if yt != None])) 34 | 35 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 36 | acc.append(np.mean([item_acc(t) for t in [10, 11]])) 37 | return np.mean(acc) 38 | 39 | 40 | def evaluate_non_bound_acc(y_samples, y_mem): 41 | acc = [] 42 | for y_sample in y_samples: 43 | def item_acc(t): 44 | return np.float(any([all(yt[0] == y_mem[t][0]) for yt in y_sample if yt != None])) 45 | 46 | # evaluate the accuracy of the boundary items (here, items 10 and 11) 47 | acc.append(np.mean([item_acc(t) for t in range(20) if (t != 10) & (t != 11)])) 48 | return np.mean(acc) 49 | 50 | 51 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e, batch_number=0): 52 | 53 | x_list_no_switch, x_list_switch = generate_task() 54 | n, d = np.concatenate(x_list_switch).shape 55 | 56 | # run through with the switch condition 57 | sem_switch = SEM(**sem_kwargs) 58 | sem_switch.run_w_boundaries(list_events=x_list_switch, leave_progress_bar=False) 59 | 60 | # create the corrupted memory traces 61 | y_mem_switch = list() # these are list, not sets, for hashability 62 | y_mem_noswitch = list() # these are list, not sets, for hashability 63 | 64 | for t in range(n): 65 | # n.b. python uses stdev, not var 66 | x_mem = x_list_switch[t / 10][t % 10, :] + np.random.normal(scale=gibbs_kwargs['tau'] ** 0.5, size=d) 67 | e_mem = [None, sem_switch.event_models.keys()[t / (n / 2)]][np.random.rand() < epsilon_e] 68 | t_mem = t + np.random.randint(-gibbs_kwargs['b'], gibbs_kwargs['b'] + 1) 69 | y_mem_switch.append([x_mem, e_mem, t_mem]) 70 | 71 | # do the no-switch condition ahead of time 72 | e_mem = [None, 0][np.random.rand() < epsilon_e] 73 | y_mem_noswitch.append([x_mem, e_mem, t_mem]) 74 | 75 | # sample from memory 76 | gibbs_kwargs['y_mem'] = y_mem_switch 77 | gibbs_kwargs['sem_model'] = sem_switch 78 | y_samples, e_samples, _ = gibbs_memory_sampler(**gibbs_kwargs) 79 | 80 | results = pd.DataFrame({ 81 | 'Condition': 'Shift', 82 | 'r2': adjusted_rand_score(sem_switch.results.e_hat, np.array([0, 1])), 83 | 'Reconstruction Segementation': evaluate_seg(e_samples, np.concatenate([[e0] * 10 for e0 in sem_switch.event_models])), 84 | 'Overall Acc': reconstruction_accuracy(y_samples, y_mem_switch).mean(), 85 | 'Non-boundary Acc': evaluate_bound_acc(y_samples, y_mem_switch), 86 | 'Boundary Acc': evaluate_non_bound_acc(y_samples, y_mem_switch), 87 | 'Batch': [batch_number], 88 | }, index=[batch_number]) 89 | clear_sem(sem_switch) 90 | sem_switch = None 91 | 92 | # run through with the no-switch condition 93 | sem_no_switch = SEM(**sem_kwargs) 94 | sem_no_switch.run_w_boundaries(list_events=x_list_no_switch, leave_progress_bar=False) 95 | 96 | gibbs_kwargs['y_mem'] = y_mem_noswitch 97 | gibbs_kwargs['sem_model'] = sem_no_switch 98 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**gibbs_kwargs) 99 | 100 | results = pd.concat([results, pd.DataFrame({ 101 | 'Condition': 'No-Shift', 102 | 'Overall Acc': reconstruction_accuracy(y_samples, y_mem_noswitch).mean(), 103 | 'Non-boundary Acc': evaluate_bound_acc(y_samples, y_mem_noswitch), 104 | 'Boundary Acc': evaluate_non_bound_acc(y_samples, y_mem_noswitch), 105 | 'Batch': [batch_number], 106 | }, index=[batch_number])], sort=True) 107 | clear_sem(sem_no_switch) 108 | sem_no_switch = None 109 | 110 | return results -------------------------------------------------------------------------------- /simulations/exp_radvansky.py: -------------------------------------------------------------------------------- 1 | import 
numpy as np 2 | from tqdm import tqdm 3 | import sys 4 | sys.path.append('./') 5 | sys.path.append('../') 6 | from opt import encode 7 | import pandas as pd 8 | from models.memory import reconstruction_accuracy, evaluate_seg 9 | from models.memory import gibbs_memory_sampler 10 | from scipy.special import logsumexp 11 | from sklearn.preprocessing import normalize 12 | from models.sem import clear_sem, SEM 13 | 14 | 15 | def make_task(d=25, n_rooms=15): 16 | # note: in the experiment there were 66 events and 51 probes 17 | verbs = {v: np.random.randn(1, d) / np.sqrt(d) for v in 'enter put_down pick_up leave'.split()} 18 | objects_a = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 19 | objects_b = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 20 | ctx = {ii: np.random.randn(1, d) / np.sqrt(d) for ii in range(n_rooms)} 21 | 22 | # to control the variance of the embedded verbs, each is bound to the same null token if 23 | # the sentence has not object 24 | null_token = np.random.randn(1, d) / np.sqrt(d) 25 | 26 | list_events = [] 27 | 28 | list_objects = [] 29 | for ii in range(n_rooms): 30 | event = np.tile(ctx[ii], (4, 1)) 31 | event += np.concatenate([ 32 | verbs['enter'], 33 | objects_a[ii], 34 | objects_b[ii], 35 | verbs['leave'], 36 | ]) 37 | list_events.append(event) 38 | list_objects.append([objects_a[ii], objects_b[ii]]) 39 | 40 | return list_events, list_objects 41 | 42 | 43 | 44 | def batch(sem_kwargs, gibbs_kwargs, epsilon_e_switch=0.25, epsilon_e_noswitch=0.75, 45 | gamma=2.5, n_rooms=25, progress_bar=True): 46 | 47 | sem_model = SEM(**sem_kwargs) 48 | _gibbs_kwargs = {k: v for k, v in gibbs_kwargs.iteritems() if k != 'e_true'} 49 | 50 | acc = [] 51 | list_events, list_objects = make_task(n_rooms=n_rooms) 52 | sem_model.init_for_boundaries(list_events) 53 | 54 | if progress_bar: 55 | def my_it(iterator): 56 | return tqdm(iterator, desc='Run SEM', leave=False, total=len(list_events)) 57 | else: 58 | def my_it(iterator): 59 | return iterator 60 | 61 | y_mem_switch = list() 62 | for itt, x in my_it(enumerate(list_events)): 63 | 64 | sem_model.update_single_event(x) 65 | n_items, d = np.shape(x) 66 | e_list = np.concatenate([[sem_model.results.e_hat[itt]] * n_items for t in range(n_rooms)]) 67 | 68 | 69 | # create a corrupted memory trace for the switch condition 70 | y_mem_noswitch = [yi for yi in y_mem_switch] 71 | for t in range(n_items): 72 | x_mem = x[t, :] + np.random.normal(scale= _gibbs_kwargs['tau'] ** 0.5, size=d) # note, python uses stdev, not var 73 | e_mem = [None, sem_model.results.e_hat[-1]][np.random.rand() < epsilon_e_switch] 74 | t_mem = t + np.random.randint(-_gibbs_kwargs['b'], _gibbs_kwargs['b'] + 1) 75 | y_mem_switch.append([x_mem, e_mem, t_mem]) 76 | 77 | # for the no-switch condition 78 | e_mem = [None, sem_model.results.e_hat[-1]][np.random.rand() < epsilon_e_noswitch] 79 | y_mem_noswitch.append([x_mem, e_mem, t_mem]) 80 | 81 | # for speed, just reconstruct the past 3 events at max 82 | if len(y_mem_switch) > 3 * 2: 83 | y_mem_switch = y_mem_switch[-6:] 84 | y_mem_noswitch = y_mem_noswitch[-6:] 85 | e_list = e_list[-6:] 86 | 87 | # reconstruct (Switch) 88 | _gibbs_kwargs['y_mem'] = y_mem_switch 89 | _gibbs_kwargs['sem_model'] = sem_model 90 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**_gibbs_kwargs) 91 | x_samples = np.array(x_samples) 92 | 93 | item_acc = reconstruction_accuracy(y_samples=y_samples, y_mem=y_mem_switch) 94 | 95 | # evaluate the probability of the associated vs dissociated items 96 | obj_a, 
obj_b = list_objects[itt] 97 | x_samples_ii = np.reshape(x_samples[:, -2:, :], (-1, d)) 98 | p_a_greater_than_b = \ 99 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_a, axis=1) * gamma) < \ 100 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_b, axis=1) * gamma) 101 | 102 | # use the correct scoring method 103 | acc.append({ 104 | 'Room Number': itt, 105 | 'Condition': 'Switch', 106 | 'Reconstruction Accuracy': item_acc.mean(), 107 | 'Last Room Reconstruction Acc': item_acc[-2:].mean(), 108 | 'Pr(A > B)': p_a_greater_than_b, 109 | 'Reconstruction Segementation': evaluate_seg(e_samples, e_list), 110 | }) 111 | 112 | # clear things from memory 113 | y_samples, e_samples, x_samples = None, None, None 114 | 115 | # reconstruct (No-Switch) 116 | _gibbs_kwargs['y_mem'] = y_mem_noswitch 117 | y_samples, e_samples, x_samples = gibbs_memory_sampler(**_gibbs_kwargs) 118 | x_samples = np.array(x_samples) 119 | item_acc = reconstruction_accuracy(y_samples=y_samples, y_mem=y_mem_noswitch) 120 | 121 | # evaluate the probability of the associated vs dissociated items 122 | obj_a, obj_b = list_objects[itt] 123 | x_samples_ii = np.reshape(x_samples[:, -2:, :], (-1, d)) 124 | p_a_greater_than_b = \ 125 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_a, axis=1) * gamma) < \ 126 | -logsumexp(-np.linalg.norm(x_samples_ii - obj_b, axis=1) * gamma) 127 | 128 | # use the correct scoring method 129 | acc.append({ 130 | 'Room Number': itt, 131 | 'Condition': 'No-Switch', 132 | 'Last Room Reconstruction Acc': item_acc[-2:].mean(), 133 | 'Reconstruction Accuracy': item_acc.mean(), 134 | 'Pr(A > B)': p_a_greater_than_b, 135 | 'Reconstruction Segementation': evaluate_seg(e_samples, e_list), 136 | }) 137 | # clear things from memory 138 | y_samples, e_samples, x_samples = None, None, None 139 | 140 | # clear SEM from memory 141 | clear_sem(sem_model) 142 | sem_model = None 143 | 144 | return pd.DataFrame(acc) 145 | 146 | 147 | if __name__ == "__main__": 148 | pass -------------------------------------------------------------------------------- /simulations/exp_schapiro.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models import SEM, clear_sem 3 | from sklearn import metrics 4 | import pandas as pd 5 | from scipy.special import logsumexp 6 | 7 | def logsumexp_mean(x): 8 | return logsumexp(x) - np.log(len(x)) 9 | 10 | def batch_experiment(sem_kwargs, n_train=1400, n_test=600, progress_bar=True): 11 | 12 | # define the graph structure for the experiment 13 | 14 | g = np.array([ 15 | [0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 16 | [1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 17 | [1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 18 | [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], 19 | [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 20 | [0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], 21 | [0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0], 22 | [0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0], 23 | [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0], 24 | [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], 25 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0], 26 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1], 27 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1], 28 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1], 29 | [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0], 30 | ], dtype=float) 31 | 32 | # define the random vectors 33 | d = 25 34 | items = np.random.randn(15, d) / np.sqrt(d) 35 | 36 | # draw random walks on the graph 37 | def sample_pmf(pmf): 38 | return 
np.sum(np.cumsum(pmf) < np.random.uniform(0, 1)) 39 | 40 | train_nodes = [np.random.randint(15)] 41 | for _ in range(n_train-1): 42 | train_nodes.append(sample_pmf(g[train_nodes[-1]] / g[train_nodes[-1]].sum())) 43 | 44 | # draw hamiltonian paths from the graph 45 | 46 | # this graph defines the same thing but a preference order as well 47 | # higher number are c 48 | preferred_nodes = np.array([ 49 | [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0], 50 | ], dtype=float) 51 | 52 | def sample_hamilton(node0): 53 | is_visited = np.zeros(15, dtype=bool) 54 | counter = 0 55 | nodes = [] 56 | while counter < (len(is_visited)): 57 | p = g[node0] * ~is_visited * preferred_nodes 58 | if np.sum(p) == 0: 59 | p = g[node0] * ~is_visited 60 | 61 | node0 = sample_pmf(p / np.sum(p)) 62 | nodes.append(node0) 63 | is_visited[node0] = True 64 | counter += 1 65 | return nodes 66 | 67 | test_nodes = [] 68 | node0 = np.random.randint(15) 69 | for _ in range(n_test / 15): 70 | test_nodes += sample_hamilton(node0) 71 | node0 = test_nodes[-1] 72 | 73 | # embed the vectors 74 | all_nodes = train_nodes + test_nodes 75 | x = [] 76 | for node in all_nodes: 77 | x.append(items[node]) 78 | x = np.array(x) 79 | 80 | sem_model = SEM(**sem_kwargs) 81 | sem_model.run(x, progress_bar=progress_bar) 82 | 83 | 84 | # prepared diagnostic measures 85 | clusters = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] 86 | node_cluster = [] 87 | for node in test_nodes: 88 | node_cluster.append(clusters[node]) 89 | node_cluster = np.array(node_cluster) 90 | 91 | all_node_cluster = [] 92 | for node in all_nodes: 93 | all_node_cluster.append(clusters[node]) 94 | all_node_cluster = np.array(all_node_cluster) 95 | all_boundaries_true = np.concatenate([[False], (all_node_cluster[1:] != all_node_cluster[:-1])]) 96 | 97 | test_boundaries = sem_model.results.e_hat[n_train-1:-1] != sem_model.results.e_hat[n_train:] 98 | boundaries = sem_model.results.e_hat[:n_train-1] != sem_model.results.e_hat[1:n_train] 99 | 100 | test_bound_prob = sem_model.results.log_boundary_probability[n_train:] 101 | bound_prob = sem_model.results.log_boundary_probability[1:n_train] 102 | 103 | # pull the prediction error (Bayesian Suprise) 104 | 105 | test_pe = sem_model.results.surprise[n_train:] 106 | bound_pe = sem_model.results.surprise[1:n_train] 107 | 108 | # cache the correlation between log boundary probability and log surprise 109 | r = np.corrcoef( 110 | sem_model.results.log_boundary_probability, sem_model.results.surprise 111 | )[0][1] 112 | 113 | 114 | output = { 115 | 'Community Transitions (Hamilton)': np.exp(logsumexp_mean(test_bound_prob[all_boundaries_true[1400:]])), 116 | 'Other Parse (Hamilton)': np.exp(logsumexp_mean(test_bound_prob[all_boundaries_true[1400:]==False])), 117 | 'Community Transitions (All Other Trials)': np.exp(logsumexp_mean(bound_prob[all_boundaries_true[1:n_train]])), 118 | 'Other Parse (All Other Trials)': np.exp(logsumexp_mean(bound_prob[all_boundaries_true[1:n_train]==False])), 119 | 'PE Community Transitions (Hamilton)': logsumexp_mean(test_pe[all_boundaries_true[1400:]]), 120 | 'PE Other Parse (Hamilton)': logsumexp_mean(test_pe[all_boundaries_true[1400:]==False]), 121 | 'PE Community Transitions (All Other Trials)': logsumexp_mean(bound_pe[all_boundaries_true[1:n_train]]), 122 | 'PE Other Parse (All Other Trials)': logsumexp_mean(bound_pe[all_boundaries_true[1:n_train]==False]), 123 | 'r':r 124 | } 125 | 126 | # clear_sem_model 127 | clear_sem(sem_model) 128 | sem_model = None 129 | 130 | return output 131 | 
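`batch_experiment` above bundles one full run of the community-structure simulation: it samples a random walk over the graph for training, Hamiltonian paths for test, runs SEM over the embedded node vectors, and reports boundary probabilities and Bayesian surprise at community transitions versus other transitions. The simulation notebook supplies the SEM keyword arguments; a hypothetical stand-alone invocation, with parameter values copied from the video-segmentation script purely for illustration (they are not claimed to be the settings used for this simulation), could look like this:

```python
from models import GRUEvent
from simulations.exp_schapiro import batch_experiment

# illustrative settings only; the paper simulations are driven from the notebooks
sem_kwargs = dict(
    lmda=10.0,                 # sCRP stickiness
    alfa=1.0,                  # sCRP concentration
    f_class=GRUEvent,          # recurrent event-dynamics model, as in the video simulations
    f_opts=dict(var_df0=10., var_scale0=0.06, l2_regularization=0.0,
                dropout=0.5, n_epochs=10, t=4),
)

output = batch_experiment(sem_kwargs, progress_bar=False)

# 'output' is a dict of boundary probabilities and prediction errors, e.g.
# output['Community Transitions (Hamilton)'] vs. output['Other Parse (Hamilton)']
print(output)
```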
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_param_sensitivity.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_param_sensitivity.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_0.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_1.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_10.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_10.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_11.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_11.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_12.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_12.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_13.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_13.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_14.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_14.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_15.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_15.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_16.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_16.pkl 
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_17.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_17.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_18.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_18.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_19.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_19.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_2.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_20.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_20.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_21.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_21.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_22.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_22.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_23.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_23.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_24.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_24.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_3.pkl 
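The `Dubrow_sim_*.pkl` files in `saved_simulations` are pre-computed outputs checked into the repository. Assuming they are pickled pandas DataFrames of per-block results, as the simulation code's use of pandas suggests (an assumption, not something documented here), they can presumably be reloaded with:

```python
import pandas as pd

# assumption: each Dubrow_sim_*.pkl is a pickled pandas DataFrame of per-block results
sim = pd.read_pickle('simulations/saved_simulations/Dubrow_sim_0.pkl')
print(sim.head())
```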
-------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_4.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_6.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_7.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_7.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_8.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_8.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/Dubrow_sim_9.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/Dubrow_sim_9.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/EventR2_GRU_comp_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/EventR2_GRU_summary_df0_10.0_scale0_0.06_l2_0.0_do_0.5.pkl -------------------------------------------------------------------------------- /simulations/saved_simulations/radvansky_sims.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ProjectSEM/SEM/0db00e38ad9156dd9583ae5f7d063fdc9c33da0a/simulations/saved_simulations/radvansky_sims.pkl -------------------------------------------------------------------------------- /simulations/video_segmentation.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import seaborn as sns 4 | import pandas as pd 
5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from models import SEM, GRUEvent, clear_sem 8 | from scipy.stats import multivariate_normal 9 | from scipy.special import logsumexp 10 | 11 | 12 | def segment_video(event_sequence, sem_kwargs): 13 | """ 14 | :param event_sequence: (NxD np.array) the sequence of N event vectors in D dimensions 15 | :param sem_kwargs: (dict) all of the parameters for SEM 16 | :return: 17 | """ 18 | sem_model = SEM(**sem_kwargs) 19 | sem_model.run(event_sequence, k=event_sequence.shape[0], leave_progress_bar=True) 20 | log_posterior = sem_model.results.log_like + sem_model.results.log_prior 21 | 22 | # clean up memory 23 | clear_sem(sem_model) 24 | sem_model = None 25 | 26 | return log_posterior 27 | 28 | def bin_times(array, max_seconds, bin_size=1.0): 29 | """ Helper function to learn the bin the subject data""" 30 | cumulative_binned = [np.sum(array <= t0 * 1000) for t0 in np.arange(bin_size, max_seconds + bin_size, bin_size)] 31 | binned = np.array(cumulative_binned)[1:] - np.array(cumulative_binned)[:-1] 32 | binned = np.concatenate([[cumulative_binned[0]], binned]) 33 | return binned 34 | 35 | def load_comparison_data(data, bin_size=1.0): 36 | 37 | # Movie A is Saxaphone (185s long) 38 | # Movie B is making a bed (336s long) 39 | # Movie C is doing dishes (255s long) 40 | 41 | # here, we'll collapse over all of the groups (old, young; warned, unwarned) for now 42 | n_subjs = len(set(data.SubjNum)) 43 | 44 | sax_times = np.sort(list(set(data.loc[data.Movie == 'A', 'MS']))).astype(np.float32) 45 | binned_sax = bin_times(sax_times, 185, bin_size) / np.float(n_subjs) 46 | 47 | bed_times = np.sort(list(set(data.loc[data.Movie == 'B', 'MS']))).astype(np.float32) 48 | binned_bed = bin_times(bed_times, 336, bin_size) / np.float(n_subjs) 49 | 50 | dishes_times = np.sort(list(set(data.loc[data.Movie == 'C', 'MS']))).astype(np.float32) 51 | binned_dishes = bin_times(dishes_times, 255, bin_size) / np.float(n_subjs) 52 | 53 | return binned_sax, binned_bed, binned_dishes 54 | 55 | def get_binned_boundary_prop(e_hat, log_post, bin_size=1.0, frequency=30.0): 56 | """ 57 | :param results: SEM.Results 58 | :param bin_size: seconds 59 | :param frequency: in Hz 60 | :return: 61 | """ 62 | 63 | # normalize 64 | log_post0 = log_post - np.tile(np.max(log_post, axis=1).reshape(-1, 1), (1, log_post.shape[1])) 65 | log_post0 -= np.tile(logsumexp(log_post0, axis=1).reshape(-1, 1), (1, log_post.shape[1])) 66 | 67 | boundary_probability = [0] 68 | for ii in range(1, log_post0.shape[0]): 69 | idx = range(log_post0.shape[0]) 70 | idx.remove(e_hat[ii - 1]) 71 | boundary_probability.append(logsumexp(log_post0[ii, idx])) 72 | boundary_probability = np.array(boundary_probability) 73 | 74 | frame_time = np.arange(1, len(boundary_probability) + 1) / float(frequency) 75 | 76 | index = np.arange(0, np.max(frame_time), bin_size) 77 | boundary_probability_binned = [] 78 | for t in index: 79 | boundary_probability_binned.append( 80 | # note: this operation is equivalent to the log of the average boundary probability in the window 81 | logsumexp(boundary_probability[(frame_time >= t) & (frame_time < (t + bin_size))]) - \ 82 | np.log(bin_size * 30.) 
83 | ) 84 | boundary_probability_binned = pd.Series(boundary_probability_binned, index=index) 85 | return boundary_probability_binned 86 | 87 | def get_binned_boundaries(e_hat, bin_size=1.0, frequency=30.0): 88 | """ get the binned boundaries from the model""" 89 | 90 | frame_time = np.arange(1, len(e_hat) + 1) / float(frequency) 91 | index = np.arange(0, np.max(frame_time), bin_size) 92 | 93 | boundaries = np.concatenate([[0], e_hat[1:] !=e_hat[:-1]]) 94 | 95 | boundaries_binned = [] 96 | for t in index: 97 | boundaries_binned.append(np.sum( 98 | boundaries[(frame_time >= t) & (frame_time < (t + bin_size))] 99 | )) 100 | return np.array(boundaries_binned, dtype=bool) 101 | 102 | def get_point_biserial(boundaries_binned, binned_comp): 103 | 104 | 105 | M_1 = np.mean(binned_comp[boundaries_binned == 1]) 106 | M_0 = np.mean(binned_comp[boundaries_binned == 0]) 107 | 108 | n_1 = np.sum(boundaries_binned == 1) 109 | n_0 = np.sum(boundaries_binned == 0) 110 | n = n_1 + n_0 111 | 112 | s = np.std(binned_comp) 113 | r_pb = (M_1 - M_0) / s * np.sqrt(n_1 * n_0 / (float(n)**2)) 114 | return r_pb 115 | 116 | 117 | def get_subjs_rpb(data, bin_size=1.0): 118 | """get the distribution of subjects' point bi-serial correlation coeffs""" 119 | grouped_data = np.concatenate(load_comparison_data(data)) 120 | 121 | r_pbs = [] 122 | 123 | for sj in set(data.SubjNum): 124 | _binned_sax = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'A'), 'MS'], 185, 1.0) 125 | _binned_bed = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'B'), 'MS'], 336, 1.0) 126 | _binned_dishes = bin_times(data.loc[(data.SubjNum == sj) & (data.Movie == 'C'), 'MS'], 255, 1.0) 127 | subs = np.concatenate([_binned_sax, _binned_bed, _binned_dishes]) 128 | 129 | r_pbs.append(get_point_biserial(subs, grouped_data)) 130 | return r_pbs 131 | 132 | def plot_boundaries(binned_subj_data, binned_model_bounds, label, batch=0): 133 | 134 | # boundaries = get_binned_boundaries(log_poseterior) 135 | # boundaries = binned_model_bounds 136 | 137 | plt.figure(figsize=(4.5, 2.0)) 138 | plt.plot(binned_subj_data, label='Subject Boundaries') 139 | plt.xlabel('Time (seconds)') 140 | plt.ylabel('Boundary Probability') 141 | 142 | b = np.arange(len(binned_model_bounds))[binned_model_bounds][0] 143 | plt.plot([b, b], [0, 1], 'k:', label='Model Boundary', alpha=0.75) 144 | for b in np.arange(len(binned_model_bounds))[binned_model_bounds][1:]: 145 | plt.plot([b, b], [0, 1], 'k:', alpha=0.75) 146 | 147 | plt.legend(loc='upper right', framealpha=1.0) 148 | plt.ylim([0, 0.6]) 149 | plt.title('"' + label + '"') 150 | 151 | sns.despine() 152 | plt.savefig('video_segmentation_{}_batch_{}.png'.format(label.replace(" ", ""), batch), 153 | dpi=600, bbox_inches='tight') 154 | 155 | 156 | def convert_type_token(event_types): 157 | tokens = [0] 158 | for ii in range(len(event_types)-1): 159 | if event_types[ii] == event_types[ii+1]: 160 | tokens.append(tokens[-1]) 161 | else: 162 | tokens.append(tokens[-1] + 1) 163 | return tokens 164 | 165 | def get_event_duration(event_types, frequency=30): 166 | tokens = convert_type_token(event_types) 167 | n_tokens = np.max(tokens)+1 168 | lens = [] 169 | for ii in range(n_tokens): 170 | lens.append(np.sum(np.array(tokens) == ii)) 171 | return np.array(lens, dtype=float) / frequency 172 | 173 | 174 | def run_batch(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, batch=0, bin_size=1.0): 175 | Z = np.load(embedded_data_path) 176 | 177 | # the "Sax" movie is from time slices 0 to 5537 178 | sax = Z[0:5537, 
:] 179 | bed = Z[5537:5537 + 10071, :] 180 | dishes = Z[5537 + 10071: 5537 + 10071 + 7633, :] 181 | 182 | # remove the first three seconds of the sax video for clean up 183 | sax = sax[3*30:, :] 184 | 185 | # divide each of the videos by the average norm such that they are, in expectation, unit length 186 | sax /= np.mean(np.linalg.norm(sax, axis=1)) 187 | bed /= np.mean(np.linalg.norm(bed, axis=1)) 188 | dishes /= np.mean(np.linalg.norm(dishes, axis=1)) 189 | 190 | # Z[0:5537, :] = sax 191 | # Z[5537:5537 + 10071, :] = bed 192 | # Z[5537 + 10071: 5537 + 10071 + 7633, :] = dishes 193 | 194 | # calibrate prior 195 | mode = f_opts['var_df0'] * f_opts['var_scale0'] / (f_opts['var_df0'] + 2) 196 | f_opts['prior_log_prob'] = multivariate_normal.logpdf( 197 | np.mean(Z, axis=0), mean=np.zeros(Z.shape[1]), cov=np.eye(Z.shape[1]) * mode 198 | ) 199 | 200 | sem_kwargs = { 201 | 'lmda': lmda, # Stickyness (prior) 202 | 'alfa': alfa, # Concentration parameter (prior) 203 | 'f_class': f_class, 204 | 'f_opts': f_opts 205 | } 206 | 207 | sax_log_post = segment_video(sax, sem_kwargs) 208 | bed_log_post = segment_video(bed, sem_kwargs) 209 | dis_log_post = segment_video(dishes, sem_kwargs) 210 | 211 | e_hat_sax = np.argmax(sax_log_post, axis=1) 212 | e_hat_bed = np.argmax(bed_log_post, axis=1) 213 | e_hat_dis = np.argmax(dis_log_post, axis=1) 214 | 215 | binned_sax_bounds = get_binned_boundaries(e_hat_sax, bin_size=bin_size) 216 | binned_bed_bounds = get_binned_boundaries(e_hat_bed, bin_size=bin_size) 217 | binned_dis_bounds = get_binned_boundaries(e_hat_dis, bin_size=bin_size) 218 | 219 | binned_sax_log_post = get_binned_boundary_prop(e_hat_sax, sax_log_post, bin_size=bin_size) 220 | binned_bed_log_post = get_binned_boundary_prop(e_hat_bed, bed_log_post, bin_size=bin_size) 221 | binned_dis_log_post = get_binned_boundary_prop(e_hat_dis, dis_log_post, bin_size=bin_size) 222 | 223 | # pull the subject data for comparions 224 | data = pd.read_csv(human_data_path, delimiter='\t') 225 | binned_sax_subj, binned_bed_subj, binned_dis_subj = load_comparison_data(data) 226 | 227 | # remove the first three seconds of the sax video 228 | binned_sax_subj = binned_sax_subj[3:] 229 | 230 | # save the plots 231 | plot_boundaries(binned_sax_subj, binned_sax_bounds, "Cleaning Saxophone", batch=batch) 232 | plot_boundaries(binned_bed_subj, binned_bed_bounds, "Making a Bed", batch=batch) 233 | plot_boundaries(binned_dis_subj, binned_dis_bounds, 'Washing Dishes', batch=batch) 234 | 235 | # concatenate all of the data to caluclate the r2 values 236 | binned_subj_bound_freq = np.concatenate([binned_sax_subj, binned_bed_subj, binned_dis_subj]) 237 | binned_model_prob = np.concatenate([binned_sax_log_post, binned_bed_log_post, binned_dis_log_post]) 238 | r2 = np.corrcoef(binned_subj_bound_freq, binned_model_prob)[0][1] ** 2 239 | 240 | # calculate the point-biserial correlation 241 | binned_bounds = np.concatenate([binned_sax_bounds, binned_bed_bounds, binned_dis_bounds]) 242 | r_pb = get_point_biserial(binned_bounds, binned_subj_bound_freq) 243 | 244 | # pull the average duration of the events 245 | sax_duration = np.mean(get_event_duration(binned_sax_log_post)) 246 | bed_duration = np.mean(get_event_duration(binned_bed_log_post)) 247 | dis_duration = np.mean(get_event_duration(binned_dis_log_post)) 248 | 249 | # create a data frame with the model's MAP boundaries, boundary log-probabilities and 250 | # human boundary frequencies for later permutation testing 251 | comp_data = { 252 | 'MAP-Boundaries': binned_bounds, 253 | 
'Boundary-LogProb': binned_model_prob, 254 | 'Human Boundary Freq': binned_subj_bound_freq, 255 | 'Video': ['Sax'] * len(binned_sax_subj) + ['Bed'] * len(binned_bed_subj) + ['Dishes'] * len(binned_dis_subj), 256 | 't': range(len(binned_sax_subj)) + range(len(binned_bed_subj)) + range(len(binned_dis_subj)) 257 | } 258 | 259 | # and summary data as well 260 | summary_data = { 261 | 'Bin Size': bin_size, 262 | 'Event Length (Sax)': sax_duration, 263 | 'Event Length (Bed)': bed_duration, 264 | 'Event Length (Dishes)': dis_duration, 265 | 'Model r2': r2, 266 | 'Model rpb': r_pb, 267 | 'Batch': batch 268 | } 269 | 270 | return summary_data, comp_data 271 | 272 | def main(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, output_tag='', n_batch=25): 273 | 274 | args = [embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts] 275 | 276 | summary = [] 277 | comp_data = [] 278 | for batch in range(n_batch): 279 | summary_stats, _comp_data = run_batch(*args, batch=batch) 280 | summary.append(summary_stats) 281 | pd.DataFrame(summary).to_pickle('simulations/saved_simulations/EventR2_GRU_summary' + output_tag + '.pkl') 282 | 283 | _comp_data['Batch'] = [batch] * len(_comp_data['t']) 284 | comp_data.append(pd.DataFrame(_comp_data)) 285 | pd.DataFrame(comp_data).to_pickle('simulations/saved_simulations/EventR2_GRU_comp' + output_tag + '.pkl') 286 | 287 | return 288 | 289 | 290 | 291 | 292 | 293 | if __name__ == "__main__": 294 | import os 295 | 296 | os.chdir('../') 297 | 298 | embedded_data_path = 'data/videodata/video_color_Z_embedded_64_5epoch.npy' 299 | human_data_path = './data/zachs2006_data021011.dat' 300 | 301 | f_class = GRUEvent 302 | 303 | f_opts=dict( 304 | var_df0=10., 305 | var_scale0=0.06, 306 | l2_regularization=0.0, 307 | dropout=0.5, 308 | n_epochs=10, 309 | t=4 310 | ) 311 | 312 | lmda = 10**4 313 | alfa = 10**-1 314 | 315 | output_tag = '_df0_{}_scale0_{}_l2_{}_do_{}'.format( 316 | f_opts['var_df0'], f_opts['var_scale0'], f_opts['l2_regularization'], 317 | f_opts['dropout'] 318 | ) 319 | 320 | main(embedded_data_path, human_data_path, lmda, alfa, f_class, f_opts, output_tag, n_batch=25) 321 | 322 | --------------------------------------------------------------------------------
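For reference, `get_point_biserial` in `video_segmentation.py` scores agreement between the model's binary MAP boundaries and the binned human boundary frequencies: it is the difference between the mean human rate in model-boundary bins and in non-boundary bins, standardized by the overall spread and weighted by the relative sizes of the two groups. A small self-contained check of that computation on toy numbers:

```python
import numpy as np

def get_point_biserial(boundaries_binned, binned_comp):
    # same computation as in video_segmentation.py
    M_1 = np.mean(binned_comp[boundaries_binned == 1])
    M_0 = np.mean(binned_comp[boundaries_binned == 0])
    n_1 = np.sum(boundaries_binned == 1)
    n_0 = np.sum(boundaries_binned == 0)
    n = n_1 + n_0
    s = np.std(binned_comp)
    return (M_1 - M_0) / s * np.sqrt(n_1 * n_0 / float(n) ** 2)

model_bounds = np.array([0, 1, 0, 0, 1, 0])            # binary MAP boundaries per time bin
human_freq = np.array([.1, .6, .2, .1, .5, .0])        # human boundary frequency per time bin
print(get_point_biserial(model_bounds, human_freq))    # ~0.96: boundaries fall on high-frequency bins
```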