├── datasets ├── __init__.py ├── VCLA_GAZE │ ├── __init__.py │ ├── finetune │ │ ├── __init__.py │ │ ├── vcla_gaze_finetune.py │ │ └── model.py │ ├── vcla_gaze_config.py │ ├── metadata.py │ ├── vcla_gaze_prior.py │ ├── vcla_gaze.py │ └── dataparser.py ├── CAD │ ├── __init__.py │ ├── finetune │ │ ├── __init__.py │ │ ├── model.py │ │ ├── cad_finetune.py │ │ └── parse_features.py │ ├── cad_config.py │ ├── metadata.py │ ├── cad.py │ └── dataparser.py ├── WNP │ ├── __init__.py │ ├── wnp_config.py │ ├── metadata.py │ └── wnp.py ├── Breakfast │ ├── __init__.py │ ├── breakfast_config.py │ ├── metadata.py │ ├── breakfast.py │ └── dataparser.py └── helmert.py ├── models ├── __init__.py ├── parser │ ├── __init__.py │ ├── test.py │ └── GEP_old.py ├── MLP.py ├── BiLSTM.py ├── LSTM_pred.py ├── grammar_gen.py └── parsegraph.py ├── utils ├── __init__.py ├── qualitative.py ├── evalutils.py ├── plyutils.py ├── logutils.py └── vizutils.py ├── experiments ├── __init__.py ├── GEP │ ├── __init__.py │ ├── gep_ablation.py │ ├── gep_seg.py │ ├── gep_pred_parse_prediction.py │ ├── gep.py │ └── gep_pred_topdown.py ├── LSTM │ └── __init__.py ├── STAOG │ ├── __init__.py │ └── prob_utils.py └── exp_config.py ├── requirements.txt ├── gep_breakfast_det.sh ├── basemeta.py ├── config.py ├── breakfast_det.sh ├── cad_pred.sh ├── visualization ├── prediction_plot.py └── detection_plot.py └── README.md /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /utils/qualitative.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/CAD/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/WNP/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /models/parser/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Created on 2/15/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/Breakfast/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/16/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/GEP/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/LSTM/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/STAOG/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/CAD/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/8/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/30/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | scipy 4 | numpy 5 | matplotlib 6 | scikit-image 7 | tqdm 8 | opencv-python 9 | scikit-learn 10 | nltk 11 | seaborn 12 | pandas -------------------------------------------------------------------------------- /utils/evalutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/18/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import numpy as np 10 | 11 | def upsample(prediction, freq=10, length=None): 12 | upsampled_prediction = [i for i in prediction for _ in range(freq)] 13 | if length: 14 | if len(upsampled_prediction) > length: 15 | upsampled_prediction = upsampled_prediction[:length] 16 | return upsampled_prediction -------------------------------------------------------------------------------- /models/MLP.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Created on 11/5/19 4 | 5 | @author: Baoxiong Jia 6 | 7 | Description: 8 | 9 | """ 10 | 11 | import torch.nn as nn 12 | 13 | class MLP(nn.Module): 14 | def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.): 15 | super(MLP, self).__init__() 16 | self.linear1 = nn.Linear(input_size, 2 * hidden_size) 17 | self.linear2 = nn.Linear(2 * hidden_size, hidden_size) 18 | self.linear3 = nn.Linear(hidden_size, num_classes) 19 | self.dropout = nn.Dropout(p=dropout_rate) 20 | 21 | def forward(self, x): 22 | return 
self.linear3(self.dropout(nn.functional.relu(self.linear2(nn.functional.relu(self.linear1(x))))))  # ReLU nonlinearities between the stacked linear layers -------------------------------------------------------------------------------- /gep_breakfast_det.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TRAINED_EPOCHS=$1 3 | 4 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/gep_results" 5 | 6 | subsample=("1" "2" "5" "10" "20" "50") 7 | batch_size=("20" "32" "32" "32" "32" "32") 8 | 9 | if [ ! -d ${LOG_PATH} ] 10 | then 11 | mkdir -p ${LOG_PATH} 12 | fi 13 | 14 | for subs in "${!subsample[@]}" 15 | do 16 | echo GEP_${subsample[$subs]}_b${batch_size[$subs]}_t${TRAINED_EPOCHS} 17 | python experiments/GEP/gep.py --task activity --dataset Breakfast --using_batch_size ${batch_size[$subs]} --subsample ${subsample[$subs]} --lr 1e-3 --lr_decay 0.8 --epochs 50 --trained_epochs ${TRAINED_EPOCHS} > ${LOG_PATH}/eval_s${subsample[$subs]}_b${batch_size[$subs]}_t${TRAINED_EPOCHS}.txt 18 | done 19 | -------------------------------------------------------------------------------- /basemeta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | class Metadata(object): 11 | def __init__(self): 12 | # list for constant strings 13 | self.activities = list() 14 | self.subactivities = list() 15 | self.actions = list() 16 | self.objects = list() 17 | self.affordances = list() 18 | 19 | # reverse index of strings 20 | self.activity_index = dict() 21 | self.subactivity_index = dict() 22 | self.action_index = dict() 23 | self.object_index = dict() 24 | self.affordance_index = dict() 25 | 26 | # Macro constant 27 | self.ACTIVITY_NUM = -1 28 | self.SUBACTIVITY_NUM = -1 29 | self.ACTION_NUM = -1 30 | self.OBJECT_NUM = -1 31 | self.AFFORDANCE_NUM = -1 32 | self.MAXIMUM_OBJ_VIDEO = -1 -------------------------------------------------------------------------------- /datasets/CAD/finetune/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import torch 11 | from datasets.CAD.metadata import CAD_METADATA 12 | metadata = CAD_METADATA() 13 | 14 | class TaskNet(torch.nn.Module): 15 | def __init__(self, feature_dim, task='affordance', hidden_dim=1500): 16 | super(TaskNet, self).__init__() 17 | if task == 'affordance': 18 | num_classes = metadata.AFFORDANCE_NUM 19 | else: 20 | num_classes = metadata.ACTION_NUM 21 | self.module = torch.nn.Sequential( 22 | torch.nn.Linear(feature_dim, 2 * hidden_dim), 23 | torch.nn.ReLU(), 24 | torch.nn.Linear(2 * hidden_dim, hidden_dim), 25 | ) 26 | self.fc = torch.nn.Linear(hidden_dim, num_classes) 27 | 28 | def forward(self, x): 29 | features = self.module(x) 30 | output = self.fc(features)  # classify the hidden features, not the raw input 31 | return features, output -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | 12 | class Paths(object): 13 | 14 | def __init__(self): 15 | self.project_root = '/mnt/hdd/home/baoxiong/Projects/TPAMI2019' 16 | self.vcla_data_root = '/mnt/hdd/home/baoxiong/Datasets/VCLA/' 17 | self.wnp_root = '/mnt/hdd/home/baoxiong/Datasets/Watch-n-Patch/' 18 | self.cad_root = '/mnt/hdd/home/baoxiong/Datasets/CAD120/' 19 | self.breakfast_root 
= '/mnt/hdd/home/baoxiong/Datasets/Breakfast/' 20 | 21 | self.tmp_root = os.path.join(self.project_root, 'tmp') 22 | if not os.path.exists(self.tmp_root): 23 | os.makedirs(self.tmp_root) 24 | self.vis_root = os.path.join(self.project_root, 'vis') 25 | if not os.path.exists(self.vis_root): 26 | os.makedirs(self.vis_root) 27 | self.log_root = os.path.join(self.project_root, 'log') 28 | if not os.path.exists(self.log_root): 29 | os.makedirs(self.log_root) 30 | -------------------------------------------------------------------------------- /breakfast_det.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | EPOCHS=$1 3 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/nn_results" 4 | subsample=("1" "2" "5" "10" "20" "50" "100") 5 | batch_size=("20" "32" "32" "32" "32" "32" "32") 6 | training_epochs=("5" "10" "15" "20" "25" "30" "35" "40" "45" "50") 7 | 8 | if [ ! -d ${LOG_PATH} ] 9 | then 10 | mkdir -p ${LOG_PATH} 11 | fi 12 | 13 | for sub in "${!subsample[@]}" 14 | do 15 | python experiments/LSTM/detect.py --task activity --dataset Breakfast --batch_size ${batch_size[$sub]} --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --subsample ${subsample[$sub]} --save_interval 5 > ${LOG_PATH}/s${subsample[$sub]}_b${batch_size[$sub]}.txt 16 | done 17 | 18 | for sub in "${!subsample[@]}" 19 | do 20 | for trainepochs in "${training_epochs[@]}" 21 | do 22 | python experiments/LSTM/detect.py --task activity --dataset Breakfast --batch_size ${batch_size[$sub]} --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --subsample ${subsample[$sub]} --save_interval 5 --trained_epochs ${trainepochs} --eval True > ${LOG_PATH}/eval_s${subsample[$sub]}_b${batch_size[$sub]}_t${trainepochs}.txt 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /cad_pred.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | EPOCHS=$1 3 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/log/nn_results" 4 | pred_duration=("15" "30" "45" "60" "75" "90" "105" "120" "135" "150") 5 | 6 | if [ ! 
-d ${LOG_PATH} ] 7 | then 8 | mkdir -p ${LOG_PATH} 9 | fi 10 | 11 | for pred in "${!pred_duration[@]}" 12 | do 13 | python experiments/LSTM/pred_baseline.py --task activity --dataset CAD --batch_size 1 --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} > ${LOG_PATH}/pred${pred_duration[$pred]}_train.txt 14 | done 15 | 16 | for pred in "${!pred_duration[@]}" 17 | do 18 | python experiments/LSTM/pred_baseline.py --task activity --dataset CAD --batch_size 1 --lr 5e-4 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} --eval True > ${LOG_PATH}/pred${pred_duration[$pred]}_eval.txt 19 | done 20 | 21 | for pred in "${!pred_duration[@]}" 22 | do 23 | python experiments/GEP/gep_pred_topdown.py --task activity --dataset CAD --batch_size 1 --lr 5e-4 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} --using_pred_duration ${pred_duration[$pred]} > ${LOG_PATH}/gep_pred${pred_duration[$pred]}_eval.txt 24 | done 25 | -------------------------------------------------------------------------------- /datasets/WNP/wnp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: Watch-n-Patch dataset config 7 | No feature extraction, using kernel descriptor results 8 | 9 | """ 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | def __init__(self): 15 | super(Paths, self).__init__() 16 | self.data_root = self.wnp_root 17 | self.tmp_root = os.path.join(self.tmp_root, 'wnp') 18 | 19 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 20 | if not os.path.exists(self.inter_root): 21 | os.makedirs(self.inter_root) 22 | 23 | self.log_root = os.path.join(self.tmp_root, 'log') 24 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 25 | 26 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 27 | self.prior_root = os.path.join(self.tmp_root, 'prior') 28 | 29 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 30 | if not os.path.exists(self.visualize_root): 31 | os.makedirs(self.visualize_root) 32 | self.metadata_root = os.path.join(self.tmp_root, 'metadata') 33 | 34 | 35 | if __name__ == '__main__': 36 | a = Paths() -------------------------------------------------------------------------------- /models/BiLSTM.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch.nn as nn 10 | import torch.nn.utils.rnn as rnn_utils 11 | 12 | class BiLSTM(nn.Module): 13 | def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate=0.): 14 | super(BiLSTM, self).__init__() 15 | self.hidden_layer = hidden_size 16 | self.num_layers = num_layers 17 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False, bidirectional=True, dropout=dropout_rate) 18 | self.fc = nn.Linear(hidden_size * 2, num_classes) 19 | self.dropout = nn.Dropout(p=dropout_rate) 20 | 21 | def forward(self, features): 22 | # # # Initialize hidden states, 2 for bidirectional RNN 23 | # h0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 24 | # c0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 25 | 26 | # out, _ = self.lstm(features, (h0, c0)) 27 | # packed = rnn_utils.pack_sequence(features)  # unused; the padded batch is fed to the LSTM directly 28 | out, _ = self.lstm(features) 29 | out 
= self.dropout(out) 30 | out = self.fc(out) 31 | return out -------------------------------------------------------------------------------- /models/LSTM_pred.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.utils.rnn as rnn_utils 12 | 13 | 14 | class LSTM_Pred(nn.Module): 15 | def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate=0.): 16 | super(LSTM_Pred, self).__init__() 17 | self.hidden_layer = hidden_size 18 | self.num_layers = num_layers 19 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False, bidirectional=False, dropout=dropout_rate) 20 | self.fc = nn.Linear(hidden_size, num_classes) 21 | self.dropout = nn.Dropout(p=dropout_rate) 22 | 23 | def forward(self, features): 24 | # # # Initialize hidden states, 2 for bidirectional RNN 25 | # h0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 26 | # c0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 27 | 28 | # out, _ = self.lstm(features, (h0, c0)) 29 | # packed = rnn_utils.pack_sequence(features)  # unused; the padded batch is fed to the LSTM directly 30 | out, _ = self.lstm(features) 31 | out = self.dropout(out) 32 | out = self.fc(out) 33 | return out -------------------------------------------------------------------------------- /models/grammar_gen.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import subprocess 12 | import tempfile 13 | 14 | 15 | def main(): 16 | project_path = '/media/hdd/home/baoxiong/Projects' 17 | breakfast_path = os.path.join(project_path, 'TPAMI2019', 'tmp', 'breakfast') 18 | corpus_dir = os.path.join(breakfast_path, 'corpus') 19 | grammar_dir = os.path.join(breakfast_path, 'grammar') 20 | madios_path = os.path.join(project_path, 'Tools', 'madios', 'build', 'madios') 21 | 22 | eta = 1 23 | alpha = 0.1 24 | context_size = 2 25 | coverage = 0.5 26 | 27 | if not os.path.exists(grammar_dir): 28 | os.makedirs(grammar_dir) 29 | 30 | for f in os.listdir(corpus_dir): 31 | corpus_path = os.path.join(corpus_dir, f) 32 | grammar_path = os.path.splitext(os.path.join(grammar_dir, f))[0] + '.pcfg' 33 | cmd = '{} {} {} {} {} {}'.format(madios_path, corpus_path, eta, alpha, context_size, coverage) 34 | 35 | grammar = False 36 | with open(grammar_path, 'w') as grammar_file: 37 | for line in os.popen(cmd).readlines(): 38 | if grammar: 39 | if line.strip() != '': 40 | grammar_file.write(line) 41 | if line.startswith('Time'): 42 | grammar = True 43 | print('Finishing {}'.format(corpus_path)) 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | """ 15 | Configuration of data paths 16 | data_root: root folder of all videos and annotations 17 | tmp_root: intermediate result for vcla_gaze dataset 18 | """ 19 | def __init__(self): 20 | super(Paths, self).__init__() 21 | self.data_root = self.vcla_data_root 22 | self.tmp_root = 
os.path.join(self.tmp_root, 'vcla_gaze') 23 | 24 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 25 | if not os.path.exists(self.inter_root): 26 | os.makedirs(self.inter_root) 27 | 28 | self.log_root = os.path.join(self.tmp_root, 'log') 29 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 30 | self.vis_root = os.path.join(self.vis_root, 'vcla_gaze') 31 | if not os.path.exists(self.vis_root): 32 | os.makedirs(self.vis_root) 33 | 34 | self.prior_root = os.path.join(self.tmp_root, 'prior') 35 | if not os.path.exists(self.prior_root): 36 | os.makedirs(self.prior_root) 37 | 38 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 39 | self.label_root = os.path.join(self.data_root, 'labels') 40 | self.metadata_root = os.path.join(self.label_root, 'metadata') 41 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 42 | self.img_root = os.path.join(self.data_root, 'images') 43 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') 44 | -------------------------------------------------------------------------------- /datasets/CAD/cad_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/7/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | """ 15 | Configuration of data paths 16 | data_root: root folder of all videos and annotations 17 | tmp_root: intermediate result for vcla_gaze dataset 18 | """ 19 | def __init__(self): 20 | super(Paths, self).__init__() 21 | self.data_root = self.cad_root 22 | self.tmp_root = os.path.join(self.tmp_root, 'cad') 23 | 24 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 25 | if not os.path.exists(self.inter_root): 26 | os.makedirs(self.inter_root) 27 | 28 | self.log_root = os.path.join(self.tmp_root, 'log') 29 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 30 | self.vis_root = os.path.join(self.vis_root, 'cad') 31 | if not os.path.exists(self.vis_root): 32 | os.makedirs(self.vis_root) 33 | 34 | self.prior_root = os.path.join(self.tmp_root, 'prior') 35 | if not os.path.exists(self.prior_root): 36 | os.makedirs(self.prior_root) 37 | 38 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 39 | if not os.path.exists(self.visualize_root): 40 | os.makedirs(self.visualize_root) 41 | 42 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 43 | self.label_root = os.path.join(self.data_root, 'labels') 44 | self.metadata_root = os.path.join(self.label_root, 'metadata') 45 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 46 | self.img_root = os.path.join(self.data_root, 'images') 47 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') 48 | -------------------------------------------------------------------------------- /datasets/Breakfast/breakfast_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | import os 12 | import config 13 | 14 | class Paths(config.Paths): 15 | """ 16 | Configuration of data paths 17 | data_root: root folder of all videos and annotations 18 | tmp_root: intermediate result for vcla_gaze dataset 19 | """ 20 | def __init__(self): 21 | super(Paths, self).__init__() 22 | self.data_root = self.breakfast_root 23 | self.tmp_root = os.path.join(self.tmp_root, 'breakfast') 24 | 25 | self.inter_root = 
os.path.join(self.tmp_root, 'intermediate') 26 | if not os.path.exists(self.inter_root): 27 | os.makedirs(self.inter_root) 28 | 29 | self.log_root = os.path.join(self.tmp_root, 'log') 30 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 31 | self.vis_root = os.path.join(self.vis_root, 'breakfast') 32 | if not os.path.exists(self.vis_root): 33 | os.makedirs(self.vis_root) 34 | 35 | self.prior_root = os.path.join(self.tmp_root, 'prior') 36 | if not os.path.exists(self.prior_root): 37 | os.makedirs(self.prior_root) 38 | 39 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 40 | if not os.path.exists(self.visualize_root): 41 | os.makedirs(self.visualize_root) 42 | 43 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 44 | self.label_root = os.path.join(self.data_root, 'labels') 45 | self.metadata_root = os.path.join(self.label_root, 'metadata') 46 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 47 | self.img_root = os.path.join(self.data_root, 'images') 48 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') -------------------------------------------------------------------------------- /datasets/WNP/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | 11 | class WNP_METADATA(Metadata): 12 | def __init__(self): 13 | super(WNP_METADATA, self).__init__() 14 | 15 | self.activities = ['office', 'kitchen'] 16 | self.subactivities = [ 17 | 'null', 18 | 'fetch_from_fridge', 'put_back_to_fridge', 'prepare_food', 'microwaving', 'fetch_from_oven', 19 | 'pouring', 'drinking', 'leave_kitchen', 'fill_kettle', 'plug_in_kettle', 'move_kettle', 20 | 'reading', 'walking', 'leave_office', 'fetch_book', 'put_back_book', 'put_down_item', 21 | 'take_item', 'play_computer', 'turn_on_monitor', 'turn_off_monitor' 22 | ] 23 | self.actions = [ 24 | 'null', 25 | 'fetch_from_fridge', 'put_back_to_fridge', 'prepare_food', 'microwaving', 'fetch_from_oven', 26 | 'pouring', 'drinking', 'leave_kitchen', 'fill_kettle', 'plug_in_kettle', 'move_kettle', 27 | 'reading', 'walking', 'leave_office', 'fetch_book', 'put_back_book', 'put_down_item', 28 | 'take_item', 'play_computer', 'turn_on_monitor', 'turn_off_monitor' 29 | ] 30 | 31 | for a in self.activities: 32 | self.activity_index[a] = self.activities.index(a) 33 | 34 | for s in self.subactivities: 35 | self.subactivity_index[s] = self.subactivities.index(s) 36 | 37 | for a in self.actions: 38 | self.action_index[a] = self.actions.index(a) 39 | 40 | self.ACTIVITY_NUM = len(self.activities) 41 | self.SUBACTIVITY_NUM = len(self.subactivities) 42 | self.ACTION_NUM = len(self.actions) 43 | 44 | 45 | if __name__ == '__main__': 46 | metadata = WNP_METADATA() -------------------------------------------------------------------------------- /visualization/prediction_plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 6/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import re 11 | import numpy as np 12 | import seaborn as sns 13 | rc={'axes.labelsize': 20, 'font.size': 20, 'legend.fontsize': 20.0, 'axes.titlesize': 20, 'xtick.labelsize': 20.0, 'ytick.labelsize': 24.0,} 14 | sns.set(rc=rc) 15 | import pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/log/nn_results' 19 | 
save_path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/fig' 20 | pred_duration = [15, 30, 45, 60, 75, 90, 105, 120, 135, 150] 21 | gep_paths = [os.path.join(path, 'gep_pred{}_eval.txt'.format(i)) for i in pred_duration] 22 | nn_paths = [os.path.join(path, 'pred{}_eval.txt'.format(i)) for i in pred_duration] 23 | 24 | pattern = '[0-9]+.[0-9]+' 25 | 26 | df_columns = ['Prediction duration (s)', 'LSTM + GEP', 'LSTM', 'Random'] 27 | df = [] 28 | for idx, paths in enumerate(zip(gep_paths, nn_paths)): 29 | gep_path, nn_path = paths 30 | with open(gep_path, 'r') as f: 31 | results_gep = f.readlines() 32 | gep_acc = float(re.findall(pattern, results_gep[-1])[-1]) 33 | # gep_acc = 0 34 | print('gep acc', gep_acc) 35 | with open(nn_path, 'r') as f: 36 | results_nn = f.readlines() 37 | nn_acc = float(re.findall(pattern, results_nn[-1])[-1]) 38 | print('nn acc', nn_acc) 39 | df.append([pred_duration[idx] / 15, gep_acc, nn_acc, 0.1]) 40 | df = pd.DataFrame(df, columns=df_columns) 41 | fig, ax = plt.subplots() 42 | plt.axes([0, 0, 1 / 0.618, 1]) 43 | df = pd.melt(df, id_vars=df_columns[0], value_vars=df_columns[1 : ], var_name='Method', value_name='F1 score') 44 | ax = sns.lineplot(x=df_columns[0], y='F1 score', hue='Method', data=df) 45 | ax.lines[2].set_linestyle('--') 46 | ax.set_title(r'Frame prediction over time') 47 | ax.set(xticks = np.array(pred_duration) / 15) 48 | plt.ylim(0, 0.7) 49 | ax.legend(loc='upper right') 50 | print(sns.plotting_context()) 51 | plt.savefig(os.path.join(save_path, 'cad_prediction.pdf'), bbox_inches='tight') 52 | -------------------------------------------------------------------------------- /datasets/CAD/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class CAD_METADATA(Metadata): 11 | def __init__(self): 12 | super(CAD_METADATA, self).__init__() 13 | 14 | self.activities = [ 15 | 'arranging_objects', 'picking_objects', 'taking_medicine', 16 | 'making_cereal', 'cleaning_objects', 'stacking_objects', 'having_meal', 17 | 'microwaving_food', 'unstacking_objects', 'taking_food' 18 | ] 19 | 20 | self.subactivities = [ 21 | 'reaching', 'moving', 'pouring', 'eating', 'drinking', 22 | 'opening', 'placing', 'closing', 'null', 'cleaning', 'prior' 23 | ] 24 | 25 | self.actions = [ 26 | 'reaching', 'moving', 'pouring', 'eating', 'drinking', 27 | 'opening', 'placing', 'closing', 'null', 'cleaning' 28 | ] 29 | 30 | self.objects = ['medcinebox', 'cup', 'bowl', 'box', 'milk', 'book', 'microwave', 'plate', 'remote', 'cloth'] 31 | 32 | self.affordances = [ 33 | 'movable', 'stationary', 'reachable', 'pourable', 'pourto', 'containable', 34 | 'drinkable', 'openable', 'placeable', 'closeable', 'cleanable', 'cleaner' 35 | ] 36 | 37 | for a in self.activities: 38 | self.activity_index[a] = self.activities.index(a) 39 | 40 | for s in self.subactivities: 41 | self.subactivity_index[s] = self.subactivities.index(s) 42 | 43 | for a in self.actions: 44 | self.action_index[a] = self.actions.index(a) 45 | 46 | for o in self.objects: 47 | self.object_index[o] = self.objects.index(o) 48 | 49 | for u in self.affordances: 50 | self.affordance_index[u] = self.affordances.index(u) 51 | 52 | self.ACTIVITY_NUM = len(self.activities) 53 | self.SUBACTIVITY_NUM = len(self.subactivities) 54 | self.ACTION_NUM = len(self.actions) 55 | self.OBJECT_NUM = len(self.objects) 56 | self.AFFORDANCE_NUM = len(self.affordances) 57 | 
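
The `Metadata` subclasses above (see also `basemeta.py`) expose parallel label lists and reverse-index dictionaries that the rest of the code uses to convert between label strings and class indices. A minimal usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
# Minimal usage sketch for the metadata classes (illustration only).
from datasets.CAD.metadata import CAD_METADATA

metadata = CAD_METADATA()

# Label string -> class index, and back again.
idx = metadata.affordance_index['reachable']
assert metadata.affordances[idx] == 'reachable'

# Class counts used to size model outputs (e.g., TaskNet in datasets/CAD/finetune/model.py).
print(metadata.ACTIVITY_NUM, metadata.SUBACTIVITY_NUM, metadata.AFFORDANCE_NUM)  # 10 11 12
```
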
-------------------------------------------------------------------------------- /datasets/CAD/finetune/cad_finetune.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import torch.utils.data 12 | import torch 13 | from random import shuffle 14 | import numpy as np 15 | import datasets.CAD.cad_config as config 16 | from datasets.CAD.metadata import CAD_METADATA 17 | metadata = CAD_METADATA() 18 | 19 | class CAD_FEATURE(torch.utils.data.Dataset): 20 | def __init__(self, paths, sequence_ids, task, verbose=False): 21 | self.root = paths.img_root 22 | self.tmp_root = paths.tmp_root 23 | self.inter_root = paths.inter_root 24 | self.task = task 25 | self.verbose = verbose 26 | self.sequence_ids = sequence_ids 27 | with open(os.path.join(paths.tmp_root, 'features.p'), 'rb') as f: 28 | self.data_list = pickle.load(f) 29 | with open(os.path.join(paths.tmp_root, 'label_list.p'), 'rb') as f: 30 | self.label_list = pickle.load(f) 31 | 32 | # Using framewise information for prediction purposes 33 | def __getitem__(self, index): 34 | sequence_id = self.sequence_ids[index] 35 | video_id, frame = sequence_id.split('$') 36 | label = self.label_list[sequence_id] 37 | sequence_info = self.data_list[video_id][int(frame)] 38 | feature = list() 39 | if self.task == 'affordance': 40 | object_affordance_feature = np.array(sequence_info['o_fea']) 41 | skeleton_object_feature = np.array(sequence_info['s_o_fea']) 42 | feature = np.hstack((object_affordance_feature, skeleton_object_feature)) 43 | else: 44 | feature = np.array(sequence_info['h_fea'])  # use the human feature for the action task 45 | # with open(os.path.join(self.inter_root, 'finetune', 'affordance'), ) 46 | 47 | feature = torch.FloatTensor(feature) 48 | label = torch.LongTensor(label) 49 | return feature, label 50 | 51 | def __len__(self): 52 | return len(self.sequence_ids) 53 | 54 | def main(): 55 | paths = config.Paths() 56 | with open(os.path.join(paths.tmp_root, 'label_list.p'), 'rb') as f: 57 | sequence_ids = pickle.load(f) 58 | train_num = 10 59 | keys = list(sequence_ids.keys()) 60 | shuffle(keys) 61 | train_ids = ['1130144242$4'] 62 | train_set = CAD_FEATURE(paths, train_ids, 'affordance') 63 | feature, label = train_set[0] 64 | print('Finished') 65 | 66 | if __name__ == '__main__': 67 | main() -------------------------------------------------------------------------------- /visualization/detection_plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 6/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import re 11 | import os 12 | import glob 13 | import seaborn as sns 14 | rc={'axes.labelsize': 28, 'font.size': 20, 'legend.fontsize': 20.0, 'axes.titlesize': 20, 'xtick.labelsize': 24.0, 'ytick.labelsize': 28.0,} 15 | sns.set(rc=rc) 16 | import pandas as pd 17 | import matplotlib.pyplot as plt 18 | 19 | path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/' 20 | 21 | save_path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/fig' 22 | subsample_rate = [1, 2, 5, 10, 20, 50] 23 | # trained_epochs = [5, 10, 15, 20, 25, 30, 35, 40] 24 | trained_epochs = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50] 25 | 26 | pattern = '[0-9]+.[0-9]+' 27 | 28 | gep_all_paths = [[glob.glob(os.path.join(path, 'gep_results', 'eval_s{}_*_t{}.txt'.format(s, t)))[0] for t in trained_epochs] for s in subsample_rate] 29 | nn_all_paths = 
[[glob.glob(os.path.join(path, 'nn_results', 'eval_s{}_*_t{}.txt'.format(s, t)))[0] for t in trained_epochs] for s in subsample_rate] 30 | 31 | df_columns = ['Trained epochs', 'Bi-LSTM + GEP', 'Bi-LSTM'] 32 | for s_idx, (gep_paths, nn_paths) in enumerate(zip(gep_all_paths, nn_all_paths)): 33 | df = [] 34 | for t_idx, (gep_path, nn_path) in enumerate(zip(gep_paths, nn_paths)): 35 | with open(gep_path, 'r') as f: 36 | results_gep = f.readlines() 37 | gep_acc = float(re.findall(pattern, results_gep[-3])[0]) 38 | with open(nn_path, 'r') as f: 39 | results_nn = f.readlines() 40 | nn_acc = float(re.findall(pattern, results_nn[-1])[0]) 41 | print(gep_acc, nn_acc) 42 | df.append([trained_epochs[t_idx], gep_acc, nn_acc]) 43 | df = pd.DataFrame(df, columns=df_columns) 44 | fig, ax = plt.subplots() 45 | df = pd.melt(df, id_vars=df_columns[0], value_vars=df_columns[1 :], var_name='Method', value_name='Accuracy') 46 | sns.lineplot(x=df_columns[0], y='Accuracy', hue='Method', data=df) 47 | plt.xticks(trained_epochs) 48 | plt.ylim(0, 0.7) 49 | # plt.title(r'Detection result with {} frame subsample'.format(subsample_rate[s_idx]) if subsample_rate[s_idx] != 1 50 | # else r'Detection result w/o subsample') 51 | plt.legend(loc='lower right') 52 | plt.savefig(os.path.join(save_path, 'breakfast_subsample_{}.pdf'.format(subsample_rate[s_idx])), bbox_inches='tight') 53 | print('Finished for {}'.format(subsample_rate[s_idx])) 54 | 55 | -------------------------------------------------------------------------------- /utils/plyutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import matplotlib.patches as patches 13 | 14 | def visualize_bbox_image(sequence_id, data, objects_dict): 15 | object_labels, object_images = data 16 | for obj_idx, obj_label in enumerate(object_labels): 17 | object_name = objects_dict[np.argmax(obj_label.numpy())] 18 | fig, ax = plt.subplots(1) 19 | plt.title(object_name) 20 | ax.imshow(object_images[obj_idx].permute(1, 2, 0).numpy().astype(np.uint8)) 21 | plt.show() 22 | 23 | def visualize_bbox_rgb(sequence_id, data, objects_dict): 24 | rgb_image, object_pair = data 25 | color = ['r', 'g', 'b', 'y'] 26 | fig, ax = plt.subplots(1) 27 | ax.imshow(rgb_image.numpy().astype(np.uint8)) 28 | plt.title('{} bboxs in rgb'.format(sequence_id)) 29 | for idx, vec in enumerate(object_pair): 30 | object_id = list(vec[:-4]).index(1) 31 | if object_id == 0: 32 | continue 33 | bbox = vec[-4:] 34 | # Code for showing wrong bounding boxes 35 | rect = patches.Rectangle((bbox[1], bbox[0]), bbox[3] - bbox[1], bbox[2] - bbox[0], 36 | linewidth=1, edgecolor=color[idx], facecolor='none') 37 | ax.add_patch(rect) 38 | print(objects_dict[object_id]) 39 | plt.show() 40 | 41 | 42 | def visualize_skeleton_depth(sequence_id, data): 43 | image, skeleton = data 44 | skeleton_depth = skeleton[:, 5 : 7] 45 | line_pairs = [ 46 | (23, 11), (24, 11), (11, 10), (10, 9), (9, 8), (8, 20), # right arm 47 | (21, 7), (22, 7), (7, 6), (6, 5), (5, 4), (4, 20), # left arm 48 | (3, 2), (2, 20), # head 49 | (20, 1), (1, 0), # torso 50 | (19, 18), (18, 17), (17, 16), (16, 0), # right leg 51 | (15, 14), (14, 13), (13, 12), (12, 0) # left leg 52 | ] 53 | fig, ax = plt.subplots(1) 54 | ax.imshow(image.numpy().astype(np.uint8)) 55 | plt.title('{} skeleton in depth'.format(sequence_id)) 56 | for line in line_pairs: 57 | point1 = 
[skeleton_depth[line[0], 0], skeleton_depth[line[0], 1]] 58 | point2 = [skeleton_depth[line[1], 0], skeleton_depth[line[1], 1]] 59 | ax.scatter(point1[0], point1[1], c='y') 60 | ax.scatter(point2[0], point2[1], c='y') 61 | ax.plot([point1[0], point2[0]], [point1[1], point2[1]], 'r') 62 | plt.show() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GEP 2 | 3 | This repo is adapted from the [original GEP repo](https://github.com/SiyuanQi/generalized-earley-parser) and contains code and adjustments for our TPAMI 2020 paper. 4 | 5 | [A Generalized Earley Parser for Human Activity Parsing and Prediction](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9018126) 6 | 7 | Siyuan Qi, Baoxiong Jia, Siyuan Huang, Ping Wei, and Song-Chun Zhu 8 | 9 | *IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)*, 2020 10 | 11 | 12 | # Dependencies 13 | 14 | Please check that all required packages from ```requirements.txt``` are properly installed. 15 | 16 | 17 | # Experiments 18 | 19 | This repo contains code for reproducing the results reported in our TPAMI paper. 20 | 21 | To run the experiments properly, please download the datasets and adjust the path configuration in ```config.py```. 22 | 23 | We provide three example scripts that show how to use this code for activity parsing and future prediction. 24 | 25 | First, ```breakfast_det.sh``` and ```gep_breakfast_det.sh``` show how to run experiments for activity parsing. These two shell scripts run the ```baseline``` and ```gep``` models for recognizing human actions, respectively. Since the Breakfast dataset contains a large number of frames, the frame subsampling rate is treated as a hyper-parameter that can be tuned during experiments. Please change ```LOG_PATH``` to a valid logging path for storing the results before running the scripts. 26 | 27 | Next, for activity prediction, we use prediction on the CAD dataset as an example. As shown in ```cad_pred.sh```, we run baseline training/evaluation as well as GEP prediction. We report and store model performance under different prediction durations, which can be set in the shell script. Please also change ```LOG_PATH``` to a valid logging path for storing the results. 28 | 29 | # Data 30 | For the features and grammar files used to reproduce the experimental results, please see [here](https://drive.google.com/drive/folders/1_3rr3O1AtbZsGHwy33JPkSSQAOzq5Z8j?usp=sharing). Put the unzipped directory at a valid location and fix the path configuration inside ```config.py``` (see the sketch below) to match the feature paths used in ```datasets/{dataset}.py```. 
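
Concretely, pointing the code at your data means editing the dataset roots in ```config.py```; a minimal sketch with placeholder paths (replace them with your own locations):

```python
# config.py -- example path configuration (placeholder paths, adapt to your machine)
class Paths(object):
    def __init__(self):
        self.project_root = '/path/to/TPAMI2019'            # repo root; tmp/, vis/, log/ live here
        self.vcla_data_root = '/path/to/Datasets/VCLA/'
        self.wnp_root = '/path/to/Datasets/Watch-n-Patch/'
        self.cad_root = '/path/to/Datasets/CAD120/'
        self.breakfast_root = '/path/to/Datasets/Breakfast/'
```
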
31 | 32 | 33 | 34 | # Citation 35 | 36 | If you find the paper and/or the code helpful, please cite 37 | ``` 38 | @inproceedings{qi2018future, 39 | title={Generalized Earley Parser: Bridging Symbolic Grammars and Sequence Data for Future Prediction}, 40 | author={Qi, Siyuan and Jia, Baoxiong and Zhu, Song-Chun}, 41 | booktitle={International Conference on Machine Learning (ICML)}, 42 | year={2018} 43 | } 44 | @article{qi2020generalized, 45 | title={A Generalized Earley Parser for Human Activity Parsing and Prediction}, 46 | author={Qi, Siyuan and Jia, Baoxiong and Huang, Siyuan and Wei, Ping and Zhu, Song-Chun}, 47 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 48 | year={2020}, 49 | publisher={IEEE} 50 | } 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /datasets/Breakfast/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class BREAKFAST_METADATA(Metadata): 11 | def __init__(self): 12 | super(BREAKFAST_METADATA, self).__init__() 13 | 14 | self.activities = [ 15 | "salat", "tea", "coffee", "scrambledegg", "pancake", 16 | "sandwich", "milk", "cereals", "friedegg", "juice" 17 | ] 18 | 19 | self.subactivities = [ 20 | "fry_egg", "add_saltnpepper", "cut_fruit", "pour_milk", "take_cup", "pour_water", 21 | "spoon_flour", "SIL", "stir_coffee", "pour_cereals", "butter_pan", "put_egg2plate", 22 | "take_glass", "pour_sugar", "stir_milk", "take_butter", "peel_fruit", "take_knife", 23 | "stirfry_egg", "pour_oil", "pour_flour", "spoon_powder", "put_pancake2plate", 24 | "stir_fruit", "squeeze_orange", "fry_pancake", "pour_dough2pan", "put_fruit2bowl", 25 | "stir_egg", "take_eggs", "put_bunTogether", "pour_coffee", "smear_butter", 26 | "cut_orange", "take_bowl", "cut_bun", "stir_tea", "take_squeezer", "pour_juice", 27 | "stir_cereals", "pour_egg2pan", "take_topping", "add_teabag", "crack_egg", 28 | "take_plate", "put_toppingOnTop", "stir_dough", "spoon_sugar" 29 | ] 30 | 31 | self.actions = [ 32 | "fry_egg", "add_saltnpepper", "cut_fruit", "pour_milk", "take_cup", "pour_water", 33 | "spoon_flour", "SIL", "stir_coffee", "pour_cereals", "butter_pan", "put_egg2plate", 34 | "take_glass", "pour_sugar", "stir_milk", "take_butter", "peel_fruit", "take_knife", 35 | "stirfry_egg", "pour_oil", "pour_flour", "spoon_powder", "put_pancake2plate", 36 | "stir_fruit", "squeeze_orange", "fry_pancake", "pour_dough2pan", "put_fruit2bowl", 37 | "stir_egg", "take_eggs", "put_bunTogether", "pour_coffee", "smear_butter", 38 | "cut_orange", "take_bowl", "cut_bun", "stir_tea", "take_squeezer", "pour_juice", 39 | "stir_cereals", "pour_egg2pan", "take_topping", "add_teabag", "crack_egg", 40 | "take_plate", "put_toppingOnTop", "stir_dough", "spoon_sugar" 41 | ] 42 | 43 | for a in self.activities: 44 | self.activity_index[a] = self.activities.index(a) 45 | 46 | for s in self.subactivities: 47 | self.subactivity_index[s] = self.subactivities.index(s) 48 | 49 | for a in self.actions: 50 | self.action_index[a] = self.actions.index(a) 51 | 52 | 53 | self.ACTIVITY_NUM = len(self.activities) 54 | self.SUBACTIVITY_NUM = len(self.subactivities) 55 | self.ACTION_NUM = len(self.actions) -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class VCLA_METADATA(Metadata): 11 | def __init__(self): 12 | super(VCLA_METADATA, self).__init__() 13 | self.activities = [ 14 | 'c01_sweep_floor', 'c02_mop_floor', 'c03_write_on_blackboard', 15 | 'c04_clean_blackboard', 'c05_use_elevator', 'c06_pour_liquid_from_jug', 16 | 'c07_make_coffee', 'c08_read_book', 'c09_throw_trash', 17 | 'c10_heat_food_with_microwave', 'c11_use_computer', 'c12_search_drawer', 18 | 'c13_move_bottle_to_dispenser', 'c14_open_door' 19 | ] 20 | 21 | self.subactivities = [ 22 | 'null', 23 | 'search', 'tear', 'read', 'throw', 'wring', 'open', 'use', 'walk', 'scrub', 'pour', 24 | 'write', 'sweep', 'grab', 'mop', 'close', 'push', 'stand', 'sit', 'grag', 'place', 'prior' 25 | ] 26 | 27 | self.actions = [ 28 | 'null', 29 | 'search', 'tear', 'read', 'throw', 'wring', 'open', 'use', 'walk', 'scrub', 'pour', 30 | 'write', 'sweep', 'grab', 'mop', 'close', 'push', 'stand', 'sit', 'grag', 'place' 31 | ] 32 | 33 | self.objects = [ 34 | 'null', 35 | 'blackboard', 'chair', 'dispenser', 'dustpan', 'eraser', 'cup', 'drawer', 36 | 'bucket', 'microwave', 'broom', 'button', 'handle', 'paper', 'door', 'mop', 37 | 'jug', 'bottle', 'monitor', 'book', 'food', 'can', 'chalk' 38 | ] 39 | 40 | self.affordances = [ 41 | 'null', 42 | 'usable', 'scrubber', 'searchable', 'wringable', 'scrubbable', 43 | 'throwable', 'sittable', 'sweepable', 'pourable', 'pourto', 'writer', 44 | 'writable', 'tearable', 'moppable', 'closeable', 'statuibar', 'placeable', 45 | 'stationary', 'readable', 'grabbable', 'openable', 'pushable' 46 | ] 47 | 48 | for a in self.activities: 49 | self.activity_index[a] = self.activities.index(a) 50 | 51 | for s in self.subactivities: 52 | self.subactivity_index[s] = self.subactivities.index(s) 53 | 54 | for a in self.actions: 55 | self.action_index[a] = self.actions.index(a) 56 | 57 | for o in self.objects: 58 | self.object_index[o] = self.objects.index(o) 59 | 60 | for u in self.affordances: 61 | self.affordance_index[u] = self.affordances.index(u) 62 | 63 | self.ACTIVITY_NUM = len(self.activities) 64 | self.SUBACTIVITY_NUM = len(self.subactivities) 65 | self.ACTION_NUM = len(self.actions) 66 | self.OBJECT_NUM = len(self.objects) 67 | self.AFFORDANCE_NUM = len(self.affordances) 68 | self.MAXIMUM_OBJ_VIDEO = 3 69 | 70 | -------------------------------------------------------------------------------- /datasets/WNP/wnp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import numpy as np 12 | import torch 13 | import torch.utils.data 14 | from datasets.WNP.metadata import WNP_METADATA 15 | 16 | class WNP(torch.utils.data.Dataset): 17 | def __init__(self, paths, mode, task='activity', subsample=None): 18 | self.path = paths.inter_root 19 | self.sequence_ids = list() 20 | with open(os.path.join(self.path, 'features', 'wnp_{}.p'.format(mode)), 'rb') as f: 21 | self.data = pickle.load(f, encoding='latin1') 22 | for key in self.data.keys(): 23 | self.sequence_ids.append(key) 24 | self.task = task 25 | self.mode = mode 26 | 27 | def __getitem__(self, index): 28 | sequence_id = self.sequence_ids[index] 29 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 30 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 31 | 
self.data[sequence_id]['activity'], sequence_id, None 32 | 33 | def __len__(self): 34 | return len(self.sequence_ids) 35 | 36 | @staticmethod 37 | def collate_fn(batch): 38 | metadata = WNP_METADATA() 39 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 40 | feature_size = features.shape[1] 41 | label_num = len(metadata.subactivities) 42 | 43 | max_seq_length = np.max(np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch])) 44 | features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 45 | labels_batch = np.ones((max_seq_length, len(batch))) * -1 46 | probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 47 | total_lengths = np.zeros(len(batch)) 48 | ctc_labels = list() 49 | ctc_lengths = list() 50 | activities = list() 51 | sequence_ids = list() 52 | 53 | for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch): 54 | features_batch[:total_length, batch_i, :] = np.nan_to_num(features) 55 | labels_batch[:total_length, batch_i] = labels 56 | for frame in range(features.shape[0]): 57 | probs_batch[frame, batch_i, int(labels[frame])] = 1.0 58 | 59 | merged_labels = list() 60 | current_label = -1 61 | for label in labels: 62 | if label != current_label: 63 | current_label = label 64 | merged_labels.append(current_label) 65 | ctc_labels.append(merged_labels) 66 | ctc_lengths.append(len(merged_labels)) 67 | total_lengths[batch_i] = total_length 68 | activities.append(activity) 69 | sequence_ids.append(sequence_id) 70 | 71 | features_batch = torch.FloatTensor(features_batch) 72 | labels_batch = torch.LongTensor(labels_batch) 73 | probs_batch = torch.FloatTensor(probs_batch) 74 | total_lengths = torch.IntTensor(total_lengths) 75 | ctc_lengths = torch.IntTensor(ctc_lengths) 76 | 77 | # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional_label 78 | return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None -------------------------------------------------------------------------------- /models/parsegraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | class SParseGraph(object): 11 | def __init__(self, start_frame, end_frame, subactivity=None, action=None, objects=None, affordance_labels=None): 12 | self._start_frame = start_frame 13 | self._end_frame = end_frame 14 | self._action = action 15 | self._subactivity = subactivity 16 | self._objects = objects if objects is not None else list()  # avoid shared mutable default arguments 17 | self._affordance_labels = affordance_labels if affordance_labels is not None else list() 18 | 19 | self._skeletons = None 20 | self._obj_positions = list() 21 | 22 | def __str__(self): 23 | return '{}-{} {} {} {}'.format(self._start_frame, self._end_frame, self._subactivity, 24 | self._objects, self._affordance_labels) 25 | 26 | def __repr__(self): 27 | return self.__str__() 28 | 29 | @property 30 | def id(self): 31 | return self._subactivity 32 | 33 | @property 34 | def subactivity(self): 35 | return self._subactivity 36 | 37 | @property 38 | def action(self): 39 | return self._action 40 | 41 | @property 42 | def start_frame(self): 43 | return self._start_frame 44 | 45 | @property 46 | def end_frame(self): 47 | return self._end_frame 48 | 49 | @property 50 | def objects(self): 51 | return self._objects 52 | 53 | @property 54 | 
def affordance(self): 55 | return self._affordance_labels 56 | 57 | @property 58 | def skeletons(self): 59 | return self._skeletons 60 | 61 | @property 62 | def obj_positions(self): 63 | return self._obj_positions 64 | 65 | def set_skeletons(self, skeletons): 66 | assert self._end_frame - self._start_frame + 1 == skeletons.shape[0] 67 | self._skeletons = skeletons 68 | 69 | def set_obj_positions(self, obj_positions): 70 | for obj in obj_positions: 71 | self._obj_positions.append(obj[self._start_frame : self._end_frame + 1]) 72 | 73 | @subactivity.setter 74 | def subactivity(self, value): 75 | self._subactivity = value 76 | 77 | @end_frame.setter 78 | def end_frame(self, value): 79 | self._end_frame = value 80 | 81 | 82 | class TParseGraph(object): 83 | def __init__(self, activity=None, sequence_id=None, subject=None): 84 | self._activity = activity 85 | self._sequence_id = sequence_id 86 | self._subject = subject 87 | self._terminals = list() 88 | 89 | def __str__(self): 90 | sequence = '{} * '.format(self._sequence_id) 91 | for t in self._terminals: 92 | sequence += t.id + ' ' 93 | sequence += '#' 94 | return sequence 95 | 96 | def __repr__(self): 97 | return self.__str__() 98 | 99 | @property 100 | def activity(self): 101 | return self._activity 102 | 103 | @property 104 | def id(self): 105 | return self._sequence_id 106 | 107 | @property 108 | def subject(self): 109 | return self._subject 110 | 111 | @property 112 | def terminals(self): 113 | return self._terminals 114 | 115 | @property 116 | def length(self): 117 | if len(self._terminals) == 0: 118 | return 0 119 | else: 120 | return self._terminals[-1].end_frame - self._terminals[0].start_frame + 1 121 | 122 | @property 123 | def start_frame(self): 124 | assert len(self._terminals) > 0, 'No spg added' 125 | return self._terminals[0].start_frame 126 | 127 | @activity.setter 128 | def activity(self, value): 129 | self._activity = value 130 | 131 | def append_terminal(self, spg): 132 | self._terminals.append(spg) 133 | -------------------------------------------------------------------------------- /experiments/exp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch 10 | 11 | import datasets.VCLA_GAZE.vcla_gaze_config as vcla_gaze_config 12 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 13 | import datasets.VCLA_GAZE.vcla_gaze as vcla_gaze 14 | 15 | import datasets.CAD.cad_config as cad_config 16 | from datasets.CAD.metadata import CAD_METADATA 17 | import datasets.CAD.cad as cad 18 | 19 | import datasets.WNP.wnp_config as wnp_config 20 | from datasets.WNP.metadata import WNP_METADATA 21 | import datasets.WNP.wnp as wnp 22 | 23 | import datasets.Breakfast.breakfast_config as breakfast_config 24 | from datasets.Breakfast.metadata import BREAKFAST_METADATA 25 | import datasets.Breakfast.breakfast as breakfast 26 | 27 | class Experiment(object): 28 | def __init__(self, dataset='VCLA_GAZE'): 29 | self.paths_dict = { 30 | 'WNP': wnp_config.Paths(), 31 | 'VCLA_GAZE': vcla_gaze_config.Paths(), 32 | 'CAD': cad_config.Paths(), 33 | 'Breakfast': breakfast_config.Paths() 34 | } 35 | self.metadata_dict = { 36 | 'WNP': WNP_METADATA(), 37 | 'VCLA_GAZE': VCLA_METADATA(), 38 | 'CAD': CAD_METADATA(), 39 | 'Breakfast': BREAKFAST_METADATA() 40 | } 41 | self.dataset_dict = { 42 | 'WNP': lambda path, mode, task, subsample: wnp.WNP(path, mode, task, subsample), 43 | 'VCLA_GAZE': lambda path, mode, 
task, subsample: vcla_gaze.VCLA_GAZE(path, mode, task, subsample), 44 | 'CAD': lambda path, mode, task, subsample: cad.CAD(path, mode, task, subsample), 45 | 'Breakfast': lambda path, mode, task, subsample: breakfast.Breakfast(path, mode, task, subsample) 46 | } 47 | self.dataset = self.dataset_dict[dataset] 48 | self.paths = self.paths_dict[dataset] 49 | self.metadata = self.metadata_dict[dataset] 50 | 51 | def get_dataset(self, args, save=False): 52 | all_set = None 53 | train_set = self.dataset(args.paths, 'train', args.task, args.subsample) 54 | val_set = self.dataset(args.paths, 'val', args.task, args.subsample) 55 | test_set = self.dataset(args.paths, 'test', args.task, args.subsample) 56 | if save: 57 | all_set = self.dataset(args.paths, 'all', args.task, args.subsample) 58 | all_loader = None 59 | train_loader = torch.utils.data.DataLoader(train_set, collate_fn=train_set.collate_fn, 60 | batch_size=args.batch_size, num_workers=args.workers, 61 | pin_memory=True, shuffle=True) 62 | val_loader = torch.utils.data.DataLoader(val_set, collate_fn=train_set.collate_fn, 63 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 64 | test_loader = torch.utils.data.DataLoader(test_set, collate_fn=train_set.collate_fn, 65 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 66 | if save: 67 | all_loader = torch.utils.data.DataLoader(all_set, collate_fn=train_set.collate_fn, 68 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 69 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = train_set[0] 70 | feature_size = features[0].shape[-1] 71 | return feature_size, train_loader, val_loader, test_loader, all_loader 72 | 73 | def get_label_num(self, args): 74 | if args.task == 'affordance': 75 | return self.metadata.AFFORDANCE_NUM 76 | else: 77 | return self.metadata.ACTION_NUM -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze_prior.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: Prior calculation for VCLA_GAZE dataset 7 | Need to first generate the activity_corpus.p using dataparser.py 8 | 9 | """ 10 | 11 | import os 12 | import sys 13 | import pickle 14 | import json 15 | 16 | import numpy as np 17 | import scipy.stats 18 | 19 | import datasets.VCLA_GAZE.vcla_gaze_config as config 20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 21 | metadata = VCLA_METADATA() 22 | 23 | def learn_prior(paths): 24 | def normalize_prob(cpt): 25 | for s in range(cpt.shape[0]): 26 | cpt[s, :] = cpt[s, :]/np.sum(cpt[s, :]) 27 | 28 | return cpt 29 | 30 | if not os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')): 31 | sys.exit('Ground truth pickle file not found.') 32 | with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb') as f: 33 | activity_corpus = pickle.load(f) 34 | 35 | action_cpt = np.ones((len(metadata.subactivities), len(metadata.actions))) * 0.3 36 | affordance_cpt = np.ones((len(metadata.subactivities), len(metadata.affordances))) * 0.1 37 | object_cpt = np.ones((len(metadata.subactivities), len(metadata.objects))) * 0.0001 38 | object_affordance_cpt = np.ones((len(metadata.objects), len(metadata.affordances))) * 0.0001 39 | duration_dict = dict() 40 | 41 | for s in metadata.subactivities: 42 | duration_dict[s] = list() 43 | 44 | for activity in activity_corpus: 45 | for tpg in activity_corpus[activity]: 46 
/datasets/VCLA_GAZE/vcla_gaze_prior.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 12/2/18
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description: Prior calculation for VCLA_GAZE dataset
 7 |              Need to first generate the activity_corpus.p using dataparser.py
 8 | 
 9 | """
10 | 
11 | import os
12 | import sys
13 | import pickle
14 | import json
15 | 
16 | import numpy as np
17 | import scipy.stats
18 | 
19 | import datasets.VCLA_GAZE.vcla_gaze_config as config
20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA
21 | metadata = VCLA_METADATA()
22 | 
23 | def learn_prior(paths):
24 |     def normalize_prob(cpt):
25 |         for s in range(cpt.shape[0]):
26 |             cpt[s, :] = cpt[s, :] / np.sum(cpt[s, :])
27 | 
28 |         return cpt
29 | 
30 |     if not os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')):
31 |         sys.exit('Ground truth pickle file not found.')
32 |     with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb') as f:
33 |         activity_corpus = pickle.load(f)
34 | 
35 |     action_cpt = np.ones((len(metadata.subactivities), len(metadata.actions))) * 0.3
36 |     affordance_cpt = np.ones((len(metadata.subactivities), len(metadata.affordances))) * 0.1
37 |     object_cpt = np.ones((len(metadata.subactivities), len(metadata.objects))) * 0.0001
38 |     object_affordance_cpt = np.ones((len(metadata.objects), len(metadata.affordances))) * 0.0001
39 |     duration_dict = dict()
40 | 
41 |     for s in metadata.subactivities:
42 |         duration_dict[s] = list()
43 | 
44 |     for activity in activity_corpus:
45 |         for tpg in activity_corpus[activity]:
46 |             for t in tpg.terminals:
47 |                 s = t.subactivity
48 |                 duration_dict[s].append(t.end_frame - t.start_frame)
49 |                 duration_dict['prior'].append(t.end_frame - t.start_frame)
50 | 
51 |                 a = t.subactivity
52 |                 action_cpt[metadata.subactivity_index[s], metadata.action_index[a]] += 1
53 |                 action_cpt[metadata.subactivity_index['prior'], metadata.action_index[a]] += 1
54 |                 for u in t.affordance:
55 |                     affordance_cpt[metadata.subactivity_index[s], metadata.affordance_index[u]] += 1
56 |                     affordance_cpt[metadata.subactivity_index['prior'], metadata.affordance_index[u]] += 1
57 |                 for io, o in enumerate(t.objects):
58 |                     object_cpt[metadata.subactivity_index[s], metadata.object_index[o]] += 1
59 |                     object_cpt[metadata.subactivity_index['prior'], metadata.object_index[o]] += 1
60 |                     object_affordance_cpt[metadata.object_index[o], metadata.affordance_index[t.affordance[io]]] += 1
61 | 
62 |     object_affordance_cpt[:, -1] = 0
63 |     object_affordance_cpt[:, -1] = np.max(object_affordance_cpt, axis=1)
64 | 
65 |     action_cpt = normalize_prob(action_cpt)
66 |     affordance_cpt = normalize_prob(affordance_cpt)
67 |     object_cpt = normalize_prob(object_cpt)
68 |     object_affordance_cpt = normalize_prob(object_affordance_cpt)
69 |     with open(os.path.join(paths.prior_root, 'action_cpt.json'), 'w') as output_file:
70 |         json.dump(action_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
71 |     with open(os.path.join(paths.prior_root, 'affordance_cpt.json'), 'w') as output_file:
72 |         json.dump(affordance_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
73 |     with open(os.path.join(paths.prior_root, 'object_cpt.json'), 'w') as output_file:
74 |         json.dump(object_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
75 |     with open(os.path.join(paths.prior_root, 'object_affordance_cpt.json'), 'w') as output_file:
76 |         json.dump(object_affordance_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
77 | 
78 |     duration_prior = dict()
79 |     for s, durations in duration_dict.items():
80 |         mu, std = scipy.stats.norm.fit(durations)
81 |         duration_prior[s] = [mu, std]
82 | 
83 |     with open(os.path.join(paths.prior_root, 'duration_prior.json'), 'w') as output_file:
84 |         json.dump(duration_prior, output_file, indent=4, separators=(',', ': '))
85 | 
86 | 
87 | def main():
88 |     paths = config.Paths()
89 |     learn_prior(paths)
90 | 
91 | 
92 | if __name__ == '__main__':
93 |     main()
94 | 
--------------------------------------------------------------------------------
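A sketch of how the fitted Gaussian duration prior could be consumed downstream. Only the duration_prior.json layout of [mu, std] per subactivity comes from learn_prior above; the function name and call site are illustrative, not taken from this repository.

import json
import os
import scipy.stats

def duration_log_likelihood(prior_root, subactivity, duration):
    # duration_prior.json maps each subactivity (plus 'prior') to the fitted [mu, std]
    with open(os.path.join(prior_root, 'duration_prior.json')) as f:
        duration_prior = json.load(f)
    mu, std = duration_prior[subactivity]
    return scipy.stats.norm.logpdf(duration, loc=mu, scale=std)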
/datasets/Breakfast/breakfast.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 4/20/19
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description:
 7 | 
 8 | """
 9 | import os
10 | import pickle
11 | import numpy as np
12 | import torch
13 | import torch.utils.data
14 | from datasets.Breakfast.metadata import BREAKFAST_METADATA
15 | 
16 | class Breakfast(torch.utils.data.Dataset):
17 |     def __init__(self, paths, mode, task='activity', subsample=None):
18 |         self.path = paths.inter_root
19 |         self.sequence_ids = list()
20 |         if subsample != 1:
21 |             with open(os.path.join(self.path, 'features', 'breakfast_{}_0_{}.p'.format(mode, subsample)), 'rb') as f:
22 |                 self.data = pickle.load(f, encoding='latin1')  # 'latin1' decodes pickles written under Python 2
23 |         else:
24 |             with open(os.path.join(self.path, 'features', 'breakfast_{}_0.p'.format(mode)), 'rb') as f:
25 |                 self.data = pickle.load(f, encoding='latin1')
26 |         for key in self.data.keys():
27 |             self.sequence_ids.append(key)
28 |         self.task = task
29 |         self.mode = mode
30 | 
31 |     def __getitem__(self, index):
32 |         sequence_id = self.sequence_ids[index]
33 |         return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \
34 |                self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \
35 |                self.data[sequence_id]['activity'], sequence_id, self.data[sequence_id]['all_labels']
36 | 
37 |     def __len__(self):
38 |         return len(self.sequence_ids)
39 | 
40 |     @staticmethod
41 |     def collate_fn(batch):
42 |         metadata = BREAKFAST_METADATA()
43 |         features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0]
44 |         feature_size = features.shape[1]
45 |         label_num = len(metadata.subactivities)
46 | 
47 |         max_seq_length = np.max(np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
48 |         features_batch = np.zeros((max_seq_length, len(batch), feature_size))
49 |         labels_batch = np.ones((max_seq_length, len(batch))) * -1
50 |         max_all_seq_length = np.max(np.array([len(additional) for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
51 |         all_labels_batch = np.ones((max_all_seq_length, len(batch))) * -1
52 |         probs_batch = np.zeros((max_seq_length, len(batch), label_num))
53 |         total_lengths = np.zeros(len(batch))
54 |         ctc_labels = list()
55 |         ctc_lengths = list()
56 |         activities = list()
57 |         sequence_ids = list()
58 |         all_total_lengths = np.zeros(len(batch))
59 | 
60 |         for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch):
61 |             features_batch[:total_length, batch_i, :] = np.nan_to_num(features)
62 |             labels_batch[:total_length, batch_i] = labels
63 |             all_labels_batch[:len(additional), batch_i] = additional
64 |             all_total_lengths[batch_i] = len(additional)
65 |             for frame in range(features.shape[0]):
66 |                 probs_batch[frame, batch_i, int(labels[frame])] = 1.0
67 | 
68 |             merged_labels = list()
69 |             current_label = -1
70 |             for label in labels:
71 |                 if label != current_label:
72 |                     current_label = label
73 |                     merged_labels.append(current_label)
74 |             ctc_labels.append(merged_labels)
75 |             ctc_lengths.append(len(merged_labels))
76 |             total_lengths[batch_i] = total_length
77 |             activities.append(activity)
78 |             sequence_ids.append(sequence_id)
79 | 
80 |         features_batch = torch.FloatTensor(features_batch)
81 |         labels_batch = torch.LongTensor(labels_batch)
82 |         probs_batch = torch.FloatTensor(probs_batch)
83 |         total_lengths = torch.IntTensor(total_lengths)
84 |         ctc_lengths = torch.IntTensor(ctc_lengths)
85 |         all_labels_batch = torch.LongTensor(all_labels_batch)
86 |         all_total_lengths = torch.IntTensor(all_total_lengths)
87 | 
88 |         # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional
89 |         return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, (all_labels_batch, all_total_lengths)
--------------------------------------------------------------------------------
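The run-length merge inside collate_fn collapses frame-level labels into the token sequence that CTC training expects; a tiny worked example with made-up label ids:

labels = [4, 4, 4, 7, 7, 4]  # hypothetical per-frame subactivity ids
merged = list()
current = -1
for label in labels:
    if label != current:
        current = label
        merged.append(label)
# merged == [4, 7, 4]; its length (3) is what goes into ctc_lengths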
/experiments/STAOG/prob_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 12/9/18
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description:
 7 | 
 8 | """
 9 | import os
10 | import numpy as np
11 | import json
12 | 
13 | class Prob_Utils(object):
14 | 
15 |     @staticmethod
16 |     def get_likelihood_sum(action_log_likelihood, object_log_likelihood=None, affordance_log_likelihood=None, affordance=True):
17 |         '''
18 |         Precompute the sum of log probabilities in interval [i, j]
19 |         :param action_log_likelihood: action log likelihood
20 |         :param object_log_likelihood: object log likelihood
21 |         :param affordance_log_likelihood: affordance log likelihood
22 |         :return:
23 |             action_log_likelihood_sum: action_label_num x frames x frames
24 |             object_log_likelihood_sum: object_bbox_num x object_label_num x frames x frames
25 |             affordance_log_likelihood_sum: object_bbox_num x affordance_label_num x frames x frames
26 |         '''
27 |         action_log_likelihood_sum = np.zeros(
28 |             action_log_likelihood.shape + (action_log_likelihood.shape[-1],))
29 |         for a in range(action_log_likelihood.shape[0]):
30 |             for i in range(action_log_likelihood.shape[1]):
31 |                 action_log_likelihood_sum[a, i, i] = action_log_likelihood[a, i]
32 |         for a in range(action_log_likelihood.shape[0]):
33 |             for i in range(action_log_likelihood.shape[1]):
34 |                 for j in range(i + 1, action_log_likelihood.shape[1]):
35 |                     action_log_likelihood_sum[a, i, j] = action_log_likelihood_sum[a, i, j - 1] + \
36 |                                                          action_log_likelihood[a, j]
37 | 
38 |         object_log_likelihood_sum = None
39 |         affordance_log_likelihood_sum = None
40 | 
41 |         if affordance:
42 |             object_log_likelihood_sum = np.zeros(object_log_likelihood.shape + (object_log_likelihood.shape[-1],))
43 |             for b in range(object_log_likelihood.shape[0]):
44 |                 for o in range(object_log_likelihood.shape[1]):
45 |                     for i in range(object_log_likelihood.shape[2]):
46 |                         object_log_likelihood_sum[b, o, i, i] = object_log_likelihood[b, o, i]
47 |             for b in range(object_log_likelihood.shape[0]):
48 |                 for o in range(object_log_likelihood.shape[1]):
49 |                     for i in range(object_log_likelihood.shape[2]):
50 |                         for j in range(i + 1, object_log_likelihood.shape[2]):
51 |                             object_log_likelihood_sum[b, o, i, j] = object_log_likelihood_sum[b, o, i, j - 1] + \
52 |                                                                     object_log_likelihood[b, o, j]
53 | 
54 |             affordance_log_likelihood_sum = np.zeros(
55 |                 affordance_log_likelihood.shape + (affordance_log_likelihood.shape[-1],))
56 |             for b in range(affordance_log_likelihood.shape[0]):
57 |                 for a in range(affordance_log_likelihood.shape[1]):
58 |                     for i in range(affordance_log_likelihood.shape[2]):
59 |                         affordance_log_likelihood_sum[b, a, i, i] = affordance_log_likelihood[b, a, i]
60 |             for b in range(affordance_log_likelihood.shape[0]):
61 |                 for a in range(affordance_log_likelihood.shape[1]):
62 |                     for i in range(affordance_log_likelihood.shape[2]):
63 |                         for j in range(i + 1, affordance_log_likelihood.shape[2]):
64 |                             affordance_log_likelihood_sum[b, a, i, j] = affordance_log_likelihood_sum[b, a, i, j - 1] + \
65 |                                                                         affordance_log_likelihood[b, a, j]
66 | 
67 |         return action_log_likelihood_sum, object_log_likelihood_sum, affordance_log_likelihood_sum
68 | 
69 |     @staticmethod
70 |     def combine_cpt(action_log_cpt, object_log_cpt, affordance_log_cpt, affordance=True):
71 |         '''
72 |         Combine action prior, object prior and affordance prior using the log probability
73 |         :param action_log_cpt: action log probability, indexed by (subactivity, action)
74 |         :param object_log_cpt: object log probability, indexed by (subactivity, object)
75 |         :param affordance_log_cpt: affordance log probability, indexed by (subactivity, affordance)
76 |         :return: combined log probability, indexed by (subactivity, action, object, affordance)
77 |         '''
78 |         if affordance:
79 |             combined_log_cpt = np.zeros((action_log_cpt.shape[0], action_log_cpt.shape[1],
80 |                                          object_log_cpt.shape[1], affordance_log_cpt.shape[1]))
81 |         else:
82 |             combined_log_cpt = np.zeros((action_log_cpt.shape[0], action_log_cpt.shape[1]))
83 |         for s in range(combined_log_cpt.shape[0]):
84 |             for a in range(action_log_cpt.shape[1]):
85 |                 if affordance:
86 |                     for o in range(object_log_cpt.shape[1]):
87 |                         for u in range(affordance_log_cpt.shape[1]):
88 |                             combined_log_cpt[s, a, o, u] = action_log_cpt[s, a] + object_log_cpt[s, o] + affordance_log_cpt[s, u]
89 |                 else:
90 |                     combined_log_cpt[s, a] = action_log_cpt[s, a]
91 |         return combined_log_cpt
--------------------------------------------------------------------------------
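A quick sanity check of the interval identity the docstring above describes, namely action_log_likelihood_sum[a, i, j] == sum of action_log_likelihood[a, t] for t in i..j; a minimal sketch exercising only the affordance=False path, with made-up probabilities:

import numpy as np

ll = np.log(np.array([[0.7, 0.2, 0.1],
                      [0.5, 0.4, 0.1]]))  # 2 action labels x 3 frames
ll_sum, _, _ = Prob_Utils.get_likelihood_sum(ll, affordance=False)
# the [0, 0, 2] entry is the cumulative log likelihood of action 0 over frames 0..2
assert np.isclose(ll_sum[0, 0, 2], ll[0, 0] + ll[0, 1] + ll[0, 2])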
/utils/logutils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Created on 10/30/18
  3 | 
  4 | @author: Baoxiong Jia
  5 | 
  6 | Description:
  7 | 
  8 | """
  9 | import os
 10 | import shutil
 11 | import torch
 12 | import numpy as np
 13 | import sklearn.metrics
 14 | 
 15 | rgb_width = 1920
 16 | rgb_height = 1280
 17 | depth_width = 512
 18 | depth_height = 424
 19 | 
 20 | 
 21 | class AverageMeter(object):
 22 |     """Computes and stores the average and current value"""
 23 | 
 24 |     def __init__(self):
 25 |         self.reset()
 26 | 
 27 |     def reset(self):
 28 |         self.val = 0
 29 |         self.avg = 0
 30 |         self.sum = 0
 31 |         self.count = 0
 32 | 
 33 |     def update(self, val, n=1):
 34 |         self.val = val
 35 |         self.sum += val * n
 36 |         self.count += n
 37 |         self.avg = self.sum / self.count
 38 | 
 39 | 
 40 | class MultiAverageMeter(AverageMeter):
 41 |     def __init__(self):
 42 |         super(MultiAverageMeter, self).__init__()
 43 |         self.reset()
 44 | 
 45 |     def reset(self):
 46 |         self.vals = {}
 47 |         self.avgs = {}
 48 |         self.sums = {}
 49 |         self.counts = {}
 50 |         self.val = 0
 51 |         self.avg = 0
 52 | 
 53 |     # Return avg precision for affordance that is not null
 54 |     def update(self, key, val, n=1):
 55 |         if key not in self.vals.keys():
 56 |             self.vals[key] = 0
 57 |             self.avgs[key] = 0
 58 |             self.sums[key] = 0
 59 |             self.counts[key] = 0
 60 |         self.vals[key] = val
 61 |         self.sums[key] += val * n
 62 |         self.counts[key] += n
 63 |         self.avgs[key] = self.sums[key] / self.counts[key]
 64 | 
 65 |         val = 0
 66 |         avg = 0
 67 |         count = 0
 68 |         for key in self.vals:
 69 |             if key != 'null':
 70 |                 val += self.vals[key]
 71 |                 avg += self.avgs[key]
 72 |                 count += 1
 73 |         if count != 0:
 74 |             self.val = val / count
 75 |             self.avg = avg / count
 76 |         else:
 77 |             self.val = -1
 78 |             self.avg = -1
 79 | 
 80 | class Logger(object):
 81 |     """Records useful logging variables for training and validation"""
 82 |     def __init__(self):
 83 |         self.batch_time = AverageMeter()
 84 |         self.data_time = AverageMeter()
 85 |         self.losses = AverageMeter()
 86 |         self.multi_losses = MultiAverageMeter()
 87 |         self.top1 = MultiAverageMeter()
 88 | 
 89 | def compute_accuracy(gt_results, results, labels='all', metric='micro'):
 90 |     if labels == 'all':
 91 |         labels_list = list(set(gt_results + results))
 92 |     else:
 93 |         labels_list = list(set(gt_results + results))
 94 |         labels_list.remove(0)
 95 |     results = sklearn.metrics.precision_recall_fscore_support(gt_results, results, labels=labels_list, average=metric)
 96 |     if metric == 'micro':
 97 |         return results[0]
 98 |     else:
 99 |         return results[0], results[1], results[2]
100 | 
101 | def save_checkpoint(state_dict, is_best, args, filename='checkpoint.pth'):
102 |     if not os.path.exists(args.resume):
103 |         os.makedirs(args.resume)
104 |     torch.save(state_dict, os.path.join(args.resume, filename))
105 |     if is_best:
106 |         shutil.copyfile(os.path.join(args.resume, filename), os.path.join(args.resume, 'model_best.pth'))
107 | 
108 | def save_checkpoint_epoch(state_dict, epoch, args):
109 |     if not os.path.exists(args.resume):
110 |         os.makedirs(args.resume)
111 |     torch.save(state_dict,
os.path.join(args.resume, 'checkpoint_{}.pth'.format(epoch))) 112 | 113 | def load_checkpoint_epoch(args, model, epoch, optimizer=None, scheduler=None): 114 | file_name = os.path.join(args.resume, 'checkpoint_{}.pth'.format(epoch)) 115 | print('Loading {}: {}'.format(file_name, os.path.isfile(file_name))) 116 | if os.path.isfile(file_name): 117 | checkpoint = torch.load(file_name) 118 | print('Best precision:{}'.format(checkpoint['best_prec'])) 119 | args.start_epoch = checkpoint['epoch'] 120 | model.load_state_dict(checkpoint['state_dict']) 121 | if optimizer != None: 122 | optimizer.load_state_dict(checkpoint['optimizer']) 123 | if scheduler != None: 124 | scheduler.load_state_dict(checkpoint['scheduler']) 125 | print('finished loading') 126 | 127 | 128 | def load_checkpoint(args, model, optimizer=None, scheduler=None): 129 | print('Loading {}: {}'.format(os.path.join(args.resume, 'model_best.pth'), os.path.isfile(os.path.join(args.resume, 'model_best.pth')))) 130 | if os.path.isfile(os.path.join(args.resume, 'model_best.pth')): 131 | checkpoint = torch.load(os.path.join(args.resume, 'model_best.pth')) 132 | print('Best precision:{}'.format(checkpoint['best_prec'])) 133 | args.start_epoch = checkpoint['epoch'] 134 | model.load_state_dict(checkpoint['state_dict']) 135 | if optimizer != None: 136 | optimizer.load_state_dict(checkpoint['optimizer']) 137 | if scheduler != None: 138 | scheduler.load_state_dict(checkpoint['scheduler']) 139 | print('finished loading') 140 | 141 | 142 | 143 | # TODO: Fix transform in both 3d and 2d 144 | def transform(skeleton, mean_skeleton, dims, anchor_points=[5, 9, 1]): 145 | aligned_skeleton = skeleton 146 | return aligned_skeleton -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/3/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import numpy as np 10 | import os 11 | import torch 12 | import torch.utils.data 13 | import pickle 14 | import datasets.VCLA_GAZE.vcla_gaze_config as config 15 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 16 | 17 | class VCLA_GAZE(torch.utils.data.Dataset): 18 | def __init__(self, paths, mode, task, subsample=None): 19 | self.path = paths.inter_root 20 | with open(os.path.join(self.path, 'features', 'vcla_gaze_{}.p'.format(mode)), 'rb') as f: 21 | self.data = pickle.load(f) 22 | self.sequence_ids = list() 23 | for key in self.data.keys(): 24 | self.sequence_ids.append(key) 25 | self.task = task 26 | self.mode = mode 27 | 28 | def __getitem__(self, index): 29 | sequence_id = self.sequence_ids[index] 30 | if self.task == 'affordance': 31 | return self.data[sequence_id]['u_features'], self.data[sequence_id]['u_labels'],\ 32 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 33 | self.data[sequence_id]['activity'], sequence_id, None 34 | else: 35 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 36 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'],\ 37 | self.data[sequence_id]['activity'], sequence_id, None 38 | 39 | def __len__(self): 40 | return len(self.sequence_ids) 41 | 42 | @staticmethod 43 | def collate_fn(batch): 44 | metadata = VCLA_METADATA() 45 | affordance = False 46 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 47 | feature_dim = list(features.shape) 48 | if len(feature_dim) > 2: 49 | 
affordance = True
 50 |         max_seq_length = np.max(
 51 |             np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
 52 |         feature_dim[0] = max_seq_length
 53 |         feature_dim.insert(1, len(batch))  # max_length * batch * (obj_num) * feature_size
 54 |         obj_nums = np.zeros(len(batch))
 55 |         if affordance:
 56 |             max_obj_num = metadata.MAXIMUM_OBJ_VIDEO
 57 |             feature_dim[-2] = max_obj_num
 58 |             total_lengths = np.zeros(len(batch) * max_obj_num)
 59 |         else:
 60 |             total_lengths = np.zeros(len(batch))
 61 |         features_batch = np.zeros(feature_dim)
 62 |         labels_batch = np.zeros(feature_dim[: -1])
 63 |         probs_batch = np.zeros(feature_dim[: 2] + [len(metadata.subactivities)])
 64 | 
 65 |         activities = list()
 66 |         sequence_ids = list()
 67 |         ctc_labels = list()
 68 |         ctc_lengths = list()
 69 |         for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch):
 70 |             for frame in range(features.shape[0]):
 71 |                 probs_batch[frame, batch_i, int(labels[frame])] = 1.0
 72 |             merged_labels = list()
 73 |             current_label = -1
 74 |             for label in labels:
 75 |                 if label != current_label:
 76 |                     current_label = label
 77 |                     merged_labels.append(current_label)
 78 |             ctc_labels.append(merged_labels)
 79 |             ctc_lengths.append(len(merged_labels))
 80 | 
 81 |             if affordance:
 82 |                 obj_num = labels.shape[1]
 83 |                 features_batch[:total_length, batch_i, :obj_num, :] = np.nan_to_num(features)
 84 |                 labels_batch[:total_length, batch_i, :obj_num] = labels
 85 |                 for rel_idx in range(3):  # hardcoded 3 assumes max_obj_num == metadata.MAXIMUM_OBJ_VIDEO == 3
 86 |                     total_lengths[batch_i * 3 + rel_idx] = total_length
 87 |                 obj_nums[batch_i] = obj_num
 88 |             else:
 89 |                 features_batch[:total_length, batch_i, :] = np.nan_to_num(features)
 90 |                 labels_batch[:total_length, batch_i] = labels
 91 |                 total_lengths[batch_i] = total_length
 92 |             activities.append(activity)
 93 |             sequence_ids.append(sequence_id)
 94 | 
 95 |         features_batch = torch.FloatTensor(features_batch)
 96 |         labels_batch = torch.LongTensor(labels_batch)
 97 |         total_lengths = torch.IntTensor(total_lengths)
 98 |         obj_nums = torch.IntTensor(obj_nums)
 99 |         ctc_lengths = torch.IntTensor(ctc_lengths)
100 | 
101 |         # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, all_labels
102 |         return features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, None, None
103 | 
104 | 
105 | def main():
106 |     paths = config.Paths()
107 |     dataset = VCLA_GAZE(paths, 'train', 'affordance')
108 |     data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, num_workers=1,
109 |                                               pin_memory=True, collate_fn=VCLA_GAZE.collate_fn)
110 |     # The ten-field tuple below is what collate_fn returns, so draw one batch from the loader
111 |     features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = next(iter(data_loader))
112 | 
113 |     print('Finished')
114 | 
115 | if __name__ == '__main__':
116 |     main()
--------------------------------------------------------------------------------
/datasets/CAD/finetune/parse_features.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Mar 13, 2017
 3 | 
 4 | @author: Siyuan Qi
 5 | 
 6 | Description of the file.
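                        Parses the CAD-120 ground-truth-segmentation features stored in
                        colon-separated SVM format (object, skeleton, object-object and
                        skeleton-object descriptors) and pickles the collected per-sequence data.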
7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import json 13 | import pickle 14 | 15 | import datasets.CAD.cad_config as config 16 | 17 | 18 | def parse_colon_seperated_features(colon_seperated): 19 | f_list = [int(x.split(':')[1]) for x in colon_seperated] 20 | return f_list 21 | 22 | 23 | def read_features(filename): 24 | data = dict() 25 | with open(filename) as f: 26 | first_line = f.readline().strip() 27 | object_num = int(first_line.split(' ')[0]) 28 | object_object_num = int(first_line.split(' ')[1]) 29 | skeleton_object_num = int(first_line.split(' ')[2]) 30 | 31 | # Object feature 32 | o_aff = [] 33 | o_id = [] 34 | o_fea = [] 35 | for _ in range(object_num): 36 | line = f.readline() 37 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 38 | o_aff.append(int(colon_seperated[0])) 39 | o_id.append(int(colon_seperated[1])) 40 | object_feature = parse_colon_seperated_features(colon_seperated[2:]) 41 | assert len(object_feature) == 180 42 | o_fea.append(object_feature) 43 | data['o_aff'] = o_aff 44 | data['o_id'] = o_id 45 | data['o_fea'] = o_fea 46 | 47 | # Skeleton feature 48 | line = f.readline() 49 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 50 | data['h_act'] = int(colon_seperated[0]) 51 | skeleton_feature = parse_colon_seperated_features(colon_seperated[2:]) 52 | assert len(skeleton_feature) == 630 53 | data['h_fea'] = skeleton_feature 54 | 55 | o_o_id = [] 56 | o_o_fea = [] 57 | # Object-object feature 58 | for _ in range(object_object_num): 59 | line = f.readline() 60 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 61 | o_o_id.append([int(colon_seperated[2]), int(colon_seperated[3])]) 62 | object_object_feature = parse_colon_seperated_features(colon_seperated[4:]) 63 | assert len(object_object_feature) == 200 64 | o_o_fea.append(object_object_feature) 65 | data['o_o_id'] = o_o_id 66 | data['o_o_fea'] = o_o_fea 67 | 68 | s_o_id = [] 69 | s_o_fea = [] 70 | # Skeleton-object feature 71 | for _ in range(skeleton_object_num): 72 | line = f.readline() 73 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 74 | s_o_id.append(int(colon_seperated[2])) 75 | skeleton_object_feature = parse_colon_seperated_features(colon_seperated[3:]) 76 | assert len(skeleton_object_feature) == 400 77 | s_o_fea.append(skeleton_object_feature) 78 | data['s_o_id'] = s_o_id 79 | data['s_o_fea'] = s_o_fea 80 | 81 | for o_id, s_o_id in zip(data['o_id'] , data['s_o_id']): 82 | assert o_id == s_o_id 83 | return data 84 | 85 | 86 | def collect_data(paths): 87 | segments_files_path = os.path.join(paths.data_root, 'features_cad120_ground_truth_segmentation', 'segments_svm_format') 88 | segments_feature_path = os.path.join(paths.data_root, 'features_cad120_ground_truth_segmentation', 'features_binary_svm_format') 89 | 90 | activity_corpus = pickle.load(open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb')) 91 | 92 | subject5_sequences = list() 93 | segment_count_dict = dict() 94 | for activity, tpgs in activity_corpus.items(): 95 | for tpg in tpgs: 96 | segment_count_dict[tpg.id] = len(tpg.terminals) 97 | if tpg.subject == 'Subject5': 98 | subject5_sequences.append(tpg.id) 99 | 100 | data = dict() 101 | for sequence_path_file in os.listdir(segments_files_path): 102 | sequence_id = os.path.splitext(sequence_path_file)[0] 103 | data[sequence_id] = list() 104 | if sequence_id not in segment_count_dict: 105 | continue 106 | 107 | with open(os.path.join(segments_files_path, sequence_path_file)) as f: 108 | first_line = f.readline() 109 | 
segment_feature_num = int(first_line.split(' ')[0]) 110 | # if sequence_id in subject5_sequences: 111 | # print sequence_id, segment_count_dict[sequence_id], segment_feature_num 112 | # assert segment_count_dict[sequence_id] == segment_feature_num 113 | 114 | last_oid = None 115 | for _ in range(segment_feature_num): 116 | segment_feature_filename = f.readline().strip() 117 | segment_data = read_features(os.path.join(segments_feature_path, os.path.basename(segment_feature_filename))) 118 | data[sequence_id].append(segment_data) 119 | if last_oid: 120 | for o_id, s_o_id in zip(last_oid, segment_data['o_id']): 121 | assert o_id == s_o_id 122 | last_oid = segment_data['o_id'] 123 | 124 | with open(os.path.join(paths.tmp_root, 'features.p'), 'wb') as f: 125 | pickle.dump(data, f) 126 | with open(os.path.join(paths.tmp_root, 'features.json'), 'w') as f: 127 | json.dump(data, f, indent=4, separators=(',', ': ')) 128 | with open(os.path.join(paths.tmp_root, 'video_list.p'), 'wb') as f: 129 | pickle.dump(list(data.keys()), f) 130 | 131 | 132 | def main(): 133 | paths = config.Paths() 134 | start_time = time.time() 135 | collect_data(paths) 136 | print('Time elapsed: {}s'.format(time.time() - start_time)) 137 | 138 | 139 | if __name__ == '__main__': 140 | main() 141 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/vcla_gaze_finetune.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/19/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import time 11 | import pickle 12 | import numpy as np 13 | import torch 14 | import torchvision 15 | from skimage import io 16 | import glob 17 | import cv2 18 | import datasets.VCLA_GAZE.vcla_gaze_config as vcla_gaze_config 19 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 20 | import utils.plyutils as utils 21 | metadata = VCLA_METADATA() 22 | 23 | def match_path(img_dir, frame): 24 | rgb_name = glob.glob(os.path.join(img_dir, 'raw_rgb_{0:05}_*'.format(frame))) 25 | depth_name = glob.glob(os.path.join(img_dir, 'raw_depth_{0:05}_*'.format(frame))) 26 | aligned_name = glob.glob(os.path.join(img_dir, 'aligned_rgb_{0:05}.png').format(frame)) 27 | return rgb_name[0], depth_name[0], aligned_name[0] 28 | 29 | def get_valid_bbox(bbox): 30 | x_1 = int(bbox[1]) 31 | y_1 = int(bbox[0]) 32 | x_2 = int(bbox[3]) 33 | y_2 = int(bbox[2]) 34 | return x_1, y_1, x_2, y_2 35 | 36 | class VCLA_GAZE_FEATURE(torch.utils.data.Dataset): 37 | def __init__(self, paths, sequence_ids, transform, input_size, name, task, verbose=False): 38 | self.root = paths.img_root 39 | self.tmp_root = paths.tmp_root 40 | self.inter_root = paths.inter_root 41 | self.imsize = input_size 42 | self.name = name 43 | self.transform = transform 44 | self.sequence_ids = sequence_ids 45 | self.task = task 46 | self.verbose = verbose 47 | with open(os.path.join(paths.tmp_root, 'image_data_list.p'), 'rb') as f: 48 | self.data_list = pickle.load(f) 49 | 50 | # Using framewise information for prediction purposes 51 | def __getitem__(self, index): 52 | sequence_id = self.sequence_ids[index] 53 | sequence_info = self.data_list[sequence_id] 54 | 55 | category, video_id, frame = sequence_id.split('$') 56 | frame = int(frame) 57 | 58 | img_dir = os.path.join(self.root, category, video_id, 'TPV') 59 | rgb_name, depth_name, aligned_name = match_path(img_dir, frame) 60 | 61 | rgb_image = torch.FloatTensor(io.imread(rgb_name)) 62 | depth_image = 
torch.FloatTensor(np.array(io.imread(depth_name), dtype=np.double)) 63 | aligned_image = torch.FloatTensor(io.imread(aligned_name)) 64 | 65 | activity = torch.LongTensor([sequence_info['activity_mat']]) 66 | object_pair = sequence_info['object_mat'] 67 | object_labels = torch.LongTensor(object_pair[:, :-4]) 68 | bboxs = object_pair[:, -4:] 69 | object_images = np.empty((1, 3, self.imsize[0], self.imsize[1])) 70 | for idx, bbox in enumerate(bboxs): 71 | object_image = np.zeros((3, self.imsize[0], self.imsize[1]), dtype=np.float) 72 | # Get valid bounding boxes 73 | x_1, y_1, x_2, y_2 = get_valid_bbox(bbox) 74 | if np.sum(bbox) != 0: 75 | bbox_image = rgb_image[y_1 : y_2, x_1 : x_2, :] 76 | object_image = self.transform(cv2.resize(bbox_image.numpy(), self.imsize, interpolation=cv2.INTER_LINEAR)) 77 | object_images = np.vstack((object_images, np.expand_dims(object_image, axis=0))) 78 | object_images = torch.FloatTensor(object_images[1:]) 79 | rgb_image = torch.FloatTensor(self.transform(cv2.resize(rgb_image.numpy(), self.imsize, interpolation=cv2.INTER_LINEAR))) 80 | affordance = torch.LongTensor(sequence_info['affordance_mat']) 81 | skeleton = torch.FloatTensor(sequence_info['skeleton_mat']) 82 | if self.task != 'affordance': 83 | affordance_features = torch.FloatTensor(np.load(os.path.join(self.inter_root, 'finetune', 'affordance', sequence_id + '.npy'))) 84 | assert(affordance_features.shape[0] == 3) 85 | else: 86 | affordance_features = torch.Tensor([0]) 87 | if self.verbose: 88 | return sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 89 | object_images, affordance, skeleton, object_pair 90 | else: 91 | return sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 92 | object_images, affordance, skeleton, affordance_features 93 | def __len__(self): 94 | return len(self.sequence_ids) 95 | 96 | 97 | # For testing purposes 98 | def main(): 99 | paths = vcla_gaze_config.Paths() 100 | start_time = time.time() 101 | with open(os.path.join(paths.tmp_root, 'image_list.p'), 'rb') as f: 102 | video_list = pickle.load(f) 103 | train_ratio = 0.1 104 | sequence_ids = np.random.permutation(video_list) 105 | sequence_ids = sequence_ids[:int(train_ratio * len(sequence_ids))] 106 | 107 | input_imsize = (224, 224) 108 | normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 109 | std=[0.229, 0.224, 0.225]) 110 | transform = torchvision.transforms.Compose([ 111 | torchvision.transforms.ToTensor(), 112 | normalize, 113 | ]) 114 | training_set = VCLA_GAZE_FEATURE(paths, sequence_ids, transform, input_imsize, 'test', 'activity', verbose=True) 115 | 116 | sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 117 | object_images, affordance, skeleton, object_pair = training_set[0] 118 | utils.visualize_bbox_rgb(sequence_id, (rgb_image.permute(1, 2, 0), object_pair), metadata.objects) 119 | utils.visualize_bbox_image(sequence_id, (object_labels, object_images), metadata.objects) 120 | utils.visualize_skeleton_depth(sequence_id, (aligned_image, skeleton)) 121 | print('Time elapsed: {}s'.format(time.time() - start_time)) 122 | print(sequence_id) 123 | 124 | 125 | if __name__ == '__main__': 126 | main() -------------------------------------------------------------------------------- /datasets/Breakfast/dataparser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 04/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import 
time 12 | import json 13 | import glob 14 | import pickle 15 | from random import shuffle 16 | import numpy as np 17 | import sys 18 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 19 | 20 | import datasets.Breakfast.breakfast_config as config 21 | from datasets.Breakfast.metadata import BREAKFAST_METADATA 22 | metadata = BREAKFAST_METADATA() 23 | 24 | 25 | def parse_data(paths, subsample=False): 26 | metadata_path = os.path.join(paths.data_root, 'metadata') 27 | save_path = os.path.join(paths.inter_root, 'features') 28 | 29 | if not os.path.exists(save_path): 30 | os.makedirs(save_path) 31 | 32 | with open(os.path.join(metadata_path, 'sequence_ids.json'), 'r') as f: 33 | sequence_ids = json.load(f) 34 | data_dict = dict() 35 | count = 0 36 | for sequence_id in sequence_ids: 37 | data_dict[sequence_id] = dict() 38 | activity_id, video_id = sequence_id.split('$') 39 | feature_path = os.path.join(paths.data_root, 'fisher_vector', activity_id) 40 | annotation_file = os.path.join(metadata_path, 'annotations', sequence_id + '.p') 41 | feature_files = glob.glob(os.path.join(feature_path, video_id + '*')) 42 | features = None 43 | for feature_file in feature_files: 44 | features = np.loadtxt(feature_file)[:, 1:] 45 | break 46 | 47 | frames = features.shape[0] 48 | subsample_freq = 1000 49 | total_length = features.shape[0] 50 | subsample_indices = None 51 | if subsample: 52 | subsample_indices = np.arange(0, frames, subsample_freq) 53 | features = features[subsample_indices] 54 | labels = np.ones(features.shape[0]) * metadata.action_index['SIL'] 55 | 56 | data_dict[sequence_id]['features'] = features 57 | data_dict[sequence_id]['total_length'] = features.shape[0] 58 | data_dict[sequence_id]['activity'] = activity_id 59 | data_dict[sequence_id]['seg_lengths'] = list() 60 | 61 | with open(annotation_file, 'rb') as f: 62 | start, end, activity = pickle.load(f) 63 | 64 | all_labels = np.ones(total_length) * metadata.action_index['SIL'] 65 | all_segs = list() 66 | if(end[-1] != total_length): 67 | count += 1 68 | if(abs(end[-1] - total_length) > 10): 69 | print('Feature error for {}'.format(sequence_id)) 70 | for s, e, a in zip(start, end, activity): 71 | if (s > e): 72 | print(s, e) 73 | print('Error for {}'.format(sequence_id)) 74 | exit() 75 | e = min(e, total_length) 76 | all_segs.append(e - s + 1) 77 | all_labels[s - 1 : e] = metadata.action_index[a] 78 | 79 | if subsample: 80 | start = 0 81 | end = 0 82 | all_segs = list() 83 | for idx, sub_idx in enumerate(subsample_indices): 84 | if idx == len(subsample_indices) - 1: 85 | all_segs.append(idx - start + 1) 86 | break 87 | if all_labels[sub_idx] == all_labels[subsample_indices[idx + 1]]: 88 | end = end + 1 89 | else: 90 | all_segs.append(end - start + 1) 91 | start = end + 1 92 | end = start 93 | 94 | labels[idx] = all_labels[sub_idx] 95 | else: 96 | labels = all_labels 97 | 98 | data_dict[sequence_id]['labels'] = labels 99 | data_dict[sequence_id]['all_labels'] = all_labels 100 | data_dict[sequence_id]['seg_lengths'] = all_segs 101 | print('Finished processing for {}, from {} to {}'.format(sequence_id, frames, data_dict[sequence_id]['total_length'])) 102 | 103 | 104 | with open(os.path.join(metadata_path, 'train_test_split.json'), 'r') as f: 105 | split = json.load(f) 106 | 107 | for split_idx, ids in enumerate(split): 108 | train_dict = dict() 109 | test_dict = dict() 110 | for other_idx, other_ids in enumerate(split): 111 | for id in other_ids: 112 | if other_idx != split_idx: 113 | train_dict[id] = data_dict[id] 114 | 
else: 115 | test_dict[id] = data_dict[id] 116 | if not subsample: 117 | train_file = 'breakfast_train_{}_ori.p'.format(split_idx) 118 | test_file = 'breakfast_test_{}_ori.p'.format(split_idx) 119 | val_file = 'breakfast_val_{}_ori.p'.format(split_idx) 120 | all_file = 'breakfast_all_{}_ori.p'.format(split_idx) 121 | else: 122 | train_file = 'breakfast_train_{}_{}.p'.format(split_idx, subsample_freq) 123 | test_file = 'breakfast_test_{}_{}.p'.format(split_idx, subsample_freq) 124 | val_file = 'breakfast_val_{}_{}.p'.format(split_idx, subsample_freq) 125 | all_file = 'breakfast_all_{}_{}.p'.format(split_idx, subsample_freq) 126 | 127 | with open(os.path.join(save_path, train_file), 'wb') as f: 128 | pickle.dump(train_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 129 | with open(os.path.join(save_path, test_file), 'wb') as f: 130 | pickle.dump(test_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 131 | with open(os.path.join(save_path, val_file), 'wb') as f: 132 | pickle.dump(test_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 133 | 134 | with open(os.path.join(save_path, all_file), 'wb') as f: 135 | pickle.dump(data_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 136 | 137 | def main(): 138 | paths = config.Paths() 139 | start_time = time.time() 140 | parse_data(paths, subsample=False) 141 | print('Time elapsed: {}'.format(time.time() - start_time)) 142 | 143 | if __name__ == '__main__': 144 | main() -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/dataparser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import pickle 13 | from random import shuffle 14 | import numpy as np 15 | import sys 16 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 17 | 18 | import datasets.VCLA_GAZE.vcla_gaze_config as config 19 | from models import parsegraph as parsegraph 20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 21 | metadata = VCLA_METADATA() 22 | 23 | 24 | def parse_data(paths): 25 | activity_feature_path = os.path.join(paths.inter_root, 'finetune', 'activity') 26 | affordance_feature_path = os.path.join(paths.inter_root, 'finetune', 'affordance') 27 | save_path = os.path.join(paths.inter_root, 'features') 28 | 29 | # for STAOG formulation 30 | activity_corpus = dict() 31 | with open(os.path.join(paths.tmp_root, 'video_data_list.p'), 'rb') as f: 32 | data_list = pickle.load(f) 33 | data_dict = dict() 34 | for sequence_id, data in data_list.items(): 35 | names = sequence_id.split("$") 36 | activity_id, video_id = names[0], names[1] 37 | data_dict[sequence_id] = dict() 38 | if activity_id not in activity_corpus.keys(): 39 | activity_corpus[activity_id] = list() 40 | tpg = parsegraph.TParseGraph(activity_id, sequence_id=video_id) 41 | segmentation = data['segment'] 42 | activity = data['activity_mat'] 43 | objects = data['object_mat'] 44 | affordance = data['affordance_mat'] 45 | skeleton = data['skeleton_mat'] 46 | obj_nums = data['object_num'] 47 | data_dict[sequence_id]['total_length'] = activity.shape[0] 48 | data_dict[sequence_id]['labels'] = activity 49 | data_dict[sequence_id]['u_labels'] = affordance[:, : obj_nums] 50 | data_dict[sequence_id]['seg_lengths'] = list() 51 | data_dict[sequence_id]['activity'] = activity_id 52 | 53 | # feature reformat for GEP 54 | activity_features = None 55 | affordance_features = None 56 | 57 | start_ori = segmentation[0][0] 58 | for 
(start, end) in segmentation: 59 | end = end - start_ori 60 | start = start - start_ori 61 | data_dict[sequence_id]['seg_lengths'].append(end - start + 1) 62 | subactivity = metadata.subactivities[int(activity[start])] 63 | object_data = objects[start : end + 1, : obj_nums, :] 64 | obj_positions = [object_data[obj_idx, metadata.OBJECT_NUM : ] for obj_idx in range(obj_nums)] 65 | obj_names = [metadata.objects[np.argmax(object_data[0, obj_idx, : metadata.OBJECT_NUM])] for obj_idx in range(obj_nums)] 66 | affordance_labels = affordance[start, : obj_nums] 67 | affordance_labels = [metadata.affordances[int(affordance_labels[obj_idx])] for obj_idx in range(obj_nums)] 68 | spg = parsegraph.SParseGraph(start, end, subactivity, subactivity, obj_names, affordance_labels) 69 | spg.set_obj_positions(obj_positions) 70 | spg.set_skeletons(skeleton[start : end + 1, :]) 71 | tpg.append_terminal(spg) 72 | 73 | for feature_idx in range(start, end + 1): 74 | image_id = sequence_id + '$' + str(feature_idx + start_ori) 75 | activity_feature = np.load(os.path.join(activity_feature_path, '{}.npy'.format(image_id))) 76 | affordance_feature = np.expand_dims(np.load(os.path.join(affordance_feature_path, 77 | '{}.npy'.format(image_id)))[: obj_nums, :], axis =0) 78 | if activity_features is None: 79 | activity_features = activity_feature 80 | else: 81 | activity_features = np.vstack((activity_features, activity_feature)) 82 | if affordance_features is None: 83 | affordance_features = affordance_feature 84 | else: 85 | affordance_features = np.vstack((affordance_features, affordance_feature)) 86 | 87 | data_dict[sequence_id]['features'] = activity_features 88 | data_dict[sequence_id]['u_features'] = affordance_features 89 | 90 | activity_corpus[activity_id].append(tpg) 91 | print('Finished processing for {}'.format(sequence_id)) 92 | with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'wb') as f: 93 | pickle.dump(activity_corpus, f) 94 | if not os.path.exists(save_path): 95 | os.makedirs(save_path) 96 | 97 | training_rate = 0.65 98 | validation_rate = 0.15 99 | training_num = training_rate * len(data_dict) 100 | validation_num = (training_rate + validation_rate) * len(data_dict) 101 | keys = list(data_dict.keys()) 102 | shuffle(keys) 103 | 104 | training_dict = dict() 105 | validation_dict = dict() 106 | testing_dict = dict() 107 | 108 | for idx, key in enumerate(keys): 109 | if idx < training_num: 110 | training_dict[key] = data_dict[key] 111 | if idx >= training_num and idx < validation_num: 112 | validation_dict[key] = data_dict[key] 113 | if idx >= validation_num: 114 | testing_dict[key] = data_dict[key] 115 | 116 | with open(os.path.join(save_path, 'vcla_gaze_all.p'), 'wb') as f: 117 | pickle.dump(data_dict, f) 118 | with open(os.path.join(save_path, 'vcla_gaze_train.p'), 'wb') as f: 119 | pickle.dump(training_dict, f) 120 | with open(os.path.join(save_path, 'vcla_gaze_val.p'), 'wb') as f: 121 | pickle.dump(validation_dict, f) 122 | with open(os.path.join(save_path, 'vcla_gaze_test.p'), 'wb') as f: 123 | pickle.dump(testing_dict, f) 124 | 125 | def main(): 126 | paths = config.Paths() 127 | start_time = time.time() 128 | parse_data(paths) 129 | print('Time elapsed: {}'.format(time.time() - start_time)) 130 | 131 | if __name__ == '__main__': 132 | main() -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/29/18 3 | 4 | @author: 
Baoxiong Jia
  5 | 
  6 | Description:
  7 | 
  8 | """
  9 | 
 10 | import torch
 11 | import torchvision
 12 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA
 13 | metadata = VCLA_METADATA()
 14 | 
 15 | model_dict = {
 16 |     'resnet': lambda num_classes, feature_dims: ResNet152(num_classes=num_classes, feature_dim=feature_dims),
 17 |     'densenet': lambda num_classes, feature_dims: DenseNet(num_classes=num_classes, feature_dim=feature_dims),
 18 |     'vgg16': lambda num_classes, feature_dims: VGG16(num_classes=num_classes, feature_dim=feature_dims)
 19 | }
 20 | 
 21 | class VGG16(torch.nn.Module):
 22 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 23 |         super(VGG16, self).__init__()
 24 |         self.features = torchvision.models.vgg16(pretrained=True).features
 25 |         self.classifier = torch.nn.Sequential(
 26 |             torch.nn.Linear(512 * 7 * 7, 4096),
 27 |             torch.nn.ReLU(True),
 28 |             torch.nn.Dropout(),
 29 |             torch.nn.Linear(4096, 4096),
 30 |             torch.nn.ReLU(),
 31 |             torch.nn.Dropout(),
 32 |             torch.nn.Linear(4096, feature_dim)
 33 |         )
 34 |         self.last = torch.nn.Linear(feature_dim, num_classes)
 35 |         self._initialize_weights()
 36 | 
 37 |     def forward(self, x):
 38 |         x = self.features(x)
 39 |         x = x.view(x.size(0), -1)
 40 |         x = self.classifier(x)
 41 |         output = self.last(x)
 42 |         return x, output
 43 | 
 44 |     def _initialize_weights(self):
 45 |         for m in self.modules():
 46 |             if isinstance(m, torch.nn.BatchNorm2d):
 47 |                 torch.nn.init.constant_(m.weight, 1)
 48 |                 torch.nn.init.zeros_(m.bias)
 49 |             elif isinstance(m, torch.nn.Linear):
 50 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 51 |                 torch.nn.init.zeros_(m.bias)
 52 | 
 53 | class ResNet152(torch.nn.Module):
 54 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 55 |         super(ResNet152, self).__init__()
 56 |         self.features = torchvision.models.resnet152(pretrained=True)
 57 |         self.fc_ = torch.nn.Linear(1000, feature_dim)
 58 |         self.fc = torch.nn.Linear(feature_dim, num_classes)
 59 |         self._initialize_weights()
 60 | 
 61 |     def forward(self, x):
 62 |         x = self.features(x)
 63 |         x = x.view(x.size(0), -1)
 64 |         x = self.fc_(x)
 65 |         output = self.fc(x)
 66 |         return x, output
 67 | 
 68 |     def _initialize_weights(self):
 69 |         # Note: this matches every nn.Linear module, including the pretrained backbone's own fc layer
 70 |         for m in self.modules():
 71 |             if isinstance(m, torch.nn.Linear):
 72 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 73 |                 torch.nn.init.zeros_(m.bias)
 74 | 
 75 | class DenseNet(torch.nn.Module):
 76 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 77 |         super(DenseNet, self).__init__()
 78 |         self.features = torchvision.models.densenet161(pretrained=True)
 79 |         self.fc_ = torch.nn.Linear(1000, feature_dim)
 80 |         self.fc = torch.nn.Linear(feature_dim, num_classes)
 81 |         self._initialize_weights()
 82 | 
 83 |     def forward(self, x):
 84 |         x = self.features(x)
 85 |         x = x.view(x.size(0), -1)
 86 |         x = self.fc_(x)
 87 |         output = self.fc(x)
 88 |         return x, output
 89 | 
 90 |     def _initialize_weights(self):
 91 |         for m in self.modules():
 92 |             if isinstance(m, torch.nn.Linear):
 93 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 94 |                 torch.nn.init.zeros_(m.bias)
 95 | 
 96 | class AffordanceNet(torch.nn.Module):
 97 |     def __init__(self, num_classes, name='resnet', feature_dim=200):
 98 |         super(AffordanceNet, self).__init__()
 99 |         self.network = model_dict[name](num_classes, feature_dim)
100 | 
101 |     def forward(self, x):
102 |         return self.network(x)
103 | 
104 | class ActivityNet(torch.nn.Module):
105 |     def __init__(self, num_classes, name='resnet', feature_dim=500, obj_feature_dim=200):
106 |         super(ActivityNet, self).__init__()
107 |         self.network = model_dict[name](num_classes, 2 *
feature_dim) 107 | self.pooling = torch.nn.MaxPool2d(3, stride=1, padding=1) 108 | self.fc = torch.nn.Sequential( 109 | torch.nn.Linear(2 * feature_dim + 3 * obj_feature_dim, 2 * feature_dim), 110 | torch.nn.BatchNorm1d(2 * feature_dim), 111 | torch.nn.ReLU(), 112 | torch.nn.Linear(2 * feature_dim, feature_dim) 113 | ) 114 | self.fc_ = torch.nn.Linear(feature_dim, num_classes) 115 | 116 | def forward(self, x, affordance_features): 117 | feature, x = self.network(x) 118 | u_feature = self.pooling(affordance_features).view(affordance_features.size(0), -1) 119 | features = self.fc(torch.cat((feature, u_feature), 1)) 120 | output = self.fc_(features) 121 | return features, output 122 | 123 | class TaskNet(torch.nn.Module): 124 | def __init__(self, task='affordance', name='resnet', feature_dim=1500, obj_feature_dim=1000): 125 | super(TaskNet, self).__init__() 126 | self.task = task 127 | if task == 'affordance': 128 | self.network = AffordanceNet(num_classes=metadata.AFFORDANCE_NUM, name=name, feature_dim=obj_feature_dim) 129 | else: 130 | self.network = ActivityNet(num_classes=metadata.ACTION_NUM, name=name, feature_dim=feature_dim, obj_feature_dim=obj_feature_dim) 131 | 132 | def forward(self, x, features=None): 133 | if self.task == 'affordance': 134 | return self.network(x) 135 | else: 136 | return self.network(x, features) 137 | 138 | # For test purposes only 139 | def main(): 140 | normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 141 | std=[0.229, 0.224, 0.225]) 142 | transform = torchvision.transforms.Compose([ 143 | torchvision.transforms.ToTensor(), 144 | normalize, 145 | ]) 146 | 147 | if __name__ == '__main__': 148 | main() -------------------------------------------------------------------------------- /datasets/CAD/cad.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import numpy as np 12 | import torch 13 | import torch.utils.data 14 | from datasets.CAD.metadata import CAD_METADATA 15 | class CAD(torch.utils.data.Dataset): 16 | def __init__(self, paths, mode, task, subsample=None): 17 | super(CAD, self).__init__() 18 | self.paths = paths.inter_root 19 | with open(os.path.join(self.paths, 'features', 'cad_{}.p'.format(mode)), 'rb') as f: 20 | self.data = pickle.load(f, encoding='latin1') 21 | self.sequence_ids = list() 22 | for key in self.data.keys(): 23 | self.sequence_ids.append(key) 24 | self.task = task 25 | self.mode = mode 26 | 27 | def __getitem__(self, index): 28 | sequence_id = self.sequence_ids[index] 29 | if self.task == 'affordance': 30 | return self.data[sequence_id]['u_features'], self.data[sequence_id]['u_labels'],\ 31 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 32 | self.data[sequence_id]['activity'], sequence_id, None 33 | else: 34 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 35 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'],\ 36 | self.data[sequence_id]['activity'], sequence_id, None 37 | 38 | def __len__(self): 39 | return len(self.sequence_ids) 40 | 41 | @staticmethod 42 | def collate_fn(batch): 43 | metadata = CAD_METADATA() 44 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 45 | feature_size = features[0].shape[1] 46 | label_num = len(metadata.subactivities) 47 | 48 | max_seq_length = np.max(np.array([total_length for 
(features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch])) 49 | features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 50 | labels_batch = np.ones((max_seq_length, len(batch))) * -1 51 | probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 52 | total_lengths = np.zeros(len(batch)) 53 | ctc_labels = list() 54 | ctc_lengths = list() 55 | activities = list() 56 | sequence_ids = list() 57 | 58 | for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch): 59 | current_len = 0 60 | ctc_labels.append(labels) 61 | ctc_lengths.append(len(labels)) 62 | for seg_i, feature in enumerate(features): 63 | features_batch[current_len:current_len + seg_lengths[seg_i], batch_i, :] = np.repeat(features[seg_i], 64 | seg_lengths[seg_i], 65 | axis=0) 66 | labels_batch[current_len:current_len + seg_lengths[seg_i], batch_i] = labels[seg_i] 67 | probs_batch[current_len:current_len + seg_lengths[seg_i], batch_i, labels[seg_i]] = 1.0 68 | current_len += seg_lengths[seg_i] 69 | total_lengths[batch_i] = total_length 70 | activities.append(activity) 71 | sequence_ids.append(sequence_id) 72 | 73 | features_batch = torch.FloatTensor(features_batch) 74 | labels_batch = torch.LongTensor(labels_batch) 75 | probs_batch = torch.FloatTensor(probs_batch) 76 | total_lengths = torch.IntTensor(total_lengths) 77 | ctc_lengths = torch.IntTensor(ctc_lengths) 78 | 79 | return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None 80 | 81 | # @staticmethod 82 | # def collate_fn(batch): 83 | # metadata = CAD_METADATA() 84 | # features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 85 | # feature_size = features[0].shape[1] 86 | # label_num = len(metadata.subactivities) 87 | # 88 | # max_seq_length = len(labels) 89 | # features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 90 | # labels_batch = np.ones((max_seq_length, len(batch))) * -1 91 | # probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 92 | # total_lengths = np.zeros(len(batch)) 93 | # ctc_labels = list() 94 | # ctc_lengths = list() 95 | # activities = list() 96 | # sequence_ids = list() 97 | # 98 | # for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate( 99 | # batch): 100 | # current_len = 0 101 | # ctc_labels.append(labels) 102 | # ctc_lengths.append(len(labels)) 103 | # for seg_i, feature in enumerate(features): 104 | # features_batch[current_len:current_len + seg_lengths[seg_i], batch_i, :] = np.repeat(features[seg_i], 105 | # 1, axis=0) 106 | # labels_batch[current_len:current_len + 1, batch_i] = labels[seg_i] 107 | # probs_batch[current_len:current_len + 1, batch_i, labels[seg_i]] = 1.0 108 | # current_len += 1 109 | # total_lengths[batch_i] = total_length 110 | # activities.append(activity) 111 | # sequence_ids.append(sequence_id) 112 | # 113 | # features_batch = torch.FloatTensor(features_batch) 114 | # labels_batch = torch.LongTensor(labels_batch) 115 | # probs_batch = torch.FloatTensor(probs_batch) 116 | # total_lengths = torch.IntTensor(total_lengths) 117 | # ctc_lengths = torch.IntTensor(ctc_lengths) 118 | # 119 | # return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None -------------------------------------------------------------------------------- /models/parser/test.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Created on Jan 25, 2018 3 | 4 | @author: Siyuan Qi 5 | 6 | Description of the file. 7 | 8 | """ 9 | 10 | import os 11 | import time 12 | 13 | import nltk 14 | import numpy as np 15 | 16 | import config 17 | import datasets 18 | import models.parser.grammarutils as grammarutils 19 | import models.parser.GEP as generalizedearley 20 | import models.parser.GEP_online as generalizedearley_online 21 | 22 | 23 | def parsing_examples(): 24 | rules = list() 25 | rules.append("GAMMA -> R [1.0]") 26 | rules.append("R -> N O N [0.2]") 27 | rules.append("R -> N [0.3]") 28 | rules.append("R -> '0' [0.5]") 29 | rules.append("N -> '0' [0.1]") 30 | rules.append("N -> '1' [0.9]") 31 | rules.append("O -> '+' [0.4]") 32 | rules.append("O -> '-' [0.6]") 33 | 34 | grammar_rules = grammarutils.get_pcfg(rules) 35 | grammar = nltk.PCFG.fromstring(grammar_rules) 36 | 37 | sentence = '0' 38 | tokens = sentence.split(' ') 39 | 40 | # earley_parser = nltk.EarleyChartParser(grammar, trace=1) 41 | # e_chart = earley_parser.chart_parse(tokens) 42 | 43 | symbols = ['0', '1', '+', '-'] 44 | symbol_index = dict() 45 | for s in symbols: 46 | symbol_index[s] = symbols.index(s) 47 | grammar_rules = grammarutils.get_pcfg(rules, index=True, mapping=None) 48 | grammar = nltk.PCFG.fromstring(grammar_rules) 49 | 50 | # grammar_file = os.path.join('/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/grammar/cleaning_objects.pcfg') 51 | # import datasets.CAD.metadata as metadata 52 | # grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=metadata.CAD_METADATA().action_index) 53 | # symbols = metadata.CAD_METADATA().actions 54 | 55 | classifier_output = [ 56 | [0.7, 0.1, 0.1, 0.1], 57 | [0.67, 0.11, 0.20, 0.02], 58 | [0.25, 0.25, 0.49, 0.01], 59 | [0.01, 0.14, 0.49, 0.36], 60 | [0.04, 0.20, 0.7, 0.06], 61 | [0.15, 0.6, 0.20, 0.05], 62 | [0.01, 0.7, 0.19, 0.1], 63 | [0.1, 0.7, 0.1, 0.1], 64 | [0.14, 0.63, 0.1, 0.13], 65 | ] 66 | classifier_output = np.array(classifier_output) 67 | 68 | # # np.random.seed(0) 69 | # classifier_output = np.random.rand(2000, 10) 70 | # classifier_output = classifier_output / np.sum(classifier_output, axis=1)[:, None] # Normalize to probability 71 | 72 | # # print(classifier_output.shape) 73 | gen_earley_parser = generalizedearley.GeneralizedEarley(grammar, mapping=symbol_index) 74 | start_time = time.time() 75 | best_string, prob = gen_earley_parser.parse(classifier_output) 76 | gen_earley_parser.cached_prob_tex() 77 | print('classic gep', best_string, prob, '{}s'.format(time.time()-start_time)) 78 | 79 | print('================================================================') 80 | print('================================================================') 81 | # Online GEP 82 | gen_earley_parser_online = generalizedearley_online.GeneralizedEarley(grammar, len(symbols), mapping=symbol_index) 83 | start_time = time.time() 84 | for t in range(classifier_output.shape[0]): 85 | gen_earley_parser_online.update_prob(classifier_output[t, :]) 86 | best_string, prob = gen_earley_parser_online.parse() 87 | gen_earley_parser_online.future_predict() 88 | print('online gep', best_string, prob) 89 | print('{}s'.format(time.time()-start_time)) 90 | exit() 91 | 92 | # for t : {update gep -> parse} 93 | best_string, prob = gen_earley_parser.parse(classifier_output) 94 | prob_sum = 0 95 | for key, data in gen_earley_parser._cached_log_prob.items(): 96 | 
print('-----------------------------------------------------------') 97 | print('String: {}'.format(key)) 98 | print('Parsing Probability: {}'.format(np.exp(data[-2]))) 99 | print('Grammar Prefix Probability: {}'.format(gen_earley_parser._cached_grammar_prob[key])) 100 | # print(best_string, np.exp(prob)) 101 | 102 | 103 | def test_generalized_earley(grammar, classifier_output): 104 | gen_earley_parser = generalizedearley.GeneralizedEarley(grammar) 105 | best_string, prob = gen_earley_parser.parse(classifier_output) 106 | print('best_string with prob {:.3f}:'.format(prob), best_string) 107 | print(gen_earley_parser.compute_labels()) 108 | print(np.argmax(classifier_output, axis=1)) 109 | 110 | 111 | def test_earley(grammar, tokens): 112 | earley_parser = nltk.EarleyChartParser(grammar, trace=1) 113 | e_chart = earley_parser.chart_parse(tokens) 114 | for edge in e_chart.edges(): 115 | print(edge, edge.end()) 116 | 117 | print(grammarutils.earley_predict(grammar, tokens)) 118 | 119 | 120 | def test_valid(): 121 | paths = config.Paths() 122 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', 'stacking_objects.pcfg') 123 | 124 | # sentence = 'null reaching moving placing' 125 | # grammar = grammarutils.read_grammar(grammar_file, index=False) 126 | # test_earley(grammar, sentence.split()) 127 | 128 | sentence = 'null reaching' 129 | tokens = sentence.split() 130 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 131 | seg_length = 15 132 | correct_prob = 0.8 133 | classifier_output = np.ones((seg_length*2, 10)) * 1e-10 134 | classifier_output[:seg_length, datasets.cad_metadata.subactivity_index[tokens[0]]] = correct_prob 135 | classifier_output[seg_length:, datasets.cad_metadata.subactivity_index[tokens[1]]] = correct_prob 136 | 137 | classifier_output[:seg_length, datasets.cad_metadata.subactivity_index[tokens[0]]+1] = 1 - correct_prob 138 | classifier_output[seg_length:, datasets.cad_metadata.subactivity_index[tokens[1]]+1] = 1 - correct_prob 139 | test_generalized_earley(grammar, classifier_output) 140 | 141 | 142 | def test_time(): 143 | paths = config.Paths() 144 | start_time = time.time() 145 | np.random.seed(int(start_time)) 146 | classifier_output = np.random.rand(100000, 10) 147 | classifier_output = classifier_output / np.sum(classifier_output, axis=1)[:, None] # Normalize to probability 148 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')): 149 | if not pcfg.endswith('.pcfg'): 150 | continue 151 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg) 152 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 153 | test_generalized_earley(grammar, classifier_output) 154 | print('Time elapsed: {}s'.format(time.time() - start_time)) 155 | 156 | 157 | def test_grammar(): 158 | paths = config.Paths() 159 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')): 160 | if not pcfg.endswith('.pcfg'): 161 | continue 162 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg) 163 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 164 | corpus_file = os.path.join(paths.tmp_root, 'corpus', 'cad', pcfg.replace('pcfg', 'txt')) 165 | with open(corpus_file, 'r') as f: 166 | for line in f: 167 | tokens = [str(datasets.cad_metadata.subactivity_index[token]) for token in line.strip(' *#\n').split(' ')] 168 | earley_parser = 
nltk.EarleyChartParser(grammar, trace=0) 169 | e_chart = earley_parser.chart_parse(tokens) 170 | print(e_chart.edges()[-1]) 171 | 172 | 173 | def visualize_grammar(): 174 | paths = config.Paths() 175 | dataset_name = 'wnp' 176 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', dataset_name)): 177 | if not pcfg.endswith('.pcfg'): 178 | continue 179 | grammar_file = os.path.join(paths.tmp_root, 'grammar', dataset_name, pcfg) 180 | grammar = grammarutils.read_grammar(grammar_file, insert=False) 181 | dot_filename = os.path.join(paths.tmp_root, 'visualize', 'grammar', dataset_name, pcfg.replace('.pcfg', '.dot')) 182 | pdf_filename = os.path.join(paths.tmp_root, 'visualize', 'grammar', dataset_name, pcfg.replace('.pcfg', '.pdf')) 183 | grammarutils.grammar_to_dot(grammar, dot_filename) 184 | os.system('dot -Tpdf {} -o {}'.format(dot_filename, pdf_filename)) 185 | 186 | 187 | def main(): 188 | # test_grammar() 189 | # test_valid() 190 | # test_time() 191 | # visualize_grammar() 192 | parsing_examples() 193 | 194 | 195 | if __name__ == '__main__': 196 | main() -------------------------------------------------------------------------------- /experiments/GEP/gep_ablation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | # System imports 11 | import sys 12 | sys.path.append('/mnt/hdd/home/baoxiong/Projects/TPAMI2019/src') 13 | 14 | import os 15 | import argparse 16 | import json 17 | from tqdm import tqdm 18 | 19 | # Libraries 20 | import numpy as np 21 | import torch 22 | import torch.nn.functional as F 23 | 24 | # Local imports 25 | import models.parser.GEP_adj as GEP 26 | import models.parser.grammarutils as grammarutils 27 | import utils.logutils as logutils 28 | import utils.evalutils as evalutils 29 | import utils.vizutils as vizutils 30 | import experiments.exp_config as exp_config 31 | 32 | def inference(model_outputs, activities, sequence_ids, args): 33 | model_output_probs = torch.nn.Softmax(dim=-1)(model_outputs) 34 | model_output_probs = model_output_probs.data.cpu().numpy() 35 | batch_earley_pred_labels = list() 36 | batch_tokens = list() 37 | batch_seg_pos = list() 38 | for batch_i in range(model_outputs.size()[1]): 39 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i]+'.pcfg') 40 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.action_index) 41 | gen_earley_parser = GEP.GeneralizedEarley(grammar, args.prior) 42 | best_string, prob = gen_earley_parser.parse(model_output_probs[:, batch_i, :]) 43 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 44 | 45 | # Back trace to get labels of the entire sequence 46 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 47 | batch_earley_pred_labels.append(earley_pred_labels) 48 | batch_tokens.append(tokens) 49 | batch_seg_pos.append(seg_pos) 50 | 51 | _, nn_pred_labels = torch.max(model_outputs, dim=2) 52 | 53 | return nn_pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos 54 | 55 | def validate(data_loader, model, args): 56 | all_gt_detections = list() 57 | all_detections = list() 58 | 59 | task_acc_ratio = logutils.AverageMeter() 60 | task_macro_prec = logutils.AverageMeter() 61 | task_macro_rec = logutils.AverageMeter() 62 | task_macro_f1 = logutils.AverageMeter() 63 | task_acc_ratio_nn = logutils.AverageMeter() 64 | 65 | for batch_idx, data_unit in 
enumerate(tqdm(data_loader, desc='GEP evaluation')): 66 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 67 | epsilon = torch.log(torch.tensor(1e-4)) 68 | maximum = torch.log(torch.tensor(1 - 1e-4 * (len(args.metadata.actions) - 1))) 69 | model_outputs = torch.ones((features_batch.size(0), features_batch.size(1), len(args.metadata.actions))) * epsilon 70 | model_outputs = model_outputs.scatter_(2, labels_batch.type(torch.LongTensor).unsqueeze(1), maximum) 71 | model_outputs = F.softmax(model_outputs / args.temperature, dim=-1) 72 | # model_outputs = torch.ones((features_batch.size(0), features_batch.size(1), len(args.metadata.actions))) / len(args.metadata.actions) 73 | 74 | # Inference 75 | tqdm.write('[{}] Inference'.format(sequence_ids[0])) 76 | _, nn_pred_labels = torch.max(model_outputs, dim=-1) 77 | nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist() 78 | pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args) 79 | # Evaluation 80 | # Frame-wise detection 81 | detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()] 82 | if args.subsample != 1: 83 | all_total_labels, all_total_lengths = additional 84 | gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist() 85 | video_length = len(gt_detections) 86 | 87 | detections = evalutils.upsample(detections, freq=args.subsample, length=video_length) 88 | nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length) 89 | else: 90 | gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist() 91 | detections = detections[:total_lengths[0]] 92 | video_length = len(gt_detections) 93 | 94 | # vizutils.plot_segmentation([gt_detections, nn_detections, detections], video_length, 95 | # filename=os.path.join(args.paths.visualize_root, '{}.jpg'.format(sequence_ids[0])), border=False) 96 | 97 | micro_prec = logutils.compute_accuracy(gt_detections, detections) 98 | micro_prec_nn = logutils.compute_accuracy(gt_detections, nn_detections) 99 | macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_detections, detections, metric='macro') 100 | task_acc_ratio.update(micro_prec, video_length) 101 | task_acc_ratio_nn.update(micro_prec_nn, video_length) 102 | task_macro_prec.update(macro_prec, video_length) 103 | task_macro_rec.update(macro_rec, video_length) 104 | task_macro_f1.update(macro_f1, video_length) 105 | 106 | all_gt_detections.extend(gt_detections) 107 | all_detections.extend(detections) 108 | 109 | micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections) 110 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections, 111 | metric='macro') 112 | tqdm.write('[Evaluation] Micro Prec: {}\t' 113 | 'Macro Precision: {}\t' 114 | 'Macro Recall: {}\t' 115 | 'Macro F-score: {}'.format(micro_prec, macro_prec, macro_recall, macro_fscore)) 116 | 117 | micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections) 118 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections, metric='macro') 119 | tqdm.write('Detection:\n' 120 | 'Micro Prec: {}\t' 121 | 'NN Prec:{}\t' 122 | 'Macro Precision: {}\t' 123 | 'Macro Recall: {}\t' 124 | 'Macro F-score: {}\n\n'.format(micro_prec, task_acc_ratio_nn.avg, macro_prec, macro_recall, macro_fscore)) 125 | 126 | def 
main(args):
127 |     exp_info = exp_config.Experiment(args.dataset)
128 |     paths = exp_info.paths
129 |     args.paths = paths
130 |     args.metadata = exp_info.metadata
131 | 
132 |     np.random.seed(args.seed)
133 |     torch.manual_seed(args.seed)
134 | 
135 |     args.batch_size = 1
136 |     feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
137 | 
138 |     validate(test_loader, None, args=args)
139 | 
140 | 
141 | def parse_args():
142 |     parser = argparse.ArgumentParser()
143 |     def str2bool(v):
144 |         if v.lower() in ('yes', 'true', 't', 'y', '1'):
145 |             return True
146 |         elif v.lower() in ('no', 'false', 'f', 'n', '0'):
147 |             return False
148 |         else:
149 |             raise argparse.ArgumentTypeError('Unsupported value encountered')
150 |     parser.add_argument('--dataset', default='CAD', type=str,
151 |                         help='indicating which dataset to use')
152 |     parser.add_argument('--seed', default=12345, type=int,
153 |                         help='Default seed for all random generators')
154 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
155 |                         help='Option flag for using CUDA training (default: True)')
156 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
157 |                         help='number of data loading workers (default: 1)')
158 |     parser.add_argument('--task', default='activity', type=str,
159 |                         help='Default working task activity/affordance')
160 |     parser.add_argument('--epochs', default=100, type=int, metavar='N',
161 |                         help='number of epochs for training (default: 100)')
162 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
163 |                         help='batch size for training (default: 1)')
164 |     parser.add_argument('--subsample', default=1, type=int,
165 |                         help='subsample frequency for Breakfast dataset')
166 |     parser.add_argument('--temperature', default=1.0, type=float,
167 |                         help='The temperature used for ablative study')
168 |     parser.add_argument('--prior', default=False, type=str2bool,
169 |                         help='Flag indicating prior usage (default: False)')
170 |     args = parser.parse_args()
171 |     return args
172 | 
173 | 
174 | if __name__ == '__main__':
175 |     args = parse_args()
176 |     main(args)
177 | 
--------------------------------------------------------------------------------
/utils/vizutils.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Feb 28, 2017
3 | 
4 | @author: Siyuan Qi
5 | 
6 | Description of the file.
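
A minimal usage sketch for the plotting helpers in this file. The label
tracks and class names below are hypothetical placeholders (not dataset
output), and the import assumes the project src root is on sys.path:

    import numpy as np
    import sklearn.metrics
    import utils.vizutils as vizutils

    gt = np.random.randint(0, 10, 600).tolist()    # per-frame ground-truth labels
    det = np.random.randint(0, 10, 600).tolist()   # per-frame detected labels
    vizutils.plot_segmentation([gt, det], endframe=600, vmax=10, border=False)

    cm = sklearn.metrics.confusion_matrix(gt, det, labels=range(10))
    vizutils.plot_confusion_matrix(cm, [str(c) for c in range(10)], normalize=True)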
7 | 8 | """ 9 | 10 | import os 11 | import itertools 12 | import pickle 13 | 14 | import numpy as np 15 | import matplotlib 16 | import matplotlib.pyplot as plt 17 | import sklearn.metrics 18 | # import tabulate 19 | 20 | import config 21 | import datasets.VCLA_GAZE.metadata as metadata 22 | 23 | 24 | def plot_segmentation(input_labels_list, endframe, vmax=None, filename=None, border=True, cmap=plt.get_cmap('gist_rainbow')): 25 | plt_idx = 0 26 | aspect_ratio = 60 27 | fig = plt.figure(figsize=(28, 3)) 28 | for input_labels in input_labels_list: 29 | seg_image = np.empty((int(endframe/aspect_ratio), endframe)) 30 | 31 | for frame in range(endframe): 32 | seg_image[:, frame] = input_labels[frame] 33 | 34 | plt_idx += 1 35 | ax = plt.subplot(len(input_labels_list), 1, plt_idx) 36 | if not border: 37 | ax.axis('off') 38 | if vmax: 39 | ax.imshow(seg_image, vmin=0, vmax=vmax, cmap=cmap) 40 | else: 41 | ax.imshow(seg_image, cmap=cmap) 42 | ax.set_ylabel('LSTM') 43 | 44 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 45 | if not filename: 46 | plt.show() 47 | else: 48 | plt.savefig(filename) 49 | plt.close() 50 | 51 | 52 | def visualize_tpg_labeling(gt_subactivity, gt_affordance, tpg, obj_num, end_frame): 53 | # Visualization of segmentation and labeling results for subactivity and affordance 54 | start_frame = tpg.terminals[0].start_frame 55 | end_frame = np.min([gt_subactivity.shape[0], tpg.terminals[-1].end_frame-start_frame, end_frame]) 56 | # Get labels for every frame 57 | subactivity_lables = np.empty(end_frame, dtype=int) 58 | affordance_labels = np.empty((obj_num, end_frame), dtype=int) 59 | for spg in tpg.terminals: 60 | # Note: a spg spans [spg.start_frame, spg.end_frame], hence need to +1 in range() 61 | for frame in range(spg.start_frame, spg.end_frame+1): 62 | # print frame, spg.subactivity, metadata.subactivities[spg.subactivity] 63 | if frame >= end_frame + start_frame: 64 | break 65 | subactivity_lables[frame-start_frame] = spg.subactivity 66 | affordance_labels[:, frame-start_frame] = spg.affordance 67 | 68 | # Add labels to the plot list 69 | plot_labels = [gt_subactivity[:end_frame], subactivity_lables, (gt_subactivity[:end_frame]-subactivity_lables) == 0] 70 | for o in range(obj_num): 71 | plot_labels.append(gt_affordance[o, :end_frame]) 72 | plot_labels.append(affordance_labels[o, :]) 73 | plot_labels.append((gt_affordance[o, :end_frame]-affordance_labels[o, :]) == 0) 74 | plot_segmentation(plot_labels, end_frame) 75 | 76 | 77 | def plot_confusion_matrix(cm, classes, filename=None, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues): 78 | """ 79 | This function prints and plots the confusion matrix. 80 | Normalization can be applied by setting `normalize=True`. 81 | """ 82 | if normalize: 83 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 84 | print("Normalized confusion matrix") 85 | else: 86 | print('Confusion matrix, without normalization') 87 | thresh = cm.max() / 2. 
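    # Readability note for the cell annotations drawn below: 'thresh' is half of
    # the largest cell value; cells above it (dark under the default Blues
    # colormap) get white text, the remaining cells get black text.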
88 | 89 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 90 | plt.title(title) 91 | # plt.colorbar() 92 | tick_marks = np.arange(len(classes)) 93 | plt.xticks(tick_marks, classes, rotation=45, ha='right') 94 | plt.yticks(tick_marks, classes) 95 | 96 | ax = plt.gca() 97 | ax.tick_params(axis=u'both', which=u'both', length=0) 98 | # matplotlib.rcParams.update({'font.size': 15}) 99 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 100 | if cm[i, j] != 0: 101 | plt.text(j, i, '{0:.2f}'.format(cm[i, j]), verticalalignment='center', horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") 102 | 103 | plt.tight_layout() 104 | # plt.ylabel('True label') 105 | # plt.xlabel('Predicted label') 106 | if not filename: 107 | plt.show() 108 | else: 109 | plt.savefig(filename) 110 | plt.close() 111 | 112 | 113 | def save_results(paths, results): 114 | result_folder = os.path.join(paths.tmp_root, 'results') 115 | if not os.path.exists(result_folder): 116 | os.makedirs(result_folder) 117 | os.makedirs(os.path.join(result_folder, 'figs')) 118 | 119 | with open(os.path.join(result_folder, 'labels.p'), 'wb') as f: 120 | pickle.dump(results, f) 121 | 122 | 123 | def load_results(paths): 124 | with open(os.path.join(paths.tmp_root, 'results', 'labels.p'), 'rb') as f: 125 | results = pickle.load(f) 126 | return results 127 | 128 | ''''' 129 | def print_latex_table(data, row_labels, col_labels): 130 | data = data * 100 131 | row_labels = np.array(row_labels) 132 | row_labels = np.reshape(row_labels, [row_labels.shape[0], 1]) 133 | data = np.hstack((row_labels, data)) 134 | print 135 | print(tabulate.tabulate(data, tablefmt="latex", floatfmt=".1f", numalign="center", headers=col_labels)) 136 | ''''' 137 | 138 | def analyze_results(paths): 139 | def get_f1_score(precision, recall): 140 | return 2 * (precision * recall) / (precision + recall) 141 | 142 | def format_table(predict_frame): 143 | data = np.empty((2, 8)) 144 | data[0, 0:3] = 1.0/len(metadata.subactivities[:-1]) 145 | data[0, 3] = get_f1_score(data[0, 0], data[0, 0]) 146 | data[0, 4:7] = 1.0/len(metadata.affordances) 147 | data[0, 7] = get_f1_score(data[0, 4], data[0, 4]) 148 | 149 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities)-1), average='micro') 150 | data[1, 0] = precision 151 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities)-1), average='macro') 152 | data[1, 1] = precision 153 | data[1, 2] = recall 154 | data[1, 3] = get_f1_score(precision, recall) 155 | 156 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances)), average='micro') 157 | data[1, 4] = precision 158 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances)), average='macro') 159 | data[1, 5] = precision 160 | data[1, 6] = recall 161 | data[1, 7] = get_f1_score(precision, recall) 162 | 163 | print_latex_table(data, methods, metrics) 164 | 165 | # ====================== Function starts here ====================== 166 | # fig_folder = os.path.join(paths.tmp_root, 'results', 'figs') 167 | fig_folder = os.path.join(paths.project_root, 'fig', 'raw') 168 | if not 
os.path.exists(fig_folder): 169 | os.makedirs(fig_folder) 170 | 171 | seg_gt_s, seg_pred_s, seg_gt_u, seg_pred_u, gt_s, pred_s, gt_u, pred_u, gt_e, pred_e = load_results(paths) 172 | 173 | methods = ['chance', 'ours'] 174 | metrics = ['P/R', 'Prec.', 'Recall', 'F1-score', 'P/R', 'Prec.', 'Recall', 'F1-score'] 175 | # Evaluation 176 | # TODO: see if need to exclude "null" class 177 | # Online detection 178 | predict_frame = 0 179 | format_table(predict_frame) 180 | 181 | # Future detection 182 | predict_frame = 40 183 | for i in range(predict_frame): 184 | gt_s[predict_frame].extend(gt_s[i]) 185 | pred_s[predict_frame].extend(pred_s[i]) 186 | gt_u[predict_frame].extend(gt_u[i]) 187 | pred_u[predict_frame].extend(pred_u[i]) 188 | format_table(predict_frame) 189 | 190 | # Plot confusion matrices 191 | predict_frame = 0 192 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances))) 193 | plot_confusion_matrix(confusion_matrix, metadata.affordances, normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_affordance.pdf')) 194 | 195 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities) - 1)) 196 | plot_confusion_matrix(confusion_matrix, metadata.subactivities[:-1], normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_subactivity.pdf')) 197 | 198 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_e, pred_e, labels=range(len(metadata.activities))) 199 | plot_confusion_matrix(confusion_matrix, metadata.activities, normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_event.pdf')) 200 | 201 | 202 | def main(): 203 | paths = config.Paths() 204 | analyze_results(paths) 205 | pass 206 | 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /experiments/GEP/gep_seg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | # System imports 12 | import sys 13 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 14 | 15 | import os 16 | import argparse 17 | import json 18 | from tqdm import tqdm 19 | 20 | # Libraries 21 | import numpy as np 22 | import torch 23 | 24 | # Local imports 25 | from models.BiLSTM import BiLSTM 26 | from models.LSTM_pred import LSTM_Pred 27 | import models.parser.GEP_online as GEP 28 | import models.parser.grammarutils as grammarutils 29 | import utils.logutils as logutils 30 | import utils.evalutils as evalutils 31 | import experiments.exp_config as exp_config 32 | 33 | def predict(detection_outputs, activities, total_lengths, args): 34 | detection_outputs_probs = torch.nn.Softmax(dim=-1)(detection_outputs) 35 | detection_outputs_probs = detection_outputs_probs.data.cpu().numpy() 36 | class_num = detection_outputs_probs.shape[2] 37 | pred_probs = np.empty_like(detection_outputs_probs) 38 | for batch_i in range(detection_outputs_probs.shape[1]): 39 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i] + '.pcfg') 40 | grammar = grammarutils.read_grammar(grammar_file, index=True) 41 | gen_earley_parser = GEP.GeneralizedEarley(grammar, class_num, mapping=args.metadata.action_index) 42 | for frame in range(total_lengths[batch_i]): 43 | gen_earley_parser.update_prob(detection_outputs_probs[frame, batch_i, :]) 44 | 
gen_earley_parser.parse() 45 | pred_probs[frame, batch_i, :] = gen_earley_parser.future_predict(args.epsilon) 46 | return pred_probs 47 | 48 | def get_gt_pred(labels, total_lengths): 49 | all_gt_pred_labels = list() 50 | for i_batch in range(labels.size(1)): 51 | gt_pred_labels = list() 52 | seg_length = int(total_lengths[i_batch]) 53 | current_label = int(labels[0, i_batch]) 54 | for f in range(seg_length): 55 | if int(labels[f, i_batch]) != current_label: 56 | current_label = int(labels[f, i_batch]) 57 | gt_pred_labels.extend([current_label for _ in range(f-len(gt_pred_labels)-1)]) 58 | gt_pred_labels.extend([int(labels[seg_length-1, i_batch]) for _ in range(seg_length-len(gt_pred_labels))]) 59 | all_gt_pred_labels.extend(gt_pred_labels) 60 | return all_gt_pred_labels 61 | 62 | def validate(data_loader, detection_model, prediction_model, args): 63 | all_gt_segment_predictions = list() 64 | all_segment_predictions = list() 65 | all_nn_segment_predictions = list() 66 | 67 | task_acc_ratio = logutils.AverageMeter() 68 | task_macro_prec = logutils.AverageMeter() 69 | task_macro_rec = logutils.AverageMeter() 70 | task_macro_f1 = logutils.AverageMeter() 71 | task_acc_ratio_nn = logutils.AverageMeter() 72 | 73 | # switch to evaluate mode 74 | detection_model.eval() 75 | prediction_model.eval() 76 | 77 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 78 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 79 | 80 | prediction_output = prediction_model(features_batch) 81 | detection_output = detection_model(features_batch) 82 | 83 | pred_probs = predict(detection_output, activities, total_lengths, args) 84 | pred_labels = np.argmax(pred_probs * prediction_output.data.cpu().numpy(), axis=-1).flatten().tolist() 85 | 86 | _, nn_pred_labels = torch.max(prediction_output, dim=-1) 87 | gt_pred_labels = get_gt_pred(labels_batch, total_lengths) 88 | video_length = len(gt_pred_labels) 89 | nn_pred_labels = nn_pred_labels.cpu().data.numpy().flatten().tolist() 90 | 91 | micro_prec = logutils.compute_accuracy(gt_pred_labels, pred_labels) 92 | nn_micro_prec = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels) 93 | macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels, metric='macro') 94 | task_acc_ratio.update(micro_prec, video_length) 95 | task_acc_ratio_nn.update(nn_micro_prec, video_length) 96 | task_macro_prec.update(macro_prec, video_length) 97 | task_macro_rec.update(macro_rec, video_length) 98 | task_macro_f1.update(macro_f1, video_length) 99 | 100 | all_gt_segment_predictions.extend(gt_pred_labels) 101 | all_segment_predictions.extend(pred_labels) 102 | all_nn_segment_predictions.extend(nn_pred_labels) 103 | 104 | tqdm.write('Task {} {} Batch [{}/{}]\t' 105 | 'Acc {top1.val:.4f} ({top1.avg:.4f})\t' 106 | 'NN Acc {nn.val:.4f} ({nn.avg:.4f})\t' 107 | 'Prec {prec.val:.4f} ({prec.avg:.4f})\t' 108 | 'Recall {recall.val:.4f} ({recall.avg:.4f})\t' 109 | 'F1 {f1.val:.4f} ({f1.avg:.4f})'.format( 110 | args.task, 'test', batch_idx, len(data_loader), top1=task_acc_ratio, nn=task_acc_ratio_nn, 111 | prec=task_macro_prec, recall=task_macro_rec, f1=task_macro_f1)) 112 | 113 | micro_prec = logutils.compute_accuracy(all_gt_segment_predictions, all_segment_predictions) 114 | nn_micro_prec = logutils.compute_accuracy(all_gt_segment_predictions, all_nn_segment_predictions) 115 | macro_prec, macro_recall, macro_fscore = 
logutils.compute_accuracy(all_gt_segment_predictions, all_segment_predictions, metric='macro')
116 |     tqdm.write('[Evaluation] Micro Prec: {}\t'
117 |                'NN Micro Prec: {}\t'
118 |                'Macro Precision: {}\t'
119 |                'Macro Recall: {}\t'
120 |                'Macro F-score: {}'.format(micro_prec, nn_micro_prec, macro_prec, macro_recall, macro_fscore))
121 | 
122 | def main(args):
123 |     exp_info = exp_config.Experiment(args.dataset)
124 |     paths = exp_info.paths
125 |     args.paths = paths
126 |     args.metadata = exp_info.metadata
127 | 
128 |     np.random.seed(args.seed)
129 |     torch.manual_seed(args.seed)
130 | 
131 |     batch_size = args.batch_size
132 |     args.batch_size = 1
133 |     feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
134 |     label_num = exp_info.get_label_num(args)
135 | 
136 |     hidden_size = 256
137 |     hidden_layers = 2
138 |     args.save_path = os.path.join(paths.inter_root, 'likelihood', args.task)
139 |     args.resume = os.path.join(paths.checkpoint_root,
140 |                                'detection_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
141 |                                                                                  args.lr, args.batch_size,
142 |                                                                                  args.lr_decay,
143 |                                                                                  1 if not args.subsample else args.subsample,
144 |                                                                                  args.dropout_rate))
145 |     detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
146 |     detection_model = torch.nn.DataParallel(detection_model)
147 |     logutils.load_checkpoint(args, detection_model)
148 | 
149 |     args.resume = os.path.join(paths.checkpoint_root,
150 |                                'segment_prediction_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
151 |                                                                                           args.lr, args.batch_size,
152 |                                                                                           args.lr_decay,
153 |                                                                                           1 if not args.subsample else args.subsample,
154 |                                                                                           args.dropout_rate))
155 |     prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
156 |     prediction_model = torch.nn.DataParallel(prediction_model)
157 |     logutils.load_checkpoint(args, prediction_model)
158 | 
159 |     validate(test_loader, detection_model, prediction_model, args=args)
160 | 
161 | if __name__ == '__main__':
162 |     parser = argparse.ArgumentParser()
163 |     parser.add_argument('--dataset', default='VCLA_GAZE', type=str,
164 |                         help='indicating which dataset to use')
165 |     parser.add_argument('--seed', default=12345, type=int,
166 |                         help='Default seed for all random generators')
167 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
168 |                         help='Option flag for using CUDA training (default: True)')
169 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
170 |                         help='number of data loading workers (default: 1)')
171 |     parser.add_argument('--task', default='activity', type=str,
172 |                         help='Default working task activity/affordance')
173 |     parser.add_argument('--epochs', default=50, type=int, metavar='N',
174 |                         help='number of epochs for training (default: 50)')
175 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
176 |                         help='batch size for training (default: 1)')
177 |     parser.add_argument('--lr', default=1e-4, type=float,
178 |                         help='learning rate for the feature extraction process (default: 1e-4)')
179 |     parser.add_argument('--lr_decay', default=1., type=float,
180 |                         help='decay rate of learning rate (default: between 0.01 and 1)')
181 |     parser.add_argument('--lr_freq', default=25, type=float,
182 |                         help='learning rate decay frequency while updating')
183 |     parser.add_argument('--subsample', default=None, type=int,
184 |                         help='subsample frequency for Breakfast dataset')
185 |     parser.add_argument('--dropout_rate', default=0, type=float,
186 |                         help='Dropout rate for
LSTM training') 187 | parser.add_argument('--epsilon', default=1e-10, type=float, 188 | help='Balance between top-down bottom-up prediction') 189 | args = parser.parse_args() 190 | main(args) 191 | -------------------------------------------------------------------------------- /experiments/GEP/gep_pred_parse_prediction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | # System imports 12 | import sys 13 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 14 | 15 | import os 16 | import argparse 17 | import json 18 | from tqdm import tqdm 19 | 20 | # Libraries 21 | import numpy as np 22 | import torch 23 | 24 | # Local imports 25 | from models.LSTM_pred import LSTM_Pred 26 | from models.BiLSTM import BiLSTM 27 | from models.MLP import MLP 28 | import models.parser.GEP_old as GEP 29 | import models.parser.grammarutils as grammarutils 30 | import utils.logutils as logutils 31 | import experiments.exp_config as exp_config 32 | 33 | def inference(prob_mat, activity, sequence_id, args): 34 | grammar_file = os.path.join(args.paths.grammar_root, activity+'.pcfg') 35 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.subactivity_index) 36 | gen_earley_parser = GEP.GeneralizedEarley(grammar) 37 | best_string, prob = gen_earley_parser.parse(prob_mat) 38 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 39 | 40 | # Back trace to get labels of the entire sequence 41 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 42 | nn_pred_labels = np.argmax(prob_mat, axis=1) 43 | return nn_pred_labels, earley_pred_labels, tokens, seg_pos 44 | 45 | def predict(): 46 | return 47 | 48 | def validate(data_loader, detection_model, prediction_model, args): 49 | all_gt_frame_predictions = list() 50 | all_frame_predictions = list() 51 | all_nn_frame_predictions = list() 52 | 53 | task_acc_ratio = logutils.AverageMeter() 54 | task_acc_ratio_nn = logutils.AverageMeter() 55 | 56 | # switch to evaluate mode 57 | detection_model.eval() 58 | prediction_model.eval() 59 | 60 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 61 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 62 | detection_likelihood = torch.nn.Softmax(dim=-1)(detection_model(features_batch)).data.cpu().numpy() 63 | 64 | padding = features_batch[0, :, :].repeat(args.using_pred_duration - 1, 1, 1) 65 | prediction_features = torch.cat((padding, features_batch), dim=0) 66 | prediction_output = prediction_model(prediction_features) 67 | prediction_likelihood = torch.nn.Softmax(dim=-1)(prediction_output).data.cpu().numpy() 68 | 69 | for batch_i in range(features_batch.size(1)): 70 | _, pred_labels = torch.max(prediction_output[:total_lengths[batch_i] - 1, batch_i, :], dim=-1) 71 | prediction_likelihood = prediction_likelihood[:total_lengths[batch_i] - 1, batch_i, :] 72 | 73 | skip_size = args.using_pred_duration - args.pred_duration 74 | 75 | # for frame in range(0, total_lengths[batch_i]-1, skip_size): 76 | for frame in range(0, total_lengths[batch_i] - args.using_pred_duration, skip_size): 77 | det = detection_likelihood[:frame + 1, batch_i, :] 78 | # det = detection_likelihood[:frame+1+args.using_pred_duration, batch_i, :] 79 | gt_det = torch.zeros(det.shape) 80 | gt_det.scatter_(1, 
labels_batch[:frame+1,batch_i].unsqueeze(1), 1) 81 | gt_det = gt_det * 0.95 + (0.05/10) * torch.ones(det.shape) 82 | gt_det = gt_det.numpy() 83 | 84 | pred = prediction_likelihood[frame:frame+args.using_pred_duration, :] 85 | prob_mat = np.concatenate((det, pred), axis=0) 86 | pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(prob_mat, activities[batch_i], 87 | sequence_ids[batch_i], args) 88 | 89 | # Testing 90 | gep_predictions = batch_earley_pred_labels[frame+1:frame+args.using_pred_duration+1] 91 | all_frame_predictions.extend(gep_predictions) 92 | nn_frame_predictions = pred_labels[frame+1:frame+args.using_pred_duration+1] 93 | all_nn_frame_predictions.extend(nn_frame_predictions) 94 | gt_frame_predictions = labels_batch[frame+1:frame + args.using_pred_duration + 1, 95 | batch_i].cpu().numpy().tolist() 96 | all_gt_frame_predictions.extend(gt_frame_predictions) 97 | 98 | video_length = len(gt_frame_predictions) 99 | micro_prec_nn = logutils.compute_accuracy(gt_frame_predictions, nn_frame_predictions) 100 | task_acc_ratio_nn.update(micro_prec_nn, video_length) 101 | 102 | continue 103 | micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions) 104 | nn_mirco_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_nn_frame_predictions) 105 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_frame_predictions, 106 | all_frame_predictions, 107 | metric='macro') 108 | tqdm.write('[Evaluation] Micro Prec: {}\t' 109 | 'NN Precision: {}\t' 110 | 'Macro Precision: {}\t' 111 | 'Macro Recall: {}\t' 112 | 'Macro F-score: {}'.format(micro_prec, nn_mirco_prec, macro_prec, macro_recall, macro_fscore)) 113 | 114 | 115 | def main(args): 116 | exp_info = exp_config.Experiment(args.dataset) 117 | paths = exp_info.paths 118 | args.paths = paths 119 | args.metadata = exp_info.metadata 120 | 121 | np.random.seed(args.seed) 122 | torch.manual_seed(args.seed) 123 | 124 | batch_size = args.batch_size 125 | args.batch_size = 1 126 | feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True) 127 | label_num = exp_info.get_label_num(args) 128 | 129 | hidden_size = 256 130 | hidden_layers = 2 131 | 132 | args.resume = os.path.join(paths.checkpoint_root, 'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.model, args.epochs, 133 | args.lr, args.batch_size, args.lr_decay, 134 | 1 if not args.subsample else args.subsample, 135 | args.dropout_rate)) 136 | if args.model == 'lstm': 137 | detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num) 138 | else: 139 | detection_model = MLP(feature_size, hidden_size, label_num) 140 | detection_model = torch.nn.DataParallel(detection_model) 141 | logutils.load_checkpoint(args, detection_model) 142 | 143 | args.resume = os.path.join(paths.checkpoint_root, 144 | 'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(args.task, args.model, args.epochs, 145 | args.lr, args.batch_size, 146 | args.lr_decay, 147 | 1 if not args.subsample else args.subsample, 148 | args.dropout_rate, 149 | args.using_pred_duration)) 150 | if args.model == 'lstm': 151 | prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num) 152 | else: 153 | prediction_model = MLP(feature_size, hidden_size, label_num) 154 | prediction_model = torch.nn.DataParallel(prediction_model) 155 | logutils.load_checkpoint(args, prediction_model) 156 | 157 | validate(test_loader, detection_model, prediction_model, args=args) 
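

# A compact restatement of the stitching step buried inside validate() above:
# at an anchor frame t, GEP parses the observed detection likelihoods
# concatenated with the predicted future likelihoods, and only the future span
# of the resulting parse is scored against the ground truth. This helper is an
# illustrative sketch that is not called anywhere in the codebase; det_probs
# and pred_probs are assumed to be [num_frames x num_classes] numpy arrays.
def _gep_future_sketch(det_probs, pred_probs, t, horizon, activity, sequence_id, args):
    det = det_probs[:t + 1, :]                      # frames 0..t, observed
    fut = pred_probs[t:t + horizon, :]              # frames t+1..t+horizon, predicted
    prob_mat = np.concatenate((det, fut), axis=0)   # the matrix GEP actually parses
    _, earley_labels, _, _ = inference(prob_mat, activity, sequence_id, args)
    return earley_labels[t + 1:t + horizon + 1]     # grammar-refined future labels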
158 | 
159 | if __name__ == '__main__':
160 |     parser = argparse.ArgumentParser()
161 |     parser.add_argument('--dataset', default='CAD', type=str,
162 |                         help='indicating which dataset to use')
163 |     parser.add_argument('--model', default='lstm', type=str,
164 |                         help='Model for classification (default: LSTM)')
165 |     parser.add_argument('--seed', default=12345, type=int,
166 |                         help='Default seed for all random generators')
167 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
168 |                         help='Option flag for using CUDA training (default: True)')
169 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
170 |                         help='number of data loading workers (default: 1)')
171 |     parser.add_argument('--task', default='activity', type=str,
172 |                         help='Default working task activity/affordance')
173 |     parser.add_argument('--epochs', default=100, type=int, metavar='N',
174 |                         help='number of epochs for training (default: 100)')
175 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
176 |                         help='batch size for training (default: 1)')
177 |     parser.add_argument('--lr', default=1e-4, type=float,
178 |                         help='learning rate for the feature extraction process (default: 1e-4)')
179 |     parser.add_argument('--lr_decay', default=1., type=float,
180 |                         help='decay rate of learning rate (default: between 0.01 and 1)')
181 |     parser.add_argument('--lr_freq', default=25, type=float,
182 |                         help='learning rate decay frequency while updating')
183 |     parser.add_argument('--subsample', default=None, type=int,
184 |                         help='subsample frequency for Breakfast dataset')
185 |     parser.add_argument('--dropout_rate', default=0, type=float,
186 |                         help='Dropout rate for LSTM training')
187 |     parser.add_argument('--pred_duration', default=45, type=int,
188 |                         help='length of frame prediction')
189 |     parser.add_argument('--using_pred_duration', default=55, type=int,
190 |                         help='prediction duration that the loaded prediction model was trained with')
191 |     args = parser.parse_args()
192 |     main(args)
193 | 
--------------------------------------------------------------------------------
/datasets/CAD/dataparser.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Feb 17, 2017
3 | 
4 | @author: Siyuan Qi
5 | 
6 | Description of the file.
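
Field layout of the two annotation files consumed by parse_data() below, as
read by the parsing code (fields shown as <?> are present but unused here):

    activityLabel.txt : <sequence_id>,<?>,<?>,<obj_id>:<obj_name>,...
    labeling.txt      : <sequence_id>,<start_frame>,<end_frame>,<subactivity>,<affordance_1>,...,<affordance_K>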
7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import fnmatch 13 | import pickle 14 | import json 15 | 16 | import numpy as np 17 | import cv2 18 | 19 | # Local imports 20 | import datasets.CAD.cad_config as config 21 | from datasets.CAD.metadata import CAD_METADATA 22 | from models import parsegraph 23 | metadata = CAD_METADATA() 24 | 25 | 26 | def save_activity_corpus(paths, activity_corpus): 27 | if not os.path.exists(os.path.join(paths.tmp_root, 'corpus')): 28 | os.makedirs(os.path.join(paths.tmp_root, 'corpus')) 29 | 30 | for event, tpgs in activity_corpus.items(): 31 | corpus_filename = os.path.join(paths.tmp_root, 'corpus', event+'.txt') 32 | with open(corpus_filename, 'w') as f: 33 | for tpg in tpgs: 34 | f.write(str(tpg)+'\n') 35 | 36 | 37 | def save_action_gt(paths, skeletons, skeleton_labels): 38 | action_gt = dict() 39 | for s in skeletons: 40 | assert skeletons[s].shape[0] == len(skeleton_labels[s]) 41 | skeletons[s] = skeletons[s].tolist() 42 | action_gt['skeletons'] = skeletons 43 | action_gt['skeleton_labels'] = skeleton_labels 44 | 45 | with open(os.path.join(paths.tmp_root, 'action.json'), 'w') as f: 46 | json.dump(action_gt, f, indent=4, separators=(',', ': ')) 47 | 48 | 49 | def get_position_indices(): 50 | start = 1 51 | position_indices = list() 52 | for i in range(11): 53 | position_indices.extend(range(start+i*14+10, start+i*14+13)) 54 | start += 11*14 55 | for i in range(4): 56 | position_indices.extend(range(start+i*4, start+i*4+3)) 57 | return position_indices 58 | 59 | 60 | def get_left_handed_indices(): 61 | flipped_joint_indices = [0, 1, 2, 5, 6, 3, 4, 9, 10, 7, 8, 12, 11, 14, 13] 62 | left_handed_indices = list() 63 | for joint in flipped_joint_indices: 64 | left_handed_indices.extend([3*joint, 3*joint+1, 3*joint+2]) 65 | return left_handed_indices 66 | 67 | 68 | def get_skeletons(skeletons, eventdir, sequence_ids, left_handed=False): 69 | position_indices = get_position_indices() 70 | left_handed_indices = get_left_handed_indices() 71 | x_indices = [i*3 for i in range(15)] 72 | for sequence_id in sequence_ids: 73 | raw_skeleton_data = np.genfromtxt(os.path.join(eventdir, sequence_id+'.txt'), delimiter=',', skip_footer=1, usecols=range(171)) 74 | assert raw_skeleton_data[-1, 0] == raw_skeleton_data.shape[0] 75 | joint_positions = raw_skeleton_data[:, position_indices]/1000.0 76 | if left_handed: 77 | joint_positions[:, x_indices] = -joint_positions[:, x_indices] 78 | joint_positions = joint_positions[:, left_handed_indices] 79 | skeletons[sequence_id] = joint_positions 80 | 81 | return skeletons 82 | 83 | 84 | def get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed=False): 85 | # Intrinsic camera parameters 86 | fx = 525.0 # focal length x 87 | fy = 525.0 # focal length y 88 | cx = 319.5 # optical center x 89 | cy = 239.5 # optical center y 90 | z_scale = 12.5 91 | 92 | for sequence_id in sequence_ids: 93 | print('get_obj_positions', sequence_id) 94 | obj_positions[sequence_id] = list() 95 | for filename in sorted(os.listdir(eventdir)): 96 | if fnmatch.fnmatch(filename, '{}_obj*.txt'.format(sequence_id)): 97 | print(filename) 98 | position_sequence = list() 99 | 100 | with open(os.path.join(eventdir, filename)) as f: 101 | last_image_bbx = None 102 | for line in f: 103 | line = line.split(',') 104 | frame = line[0] 105 | # if not os.path.exists(os.path.join(eventdir.replace('annotations', 'rgbd_images'), sequence_id, 'Depth_{}.png'.format(frame))): 106 | # exit(1) 107 | depth = cv2.imread(os.path.join(eventdir.replace('annotations', 
'rgbd_images'), sequence_id, 'Depth_{}.png'.format(frame)), -1) 108 | depth = depth.astype(float) / z_scale 109 | 110 | image_bbx = [int(c) for c in line[2:6]] 111 | if not (0 < image_bbx[0] < 640 and 0 < image_bbx[2] < 640 and 0 < image_bbx[1] < 480 and 0 < image_bbx[3] < 480): 112 | if last_image_bbx: 113 | image_bbx = last_image_bbx 114 | else: 115 | continue 116 | else: 117 | last_image_bbx = image_bbx 118 | # image_bbx = [0, 0, depth.shape[1], depth.shape[0]] 119 | 120 | step = 10 121 | # # If the object is invisible, this will be an empty array 122 | positions = np.empty((len(range(image_bbx[0], image_bbx[2], step))*len(range(image_bbx[1], image_bbx[3], step)), 3)) 123 | pt_count = 0 124 | for u in range(image_bbx[0], image_bbx[2], step): 125 | for v in range(image_bbx[1], image_bbx[3], step): 126 | z = depth[v, u] 127 | if z == 0: 128 | continue 129 | if not left_handed: 130 | x = (u - cx) * z / fx 131 | else: 132 | x = -(u - cx) * z / fx 133 | y = -(v - cy) * z / fy # Note: need to flip y to align depth with skeleton 134 | 135 | positions[pt_count, :] = np.array((x, y, z)) 136 | pt_count += 1 137 | positions = positions[:pt_count, :] 138 | position_sequence.append(positions) 139 | # if np.isnan(np.mean(positions, axis=0)[0]): 140 | # print image_bbx, pt_count, positions.shape, positions 141 | # exit(1) 142 | # position_sequence.append(np.mean(positions, 0)) 143 | # position_sequence.append(np.ones((10, 3))) 144 | 145 | obj_positions[sequence_id].append(position_sequence) 146 | 147 | return obj_positions 148 | 149 | 150 | def parse_data(paths): 151 | if os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')): 152 | activity_corpus = pickle.load(open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb')) 153 | else: 154 | activity_corpus = dict() 155 | label_list = dict() 156 | skeletons = dict() 157 | skeleton_labels = dict() 158 | obj_positions = dict() 159 | for datadir in os.listdir(os.path.join(paths.data_root)): 160 | datadir = os.path.join(paths.data_root, datadir) 161 | if os.path.isdir(datadir) and datadir.endswith('annotations'): 162 | subject = os.path.split(datadir)[1].strip('_annotations') 163 | print(subject) 164 | left_handed = subject == 'Subject3' 165 | for event in os.listdir(datadir): 166 | # if event != 'stacking_objects': 167 | # continue 168 | if event not in activity_corpus: 169 | activity_corpus[event] = list() 170 | eventdir = os.path.join(datadir, event) 171 | 172 | sequence_objects = dict() 173 | sequence_ids = list() 174 | with open(os.path.join(eventdir, 'activityLabel.txt')) as f: 175 | for line in f: 176 | activity_labels = line.strip(',\n').split(',') 177 | sequence_ids.append(activity_labels[0]) 178 | activity_corpus[event].append(parsegraph.TParseGraph(event, activity_labels[0], subject)) 179 | sequence_objects[activity_labels[0]] = [o.split(':')[-1] for o in activity_labels[3:]] 180 | 181 | get_skeletons(skeletons, eventdir, sequence_ids, left_handed) 182 | get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed) 183 | get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed) 184 | 185 | # Parse data into spatial-temporal parse graphs 186 | with open(os.path.join(eventdir, 'labeling.txt')) as f: 187 | rel_idx = 0 188 | for line in f: 189 | sequence_labeling = line.strip().split(',') 190 | sequence_id = sequence_labeling[0] 191 | tpg = next(tpg for tpg in activity_corpus[event] if tpg.id == sequence_id) 192 | start_frame = int(sequence_labeling[1]) 193 | end_frame = int(sequence_labeling[2]) 194 | subactivity = 
sequence_labeling[3]
195 |                     affordance_labels = sequence_labeling[4:]
196 | 
197 |                     # Create ground truth for action recognition
198 |                     if sequence_id not in skeleton_labels:
199 |                         skeleton_labels[sequence_id] = ['null' for _ in range(skeletons[sequence_id].shape[0])]
200 | 
201 |                     for frame in range(start_frame-1, end_frame):
202 |                         if frame >= skeletons[sequence_id].shape[0]:
203 |                             break
204 |                         skeleton_labels[sequence_id][frame] = subactivity
205 | 
206 |                     # Create ground truth ST-pgs
207 |                     label_list[sequence_id + '$' + str(rel_idx)] = dict()
208 |                     label_list[sequence_id + '$' + str(rel_idx)]['activity'] = metadata.action_index[subactivity]
209 |                     label_list[sequence_id + '$' + str(rel_idx)]['affordance'] = affordance_labels
210 |                     rel_idx += 1
211 |                     spg = parsegraph.SParseGraph(start_frame - 1, end_frame - 1, subactivity, subactivity, sequence_objects.get(sequence_id), affordance_labels)
212 |                     spg.set_skeletons(skeletons[sequence_id][start_frame - 1:end_frame, :])
213 |                     spg.set_obj_positions(obj_positions[sequence_id])
214 |                     tpg.append_terminal(spg)
215 | 
216 |         pickle.dump(label_list, open(os.path.join(paths.tmp_root, 'label_list.p'), 'wb'))
217 |         pickle.dump(activity_corpus, open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'wb'))
218 |         pickle.dump(skeletons, open(os.path.join(paths.tmp_root, 'skeletons.p'), 'wb'))
219 |         pickle.dump(obj_positions, open(os.path.join(paths.tmp_root, 'obj_positions.p'), 'wb'))
220 | 
221 | 
222 | def main():
223 |     paths = config.Paths()
224 |     start_time = time.time()
225 | 
226 |     parse_data(paths)
227 | 
228 |     print('Time elapsed: {}'.format(time.time() - start_time))
229 | 
230 | 
231 | if __name__ == '__main__':
232 |     main()
233 | 
--------------------------------------------------------------------------------
/datasets/helmert.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on 10/30/18
3 | 
4 | @author: Baoxiong Jia
5 | 
6 | Description:
7 | 
8 | """
9 | import numpy as np
10 | 
11 | def helmert_affine_3d(datum1, datum2):
12 |     '''
13 |     :param datum1: n*3 matrix
14 |     :param datum2: n*3 matrix
15 |     :return:
16 |     '''
17 |     s1 = datum1.shape
18 |     s2 = datum2.shape
19 |     N = s1[0]
20 | 
21 |     G = np.zeros((3 * N, 12), dtype=float)
22 |     E1 = np.ones((N, 1), dtype=float)
23 |     Z1 = np.zeros((N, 1), dtype=float)
24 |     z3 = np.zeros(N, dtype=float)
25 |     # TODO: build the affine design matrix G and solve; unfinished (see helmert_3d)
26 | 
27 | def helmert_3d(datum1, datum2, type='7p', without_scale=0, approx=np.zeros(3, dtype=float)):
28 |     '''
29 |     :param datum1: n*3 matrix
30 |     :param datum2: n*3 matrix
31 |     :param type: '7p' or '10p'
32 |     :param without_scale: 0
33 |     :param approx: (3,) vector
34 |     :return:
35 |     '''
36 |     sof = 1
37 |     # Check parameter validity
38 |     assert len(datum1.shape) == 2 and len(datum2.shape) == 2, 'datum1 and datum2 should be matrices'
39 |     assert approx.shape == (3,), 'the approx vector should be of shape (3,)'
40 |     assert isinstance(type, str), 'type parameter should be a string'
41 |     if type == '7p':
42 |         rc = np.zeros(3, dtype=float)
43 |     else:
44 |         # Case '10p'
45 |         rc = np.mean(datum1, axis=0)
46 | 
47 |     # Check data validity
48 |     N1 = datum1.shape[0]
49 |     N2 = datum2.shape[0]
50 |     N = N1
51 |     assert N1 == N2, 'datum1 and datum2 should have the same dimension'
52 |     assert datum1.shape[1] == 3 and datum2.shape[1] == 3, 'both datum matrix should be of N*3'
53 | 
54 |     # naeh should be (7,) vector, set the naeh vector
55 |     naeh = np.concatenate((np.zeros(3, dtype=float), approx, np.array([1], dtype=float)))
56 |     if
np.array_equal(approx, np.zeros(3, dtype=float)) and N > 3:
57 |         # TODO: add helmert_affine_3d transformation and debug
58 |         pass
59 |     if without_scale != 0:
60 |         naeh[6] = without_scale
61 | 
62 |     wert_A = np.array([1e-8, 1e-8])
63 |     zaehl = 0
64 |     x0 = naeh[0]
65 |     y0 = naeh[1]
66 |     z0 = naeh[2]
67 |     ex = naeh[3]
68 |     ey = naeh[4]
69 |     ez = naeh[5]
70 |     m = naeh[6]
71 |     tp = np.array([x0, y0, z0, ex, ey, ez, m])
72 |     qbb = np.eye(3 * N)
73 |     while True:
74 |         A = np.zeros((3 * N, 7), dtype=float)
75 |         w = np.zeros((3 * N, 1), dtype=float)
76 |         for i in range(N):
77 |             A[i * 3][0] = -1
78 |             A[i * 3 + 1][1] = -1
79 |             A[i * 3 + 2][2] = -1
80 |             A[i * 3][3] = -m * ((np.cos(ex) * np.sin(ey) * np.cos(ez) - np.sin(ex) * np.sin(ez)) * (datum1[i][1] - rc[1])
81 |                                 + (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
82 |             A[i * 3][4] = -m * ((-np.sin(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
83 |                                 (np.sin(ex) * np.cos(ey) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
84 |                                 (-np.cos(ex) * np.cos(ey) * np.cos(ez)) * (datum1[i][2] - rc[2]))
85 |             A[i * 3][5] = -m * ((-np.cos(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
86 |                                 (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
87 |                                 (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
88 |             A[i * 3][6] = -((np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
89 |                             (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
90 |                             (-np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
91 |             A[i * 3 + 1][3] = -m * ((-np.cos(ex) * np.sin(ey) * np.sin(ez) - np.sin(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
92 |                                     (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
93 |             A[i * 3 + 1][4] = -m * ((np.sin(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
94 |                                     (-np.sin(ex) * np.cos(ey) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
95 |                                     (np.cos(ex) * np.cos(ey) * np.sin(ez)) * (datum1[i][2] - rc[2]))
96 |             A[i * 3 + 1][5] = -m * ((-np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
97 |                                     (-np.sin(ex) * np.sin(ey) * np.cos(ez) - np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
98 |                                     (np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
99 |             A[i * 3 + 1][6] = -((-np.cos(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
100 |                                 (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
101 |                                 (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
102 |             A[i * 3 + 2][3] = -m * ((-np.cos(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) +
103 |                                     (-np.sin(ex) * np.cos(ey)) * (datum1[i][2] - rc[2]))
104 |             A[i * 3 + 2][4] = -m * ((np.cos(ey)) * (datum1[i][0] - rc[0]) +
105 |                                     (np.sin(ex) * np.sin(ey)) * (datum1[i][1] - rc[1]) +
106 |                                     (-np.cos(ex) * np.sin(ey)) * (datum1[i][2] - rc[2]))
107 |             A[i * 3 + 2][5] = 0
108 |             A[i * 3 + 2][6] = -((np.sin(ey)) * (datum1[i][0] - rc[0]) +
109 |                                 (-np.sin(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) +
110 |                                 (np.cos(ex) * np.cos(ey)) * (datum1[i][2] - rc[2]))
111 |             w[i * 3][0] = -rc[0] + datum2[i][0] - x0 - m * ((np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
112 |                                 (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
113 |                                 (-np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
114 |             w[i * 3 + 1][0] = -rc[1] + datum2[i][1] - y0 - m * ((-np.cos(ey) * np.sin(ez)) *
(datum1[i][0] - rc[0]) + 115 | (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) + 116 | (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2])) 117 | w[i * 3 + 2][0] = -rc[2] + datum2[i][2] - z0 - m * ((np.sin(ey))*(datum1[i][0] - rc[0]) + 118 | (-np.sin(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) + 119 | (np.cos(ex) * np.cos(ey)) * (datum1[i][2] - rc[2])) 120 | if without_scale != 0: 121 | A = A[:, : -1] 122 | 123 | w = -1. * w 124 | r = A.shape[0] - A.shape[1] 125 | pbb = np.linalg.inv(qbb) 126 | quadra_A = np.matmul(np.matmul(A.T, pbb), A) 127 | inv_quadra_A = np.linalg.inv(quadra_A) 128 | delta_x = np.matmul(inv_quadra_A, np.matmul(np.matmul(A.T, pbb), w)) 129 | v = np.matmul(A, delta_x) - w 130 | quadra_v = np.matmul(np.matmul(v.T, pbb), v) 131 | sig0p = np.sqrt(quadra_v / r) 132 | qxxda = inv_quadra_A 133 | kxxda = sig0p ** 2 * qxxda 134 | ac = np.sqrt(np.diag(kxxda)) 135 | 136 | delta_x = delta_x.reshape((-1, )) # reshape to row vector 137 | testv = np.sqrt((delta_x[0] ** 2 + delta_x[1] ** 2 + delta_x[2] ** 2) / 3.) 138 | testd = np.sqrt((delta_x[3] ** 2 + delta_x[4] ** 2 + delta_x[5] ** 2) / 3.) 139 | zaehl = zaehl + 1 140 | x0 = x0 + delta_x[0] 141 | y0 = y0 + delta_x[1] 142 | z0 = z0 + delta_x[2] 143 | ex = ex + delta_x[3] 144 | ey = ey + delta_x[4] 145 | ez = ez + delta_x[5] 146 | if without_scale == 0 and (m + delta_x[6]) > 1e-15: # This condition is to prevent numerical problems with m-->0 147 | m = m + delta_x[6] 148 | tp = np.array([x0, y0, z0, ex, ey, ez, m]) 149 | if abs(testv) < wert_A[0] and abs(testd) < wert_A[1]: 150 | break 151 | elif zaehl > 1000: 152 | sof = 0 153 | print('Iteration Limit Warning: Calculation not converging after 1000 iterations. I am aborting. Results may be inaccurate.') 154 | break 155 | 156 | if len(np.argwhere(np.abs(tp[3:6]) > 2 * np.pi)) > 0: 157 | print('Approximate Accuracy Warning: Rotation angles seem to be big. A better approximation is regarded. 
Results will be inaccurate.') 158 | 159 | idz = np.zeros_like(datum1) 160 | for i in range(N): 161 | idz[i][1] = rc[1] + tp[1] + tp[6] * ((-np.cos(tp[4]) * np.sin(tp[5])) * (datum1[i][0] - rc[0]) + 162 | (-np.sin(tp[3]) * np.sin(tp[4]) * np.sin(tp[5]) + np.cos(tp[3]) * np.cos(tp[5])) * (datum1[i][1] - rc[1]) + 163 | (np.cos(tp[3]) * np.sin(tp[4]) * np.sin(tp[5]) + np.sin(tp[3]) * np.cos(tp[5]))*(datum1[i][2] - rc[2])) 164 | idz[i][0] = rc[0] + tp[0] + tp[6] * ((np.cos(tp[4]) * np.cos(tp[5])) * (datum1[i][0] - rc[0]) + 165 | (np.sin(tp[3]) * np.sin(tp[4]) * np.cos(tp[5]) + np.cos(tp[3]) * np.sin(tp[5])) *(datum1[i][1] - rc[1]) + 166 | (-np.cos(tp[3]) * np.sin(tp[4]) * np.cos(tp[5]) + np.sin(tp[3]) * np.sin(tp[5])) * (datum1[i][2] - rc[2])) 167 | idz[i][2] = rc[2] + tp[2] + tp[6] * ((np.sin(tp[4])) * (datum1[i][0] - rc[0]) + 168 | (-np.sin(tp[3]) * np.cos(tp[4])) * (datum1[i][1] - rc[1]) + 169 | (np.cos(tp[3]) * np.cos(tp[4])) * (datum1[i][2] - rc[2])) 170 | tr = datum2 - idz 171 | return tp, rc, ac, tr, sof 172 | 173 | def helmert_2d(): 174 | pass 175 | 176 | def test(): 177 | cases = [ 178 | ( 179 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 180 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 181 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 182 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]), 183 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 184 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 185 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 186 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]) 187 | ), 188 | ( 189 | np.array([[0.0343117800000000, 0.219011300000000, 1.65202900000000], 190 | [0.144150100000000, 0.265266300000000, 1.86847700000000], 191 | [-0.174984600000000, -0.176452300000000, 1.83610600000000], 192 | [0.0246211000000000, -0.278663500000000, 1.68597900000000]]), 193 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 194 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 195 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 196 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]) 197 | ) 198 | ] 199 | for case in cases: 200 | output = helmert_3d(case[0], case[1]) 201 | if __name__ == '__main__': 202 | test() -------------------------------------------------------------------------------- /experiments/GEP/gep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | # System imports 11 | import sys 12 | sys.path.append('/mnt/hdd/home/baoxiong/Projects/TPAMI2019/src') 13 | 14 | import os 15 | import argparse 16 | import json 17 | from tqdm import tqdm 18 | 19 | # Libraries 20 | import numpy as np 21 | import torch 22 | 23 | # Local imports 24 | import models.BiLSTM as lstm_model 25 | import models.MLP as mlp_model 26 | import models.parser.GEP_adj as GEP 27 | import models.parser.grammarutils as grammarutils 28 | import utils.logutils as logutils 29 | import utils.evalutils as evalutils 30 | import utils.vizutils as vizutils 31 | import experiments.exp_config as exp_config 32 | 33 | def inference(model_outputs, activities, sequence_ids, args): 34 | model_output_probs = torch.nn.Softmax(dim=-1)(model_outputs) 35 | model_output_probs = model_output_probs.data.cpu().numpy() 36 | batch_earley_pred_labels = list() 37 | batch_tokens = list() 38 | batch_seg_pos = list() 
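    # Per sequence in the batch: load the grammar induced for its activity
    # class, let the generalized Earley parser search the [frames x classes]
    # softmax matrix for the most probable grammatical sentence, then
    # back-trace the best parse to per-frame labels (earley_pred_labels), the
    # token sequence and the segment boundaries (seg_pos).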
39 | for batch_i in range(model_outputs.size()[1]): 40 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i]+'.pcfg') 41 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.action_index) 42 | gen_earley_parser = GEP.GeneralizedEarley(grammar) 43 | best_string, prob = gen_earley_parser.parse(model_output_probs[:, batch_i, :]) 44 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 45 | 46 | # Back trace to get labels of the entire sequence 47 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 48 | batch_earley_pred_labels.append(earley_pred_labels) 49 | batch_tokens.append(tokens) 50 | batch_seg_pos.append(seg_pos) 51 | 52 | _, nn_pred_labels = torch.max(model_outputs, dim=2) 53 | 54 | return nn_pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos 55 | 56 | def validate(data_loader, model, args): 57 | all_gt_detections = list() 58 | all_detections = list() 59 | 60 | task_acc_ratio = logutils.AverageMeter() 61 | task_macro_prec = logutils.AverageMeter() 62 | task_macro_rec = logutils.AverageMeter() 63 | task_macro_f1 = logutils.AverageMeter() 64 | task_acc_ratio_nn = logutils.AverageMeter() 65 | 66 | # switch to evaluate mode 67 | model.eval() 68 | 69 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 70 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 71 | print(os.path.join(args.save_path, '{}_out_s{}_b{}_c{}.npy'.format(sequence_ids[0], 72 | args.subsample, args.using_batch_size, args.trained_epochs))) 73 | # exit() 74 | model_outputs = torch.tensor(np.load(os.path.join(args.save_path, '{}_out_s{}_b{}_c{}.npy'.format(sequence_ids[0], 75 | args.subsample, args.using_batch_size, args.trained_epochs)))).unsqueeze(1) 76 | 77 | # Inference 78 | tqdm.write('[{}] Inference'.format(sequence_ids[0])) 79 | 80 | seg_path = os.path.join(args.paths.inter_root, 'segmentation') 81 | if not os.path.exists(seg_path): 82 | os.makedirs(seg_path) 83 | 84 | # # If no prior model outputs are provided 85 | # if not os.path.isfile(os.path.join(seg_path, '{}.npy'.format(sequence_ids[0]))): 86 | # _, nn_pred_labels = torch.max(model_outputs, dim=-1) 87 | # nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist() 88 | # pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args) 89 | # 90 | # # Evaluation 91 | # # Frame-wise detection 92 | # detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()] 93 | # if args.subsample != 1: 94 | # all_total_labels, all_total_lengths = additional 95 | # gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist() 96 | # video_length = len(gt_detections) 97 | # 98 | # detections = evalutils.upsample(detections, freq=args.subsample, length=video_length) 99 | # nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length) 100 | # else: 101 | # gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist() 102 | # detections = detections[:total_lengths[0]] 103 | # np.save(os.path.join(args.paths.inter_root, 'segmentation', '{}.npy'.format(sequence_ids[0])), 104 | # [gt_detections, nn_detections, detections]) 105 | # else: 106 | # results = np.load(os.path.join(seg_path, '{}.npy'.format(sequence_ids[0]))) 107 | # gt_detections, nn_detections, detections = 
        _, nn_pred_labels = torch.max(model_outputs, dim=-1)
        nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist()
        pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args)

        # Evaluation
        # Frame-wise detection
        detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()]
        if args.subsample != 1:
            all_total_labels, all_total_lengths = additional
            gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist()
            video_length = len(gt_detections)

            detections = evalutils.upsample(detections, freq=args.subsample, length=video_length)
            nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length)
        else:
            gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist()
            detections = detections[:total_lengths[0]]
            video_length = len(gt_detections)

        # # Visualization code for figures
        # vizutils.plot_segmentation([gt_detections, nn_detections, detections], video_length,
        #                            filename=os.path.join(args.paths.visualize_root, '{}.jpg'.format(sequence_ids[0])), border=False)

        micro_prec = logutils.compute_accuracy(gt_detections, detections)
        micro_prec_nn = logutils.compute_accuracy(gt_detections, nn_detections)
        macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_detections, detections, metric='macro')
        task_acc_ratio.update(micro_prec, video_length)
        task_acc_ratio_nn.update(micro_prec_nn, video_length)
        task_macro_prec.update(macro_prec, video_length)
        task_macro_rec.update(macro_rec, video_length)
        task_macro_f1.update(macro_f1, video_length)

        all_gt_detections.extend(gt_detections)
        all_detections.extend(detections)

    micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections)
    macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections,
                                                                       metric='macro')
    tqdm.write('[Evaluation] Detection:\n'
               'Micro Prec: {}\t'
               'NN Prec: {}\t'
               'Macro Precision: {}\t'
               'Macro Recall: {}\t'
               'Macro F-score: {}\n\n'.format(micro_prec, task_acc_ratio_nn.avg,
                                              macro_prec, macro_recall, macro_fscore))

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2
    if args.model == 'lstm':
        parsing_model = lstm_model.BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    else:
        parsing_model = mlp_model.MLP(feature_size, hidden_size, label_num)
    parsing_model = torch.nn.DataParallel(parsing_model)
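    # The detection checkpoints are saved with the subsample flag fixed to 1,
    # so temporarily override args.subsample while composing the resume path
    # and restore it afterwards.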
    prev = args.subsample
    args.subsample = 1
    args.save_path = os.path.join(paths.inter_root, 'likelihood', args.task, args.model)
    args.resume = os.path.join(paths.checkpoint_root,
                               'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.model, args.epochs,
                                                                                    args.lr, args.using_batch_size, args.lr_decay,
                                                                                    1 if not args.subsample else args.subsample,
                                                                                    args.dropout_rate))
    args.subsample = prev
    logutils.load_checkpoint(args, parsing_model)
    validate(test_loader, parsing_model, args=args)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='CAD', type=str,
                        help='indicating which dataset to use')
    parser.add_argument('--model', default='lstm', type=str,
                        help='Model for classification (default: LSTM)')
    parser.add_argument('--seed', default=12345, type=int,
                        help='Default seed for all random generators')
    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
                        help='Option flag for using CUDA training (default: True)')
    parser.add_argument('--workers', default=1, type=int, metavar='N',
                        help='number of data loading workers (default: 1)')
    parser.add_argument('--task', default='activity', type=str,
                        help='Default working task activity/affordance')
    parser.add_argument('--epochs', default=100, type=int, metavar='N',
                        help='number of epochs for training (default: 100)')
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size for training (default: 1)')
    parser.add_argument('--using_batch_size', default=1, type=int, metavar='N',
                        help='using model trained on args.using_batch_size')
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='learning rate for the feature extraction process (default: 1e-4)')
    parser.add_argument('--lr_decay', default=1, type=float,
                        help='decay rate of learning rate (default: between 0.01 and 1)')
    parser.add_argument('--lr_freq', default=25, type=float,
                        help='learning rate decay frequency while updating')
    parser.add_argument('--subsample', default=1, type=int,
                        help='subsample frequency for Breakfast dataset')
    parser.add_argument('--dropout_rate', default=0, type=float,
                        help='Dropout rate for LSTM training')
    parser.add_argument('--trained_epochs', default=100, type=int,
                        help='The number of iterations for trained model')
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/experiments/GEP/gep_pred_topdown.py:
--------------------------------------------------------------------------------
"""
Created on 5/21/19

@author: Baoxiong Jia

Description:

"""

# System imports
import sys
sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src')

import os
import argparse
import json
import time
import copy

# Libraries
from tqdm import tqdm
import numpy as np
import torch

# Local imports
from models.BiLSTM import BiLSTM
from models.LSTM_pred import LSTM_Pred
import models.parser.GEP_online as GEP
import models.parser.grammarutils as grammarutils
import utils.logutils as logutils
import experiments.exp_config as exp_config

def predict(parser, detection_output, duration_prior, record, frame, args, epsilon=1e-5):
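    """Top-down prediction of the next args.using_pred_duration frame labels.

    The online parser is updated with the current frame's class probabilities;
    the current action is assumed to persist for the remainder of its prior
    mean duration, after which a copy of the parser is repeatedly advanced and
    queried for the most likely next symbol until the horizon is filled.
    """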
    detection_output_prob = torch.nn.Softmax(dim=-1)(detection_output).data.cpu().numpy()
    parser.update_prob(detection_output_prob)
    best_l, _ = parser.parse()
    current_token = args.metadata.action_index[best_l.split()[-1]]
    if 'last' not in record.keys() or current_token != record['last']:
        record['last'] = current_token
        record['start'] = frame

    pred_duration = args.using_pred_duration
    pred_labels = list()
    predict_parser = copy.deepcopy(parser)
    mu, sigma = duration_prior[args.metadata.actions[current_token]]
    current_duration = max(0, int(mu) - (frame - record['start'] + 1))
    pred_labels.extend([current_token for _ in range(current_duration)])
    pred_duration -= current_duration
    while pred_duration > 0:
        prob = np.ones(len(args.metadata.actions)) * epsilon
        prob[current_token] = 1.0
        prob = prob / np.sum(prob)
        for _ in range(current_duration):
            predict_parser.update_prob(prob)
            predict_parser.parse()
        predict_mat = predict_parser.future_predict()
        current_token = np.argmax(predict_mat, axis=-1)
        mu, sigma = duration_prior[args.metadata.actions[current_token]]
        # Guard against zero-duration priors, which would otherwise keep
        # predicting the same token without consuming the prediction horizon
        current_duration = max(1, int(mu))
        pred_duration -= current_duration
        pred_labels.extend([current_token for _ in range(current_duration)])
    pred_labels = pred_labels[:args.using_pred_duration]
    return pred_labels

def validate(data_loader, detection_model, prediction_model, args):
    all_gt_frame_predictions = list()
    all_frame_predictions = list()
    all_nn_frame_predictions = list()

    task_acc_ratio = logutils.AverageMeter()
    task_macro_prec = logutils.AverageMeter()
    task_macro_rec = logutils.AverageMeter()
    task_macro_f1 = logutils.AverageMeter()
    task_acc_ratio_nn = logutils.AverageMeter()

    # switch to evaluate mode
    detection_model.eval()
    prediction_model.eval()

    for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')):
        features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit

        padding = features_batch[0, :, :].repeat(args.using_pred_duration - 1, 1, 1)
        prediction_features = torch.cat((padding, features_batch), dim=0)
        prediction_output = prediction_model(prediction_features)
        detection_output = detection_model(features_batch)

        _, detection_labels = torch.max(detection_output, dim=-1)
        detection_labels = detection_labels.cpu().numpy()

        for batch_i in range(detection_output.size(1)):

            gt_all_pred_labels = labels_batch[1: total_lengths[batch_i], batch_i].cpu().numpy().tolist()
            _, nn_all_pred_labels = torch.max(prediction_output[:total_lengths[batch_i] - 1, batch_i, :], dim=-1)
            nn_all_pred_labels = nn_all_pred_labels.cpu().numpy().tolist()

            # Initialization of Earley Parser
            class_num = detection_output.shape[2]
            grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i] + '.pcfg')
            grammar = grammarutils.read_grammar(grammar_file, index=True)
            gen_earley_parser = GEP.GeneralizedEarley(grammar, class_num, mapping=args.metadata.action_index)
            with open(os.path.join(args.paths.prior_root, 'duration_prior.json')) as f:
                duration_prior = json.load(f)

            record = dict()

            start_time = time.time()
            for frame in range(total_lengths[batch_i] - args.using_pred_duration):
                nn_pred_labels = nn_all_pred_labels[frame: frame + args.using_pred_duration]
                gt_pred_labels = gt_all_pred_labels[frame: frame + args.using_pred_duration]
                update_length = len(nn_pred_labels)

                pred_labels = predict(gen_earley_parser, detection_output[frame, batch_i, :],
                                      duration_prior, record, frame, args)
                # gt = torch.ones(detection_output.size(2)) * 1e-5
                # gt[labels_batch[frame, batch_i]] = 1
                # gt = torch.log(gt / torch.sum(gt))
                # pred_labels = predict(gen_earley_parser, gt,
                #                       duration_prior, record, frame, args)
                # print(frame)
                # print('detection_labels', detection_labels[max(0, frame - 44): frame + 1, batch_i].tolist())
                # print('gt_detect labels', labels_batch[max(0, frame - 44): frame + 1, batch_i].cpu().numpy().tolist())
                # print('gt_predic_labels', gt_pred_labels)
                # print('nn_predic_labels', nn_pred_labels)
                # print('xx_predic_labels', pred_labels)

                micro_prec = logutils.compute_accuracy(gt_pred_labels, pred_labels)
                nn_micro_prec = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels)
                macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_pred_labels, pred_labels,
                                                                            metric='macro')
                task_acc_ratio.update(micro_prec, update_length)
                task_acc_ratio_nn.update(nn_micro_prec, update_length)
                task_macro_prec.update(macro_prec, update_length)
                task_macro_rec.update(macro_rec, update_length)
                task_macro_f1.update(macro_f1, update_length)

                all_gt_frame_predictions.extend(gt_pred_labels)
                all_frame_predictions.extend(pred_labels)
                all_nn_frame_predictions.extend(nn_pred_labels)

            print('Sequence parsing time: {:.3f}s'.format(time.time() - start_time))

        tqdm.write('Task {} {} Batch [{}/{}]\t'
                   'Acc {top1.val:.4f} ({top1.avg:.4f})\t'
                   'NN Acc {nn.val:.4f} ({nn.avg:.4f})\t'
                   'Prec {prec.val:.4f} ({prec.avg:.4f})\t'
                   'Recall {recall.val:.4f} ({recall.avg:.4f})\t'
                   'F1 {f1.val:.4f} ({f1.avg:.4f})'.format(
                       args.task, 'test', batch_idx, len(data_loader), top1=task_acc_ratio, nn=task_acc_ratio_nn,
                       prec=task_macro_prec, recall=task_macro_rec, f1=task_macro_f1))

    micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions)
    nn_micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_nn_frame_predictions)
    macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions,
                                                                       metric='macro')
    tqdm.write('[Evaluation] Micro Prec: {}\t'
               'NN Micro Prec: {}\t'
               'Macro Precision: {}\t'
               'Macro Recall: {}\t'
               'Macro F-score: {}'.format(micro_prec, nn_micro_prec, macro_prec, macro_recall, macro_fscore))

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2

    args.resume = os.path.join(paths.checkpoint_root,
                               'detection_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
                                                                                 args.lr, args.batch_size, args.lr_decay,
                                                                                 1 if not args.subsample else args.subsample,
                                                                                 args.dropout_rate))
    detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    args.resume = os.path.join(paths.checkpoint_root,
                               'frame_prediction_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(args.task, args.epochs,
                                                                                             args.lr, args.batch_size,
                                                                                             args.lr_decay,
                                                                                             1 if not args.subsample else args.subsample,
                                                                                             args.dropout_rate,
                                                                                             args.using_pred_duration))
    prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    validate(test_loader, detection_model, prediction_model, args=args)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='VCLA_GAZE', type=str,
                        help='indicating which dataset to use')
    parser.add_argument('--seed', default=12345, type=int,
                        help='Default seed for all random generators')
    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
                        help='Option flag for using CUDA training (default: True)')
    parser.add_argument('--workers', default=1, type=int, metavar='N',
                        help='number of data loading workers (default: 1)')
    parser.add_argument('--task', default='activity', type=str,
                        help='Default working task activity/affordance')
    parser.add_argument('--epochs', default=50, type=int, metavar='N',
                        help='number of epochs for training (default: 50)')
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size for training (default: 1)')
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='learning rate for the feature extraction process (default: 1e-4)')
    parser.add_argument('--lr_decay', default=1, type=float,
                        help='decay rate of learning rate (default: between 0.01 and 1)')
    parser.add_argument('--lr_freq', default=25, type=float,
                        help='learning rate decay frequency while updating')
    parser.add_argument('--subsample', default=None, type=int,
                        help='subsample frequency for Breakfast dataset')
    parser.add_argument('--dropout_rate', default=0, type=float,
                        help='Dropout rate for LSTM training')
    parser.add_argument('--using_pred_duration', default=45, type=int,
                        help='prediction horizon (in frames) the prediction model was trained for (default: 45)')
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/models/parser/GEP_old.py:
--------------------------------------------------------------------------------
"""
Created on Jan 25, 2018

@author: Siyuan Qi

Description of the file.
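A generalized Earley parser that searches the language of a probabilistic
grammar directly over frame-wise classifier probabilities, returning the most
likely grammar-compatible label sequence via best-first prefix expansion.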

"""

import queue as Queue

import numpy as np
import nltk.grammar


class State(object):
    def __init__(self, r, dot, i, j, prefix, prob):
        self._r = r
        self._dot = dot
        self._i = i
        self._j = j
        self._prefix = prefix
        self._prob = prob

    def is_complete(self):
        return self._dot == len(self._r.rhs())

    def next_symbol(self):
        if self.is_complete():
            return None
        return self._r.rhs()[self._dot]

    def __repr__(self):
        rhs = [str(n) for n in self._r.rhs()]
        rhs = ' '.join(rhs[:self._dot]) + " * " + ' '.join(rhs[self._dot:])
        return '[{}:{}:{}] {} -> {} : {:.3f} "{}"'\
            .format(self._dot, self._i, self._j, self._r.lhs(), rhs, self._prob, ' '.join(self._prefix))

    @property
    def r(self): return self._r

    @property
    def dot(self): return self._dot

    @property
    def i(self): return self._i

    @property
    def j(self): return self._j

    @property
    def prefix(self): return self._prefix

    @property
    def prob(self): return self._prob

    def prefix_str(self):
        return ' '.join(self._prefix)


class GeneralizedEarley(object):
    def __init__(self, grammar):
        self._grammar = grammar
        self._classifier_output = None
        self._total_frame = 0
        self._cached_prob = None
        self._state_set = None
        self._queue = None
        self._prefix_queue = None
        self._max_prob = None
        self._best_l = None
        self._parse_init()

    def _parse_init(self, classifier_output=None):
        self._queue = Queue.PriorityQueue()
        self._prefix_queue = Queue.PriorityQueue()
        self._state_set = [[[]]]
        for r in self._grammar.productions():
            if str(r.lhs()) == 'GAMMA':
                self._state_set[0][0].append(State(r, 0, 0, 0, [], 0.0))
                break
        self._queue.put((1.0 - 1.0, (0, 0, '', self._state_set[0][0])))
        self._max_prob = -np.inf

        if classifier_output is not None:
            if len(classifier_output.shape) != 2:
                raise ValueError('Classifier output shape not recognized, expecting (frame_num, class_num).')
            self._classifier_output = classifier_output
            self._cached_prob = dict()
            self._total_frame = self._classifier_output.shape[0]
            self._class_num = self._classifier_output.shape[1]
            self._cached_prob[''] = np.ones(self._total_frame + 1) * np.finfo('d').min
            self._cached_prob[''][self._total_frame] = 0.0

    def parse(self, classifier_output):
        self._parse_init(classifier_output)
        count = 0
        while not self._queue.empty():
            count += 1
            # print(count)
            _, (m, n, set_l, current_set) = self._queue.get()
            # print(set_l)
            branch_probs = dict()
            branch_probs[set_l] = self._cached_prob[set_l][self._total_frame - 1]
            for s in current_set:
                l = ' '.join(s.prefix)
                if self._cached_prob[l][self._total_frame - 1] > self._max_prob:
                    self._max_prob = self._cached_prob[l][self._total_frame - 1]
                    self._best_l = l

                if s.is_complete():
                    self.complete(m, n, s)
                elif nltk.grammar.is_nonterminal(s.next_symbol()):
                    self.predict(m, n, s)
                elif nltk.grammar.is_terminal(s.next_symbol()):
                    if m == self._total_frame:
                        continue
                    new_l = self.scan(m, n, s)
                    branch_probs[new_l] = self._cached_prob[new_l][self._total_frame]
                else:
                    raise ValueError('No operation (predict, scan, complete) applies to state {}'.format(s))

            # Early stop
            if not self._queue.empty():
                _, best_prefix_string = self._prefix_queue.get()
                max_prefix_prob = self._cached_prob[best_prefix_string][self._total_frame]
            else:
                max_prefix_prob = -np.inf
            max_branch_prob = max([val for key, val in branch_probs.items()])
            if branch_probs[set_l] == max_branch_prob:
                if max_branch_prob > self._max_prob:
                    self._best_l, self._max_prob = set_l, max_branch_prob
                if self._max_prob > max_prefix_prob:
                    # print('Find best parse before exhausting all strings.')  # TODO: check validity
                    return self._best_l, self._max_prob
        return self._best_l, self._max_prob

    def get_log_prob_sum(self):
        log_prob = np.log(self._classifier_output).transpose()
        log_prob_sum = np.zeros((self._class_num, self._total_frame, self._total_frame))
        for c in range(self._class_num):
            for b in range(self._total_frame):
                log_prob_sum[c, b, b] = log_prob[c, b]
        for c in range(self._class_num):
            for b in range(self._total_frame):
                for e in range(b + 1, self._total_frame):
                    log_prob_sum[c, b, e] = log_prob_sum[c, b, e - 1] + log_prob[c, e]
        return log_prob, log_prob_sum

    def compute_labels(self):
        log_prob, log_prob_sum = self.get_log_prob_sum()

        tokens = [int(token) for token in self._best_l.split(' ')]
        dp_tables = np.zeros((len(tokens), self._total_frame))
        traces = np.zeros_like(dp_tables)

        for end in range(0, self._total_frame):
            dp_tables[0, end] = log_prob_sum[tokens[0], 0, end]

        for token_i, token in enumerate(tokens):
            if token_i == 0:
                continue
            for end in range(token_i, self._total_frame):
                max_log_prob = -np.inf
                for begin in range(token_i, end + 1):
                    check_prob = dp_tables[token_i - 1, begin - 1] + log_prob_sum[token, begin, end]
                    if check_prob > max_log_prob:
                        max_log_prob = check_prob
                        traces[token_i, end] = begin - 1
                dp_tables[token_i, end] = max_log_prob

        # Back tracing
        token_pos = [-1 for _ in tokens]
        token_pos[-1] = self._total_frame - 1
        for token_i in reversed(range(len(tokens) - 1)):
            token_pos[token_i] = int(traces[token_i + 1, token_pos[token_i + 1]])

        labels = -np.ones(self._total_frame, dtype=int)
        labels[:token_pos[0] + 1] = tokens[0]
        for token_i in range(1, len(tokens)):
            labels[token_pos[token_i - 1] + 1:token_pos[token_i] + 1] = tokens[token_i]

        return labels, self._best_l.split(' '), token_pos

    def complete(self, m, n, s):
        for back_s in self._state_set[s.i][s.j]:
            if str(back_s.next_symbol()) == str(s.r.lhs()):
                new_s = State(back_s.r, back_s.dot + 1, back_s.i, back_s.j, s.prefix, s.prob)
                # # if str(new_s.r.lhs()) == 'GAMMA':
                # #     print(new_s.prefix)

                # # For grammars that don't have recursive rules
                # self._state_set[m][n].append(new_s)

                # For grammars that have recursive rules
                state_exist = False
                for exist_s in self._state_set[m][n]:
                    if str(exist_s) == str(new_s):
                        state_exist = True
                        break
                if not state_exist:
                    # print 'complete: S[{}, {}]'.format(m, n), new_s
                    self._state_set[m][n].append(new_s)

    def predict(self, m, n, s):
        expand_symbol = str(s.next_symbol())
        for r in self._grammar.productions():
            if expand_symbol == str(r.lhs()):
                new_s = State(r, 0, m, n, s.prefix, s.prob)

                # # For grammars that don't have recursive rules
                # self._state_set[m][n].append(new_s)

                # For grammars that have recursive rules
                state_exist = False
                for exist_s in self._state_set[m][n]:
                    if str(exist_s) == str(new_s):
                        state_exist = True
                        break
                if not state_exist:
                    # print 'predict: S[{}, {}]'.format(m, n), new_s
                    self._state_set[m][n].append(new_s)

    def scan(self, m, n, s):
        new_prefix = s.prefix[:]
        new_prefix.append(str(s.next_symbol()))
        prob = self.compute_prob(new_prefix)
        new_s = State(s.r, s.dot + 1, s.i, s.j, new_prefix, prob)
        if m == len(self._state_set) - 1:
            new_n = 0
            self._state_set.append([])
        else:
            new_n = len(self._state_set[m + 1])

        # To eliminate same prefix branches
        state_exist = False
        for state_set in self._state_set[m + 1]:
            exist_s = state_set[0]
            if exist_s.prefix_str() == new_s.prefix_str():
                state_exist = True
                break

        new_prefix_str = ' '.join(new_prefix)
        if not state_exist:
            # print 'scan: S[{}, {}]'.format(m+1, new_n), new_s
            self._state_set[m + 1].append([])
            self._state_set[m + 1][new_n].append(new_s)
            self._queue.put((1.0 - prob, (m + 1, new_n, new_prefix_str, self._state_set[m + 1][new_n])))
            self._prefix_queue.put((1.0 - prob, new_prefix_str))

        return new_prefix_str

    def update_prob(self, prefix):
        pass

    def compute_prob(self, prefix):
        l = ' '.join(prefix)
        if l not in self._cached_prob:
            k = int(prefix[-1])
            l_minus = ' '.join(prefix[:-1])
            self._cached_prob[l] = np.ones(self._total_frame + 1) * np.finfo('d').min
            if len(prefix) == 1:
                self._cached_prob[l][0] = np.log(self._classifier_output[0, k])

            # Compute p(l)
            for t in range(1, self._total_frame):
                max_log = max(self._cached_prob[l][t - 1], self._cached_prob[l_minus][t - 1])
                self._cached_prob[l][t] = np.log(self._classifier_output[t, k]) + max_log \
                    + np.log(np.exp(self._cached_prob[l][t - 1] - max_log)
                             + np.exp(self._cached_prob[l_minus][t - 1] - max_log))

            # Compute p(l...)
            if self._total_frame == 1:
                max_log = self._cached_prob[l][0]
            else:
                max_log = max(self._cached_prob[l][0], np.max(self._cached_prob[l_minus][0:self._total_frame - 1]))
            self._cached_prob[l][self._total_frame] = np.exp(self._cached_prob[l][0] - max_log)
            for t in range(1, self._total_frame):
                self._cached_prob[l][self._total_frame] += self._classifier_output[t, k] * np.exp(self._cached_prob[l_minus][t - 1] - max_log)
            self._cached_prob[l][self._total_frame] = np.log(self._cached_prob[l][self._total_frame]) + max_log
            # TODO: change self._cached_prob[l][self._total_frame] to be self.prefix_prob[l]
            # TODO: update self._cached_prob[l] from [0, self._total_frame - 1] so that it can be adjusted online
        # Search according to prefix probability (the prefix probability is stored in the last dimension!)
        return self._cached_prob[l][self._total_frame]


def main():
    pass


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------