├── datasets ├── __init__.py ├── VCLA_GAZE │ ├── __init__.py │ ├── finetune │ │ ├── __init__.py │ │ ├── vcla_gaze_finetune.py │ │ └── model.py │ ├── vcla_gaze_config.py │ ├── metadata.py │ ├── vcla_gaze_prior.py │ ├── vcla_gaze.py │ └── dataparser.py ├── CAD │ ├── __init__.py │ ├── finetune │ │ ├── __init__.py │ │ ├── model.py │ │ ├── cad_finetune.py │ │ └── parse_features.py │ ├── cad_config.py │ ├── metadata.py │ ├── cad.py │ └── dataparser.py ├── WNP │ ├── __init__.py │ ├── wnp_config.py │ ├── metadata.py │ └── wnp.py ├── Breakfast │ ├── __init__.py │ ├── breakfast_config.py │ ├── metadata.py │ ├── breakfast.py │ └── dataparser.py └── helmert.py ├── models ├── __init__.py ├── parser │ ├── __init__.py │ ├── test.py │ └── GEP_old.py ├── MLP.py ├── BiLSTM.py ├── LSTM_pred.py ├── grammar_gen.py └── parsegraph.py ├── utils ├── __init__.py ├── qualitative.py ├── evalutils.py ├── plyutils.py ├── logutils.py └── vizutils.py ├── experiments ├── __init__.py ├── GEP │ ├── __init__.py │ ├── gep_ablation.py │ ├── gep_seg.py │ ├── gep_pred_parse_prediction.py │ ├── gep.py │ └── gep_pred_topdown.py ├── LSTM │ └── __init__.py ├── STAOG │ ├── __init__.py │ └── prob_utils.py └── exp_config.py ├── requirements.txt ├── gep_breakfast_det.sh ├── basemeta.py ├── config.py ├── breakfast_det.sh ├── cad_pred.sh ├── visualization ├── prediction_plot.py └── detection_plot.py └── README.md /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /utils/qualitative.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/CAD/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/WNP/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /models/parser/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Created on 2/15/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/Breakfast/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/16/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/GEP/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/LSTM/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /experiments/STAOG/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/CAD/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/8/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/30/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | scipy 4 | numpy 5 | matplotlib 6 | scikit-image 7 | tqdm 8 | opencv-python 9 | scikit-learn 10 | nltk 11 | seaborn 12 | pandas -------------------------------------------------------------------------------- /utils/evalutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/18/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import numpy as np 10 | 11 | def upsample(prediction, freq=10, length=None): 12 | upsampled_prediction = [i for i in prediction for _ in range(freq)] 13 | if length: 14 | if len(upsampled_prediction) > length: 15 | upsampled_prediction = upsampled_prediction[:length] 16 | return upsampled_prediction -------------------------------------------------------------------------------- /models/MLP.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Created on 11/5/19 4 | 5 | @author: Baoxiong Jia 6 | 7 | Description: 8 | 9 | """ 10 | 11 | import torch.nn as nn 12 | 13 | class MLP(nn.Module): 14 | def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.): 15 | super(MLP, self).__init__() 16 | self.linear1 = nn.Linear(input_size, 2 * hidden_size) 17 | self.linear2 = nn.Linear(2 * hidden_size, hidden_size) 18 | self.linear3 = nn.Linear(hidden_size, num_classes) 19 | self.dropout = nn.Dropout(p=dropout_rate) 20 | 21 | def forward(self, x): 22 | return 
self.linear3(self.dropout(nn.functional.relu(self.linear2(nn.functional.relu(self.linear1(x))))))  # ReLU nonlinearities between the stacked linear layers -------------------------------------------------------------------------------- /gep_breakfast_det.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TRAINED_EPOCHS=$1 3 | 4 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/gep_results" 5 | 6 | subsample=("1" "2" "5" "10" "20" "50") 7 | batch_size=("20" "32" "32" "32" "32" "32") 8 | 9 | if [ ! -d ${LOG_PATH} ] 10 | then 11 | mkdir -p ${LOG_PATH} 12 | fi 13 | 14 | for subs in "${!subsample[@]}" 15 | do 16 | echo GEP_${subsample[$subs]}_b${batch_size[$subs]}_t${TRAINED_EPOCHS} 17 | python experiments/GEP/gep.py --task activity --dataset Breakfast --using_batch_size ${batch_size[$subs]} --subsample ${subsample[$subs]} --lr 1e-3 --lr_decay 0.8 --epochs 50 --trained_epochs ${TRAINED_EPOCHS} > ${LOG_PATH}/eval_s${subsample[$subs]}_b${batch_size[$subs]}_t${TRAINED_EPOCHS}.txt 18 | done 19 | -------------------------------------------------------------------------------- /basemeta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | class Metadata(object): 11 | def __init__(self): 12 | # list for constant strings 13 | self.activities = list() 14 | self.subactivities = list() 15 | self.actions = list() 16 | self.objects = list() 17 | self.affordances = list() 18 | 19 | # reverse index of strings 20 | self.activity_index = dict() 21 | self.subactivity_index = dict() 22 | self.action_index = dict() 23 | self.object_index = dict() 24 | self.affordance_index = dict() 25 | 26 | # Macro constant 27 | self.ACTIVITY_NUM = -1 28 | self.SUBACTIVITY_NUM = -1 29 | self.ACTION_NUM = -1 30 | self.OBJECT_NUM = -1 31 | self.AFFORDANCE_NUM = -1 32 | self.MAXIMUM_OBJ_VIDEO = -1 -------------------------------------------------------------------------------- /datasets/CAD/finetune/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import torch 11 | from datasets.CAD.metadata import CAD_METADATA 12 | metadata = CAD_METADATA() 13 | 14 | class TaskNet(torch.nn.Module): 15 | def __init__(self, feature_dim, task='affordance', hidden_dim=1500): 16 | super(TaskNet, self).__init__() 17 | if task == 'affordance': 18 | num_classes = metadata.AFFORDANCE_NUM 19 | else: 20 | num_classes = metadata.ACTION_NUM 21 | self.module = torch.nn.Sequential( 22 | torch.nn.Linear(feature_dim, 2 * hidden_dim), 23 | torch.nn.ReLU(), 24 | torch.nn.Linear(2 * hidden_dim, hidden_dim), 25 | ) 26 | self.fc = torch.nn.Linear(hidden_dim, num_classes) 27 | 28 | def forward(self, x): 29 | features = self.module(x) 30 | output = self.fc(features)  # classify the hidden features, not the raw input 31 | return features, output -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | 12 | class Paths(object): 13 | 14 | def __init__(self): 15 | self.project_root = '/mnt/hdd/home/baoxiong/Projects/TPAMI2019' 16 | self.vcla_data_root = '/mnt/hdd/home/baoxiong/Datasets/VCLA/' 17 | self.wnp_root = '/mnt/hdd/home/baoxiong/Datasets/Watch-n-Patch/' 18 | self.cad_root = '/mnt/hdd/home/baoxiong/Datasets/CAD120/' 19 | self.breakfast_root 
= '/mnt/hdd/home/baoxiong/Datasets/Breakfast/' 20 | 21 | self.tmp_root = os.path.join(self.project_root, 'tmp') 22 | if not os.path.exists(self.tmp_root): 23 | os.makedirs(self.tmp_root) 24 | self.vis_root = os.path.join(self.project_root, 'vis') 25 | if not os.path.exists(self.vis_root): 26 | os.makedirs(self.vis_root) 27 | self.log_root = os.path.join(self.project_root, 'log') 28 | if not os.path.exists(self.log_root): 29 | os.makedirs(self.log_root) 30 | -------------------------------------------------------------------------------- /breakfast_det.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | EPOCHS=$1 3 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/nn_results" 4 | subsample=("1" "2" "5" "10" "20" "50" "100") 5 | batch_size=("20" "32" "32" "32" "32" "32" "32") 6 | training_epochs=("5" "10" "15" "20" "25" "30" "35" "40" "45" "50") 7 | 8 | if [ ! -d ${LOG_PATH} ] 9 | then 10 | mkdir -p ${LOG_PATH} 11 | fi 12 | 13 | for sub in "${!subsample[@]}" 14 | do 15 | python experiments/LSTM/detect.py --task activity --dataset Breakfast --batch_size ${batch_size[$sub]} --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --subsample ${subsample[$sub]} --save_interval 5 > ${LOG_PATH}/s${subsample[$sub]}_b${batch_size[$sub]}.txt 16 | done 17 | 18 | for sub in "${!subsample[@]}" 19 | do 20 | for trainepochs in "${training_epochs[@]}" 21 | do 22 | python experiments/LSTM/detect.py --task activity --dataset Breakfast --batch_size ${batch_size[$sub]} --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --subsample ${subsample[$sub]} --save_interval 5 --trained_epochs ${trainepochs} --eval True > ${LOG_PATH}/eval_s${subsample[$sub]}_b${batch_size[$sub]}_t${trainepochs}.txt 23 | done 24 | done 25 | -------------------------------------------------------------------------------- /cad_pred.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | EPOCHS=$1 3 | LOG_PATH="/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/log/nn_results" 4 | pred_duration=("15" "30" "45" "60" "75" "90" "105" "120" "135" "150") 5 | 6 | if [ ! 
-d ${LOG_PATH} ] 7 | then 8 | mkdir -p ${LOG_PATH} 9 | fi 10 | 11 | for pred in "${!pred_duration[@]}" 12 | do 13 | python experiments/LSTM/pred_baseline.py --task activity --dataset CAD --batch_size 1 --lr 1e-3 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} > ${LOG_PATH}/pred${pred_duration[$pred]}_train.txt 14 | done 15 | 16 | for pred in "${!pred_duration[@]}" 17 | do 18 | python experiments/LSTM/pred_baseline.py --task activity --dataset CAD --batch_size 1 --lr 5e-4 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} --eval True > ${LOG_PATH}/pred${pred_duration[$pred]}_eval.txt 19 | done 20 | 21 | for pred in "${!pred_duration[@]}" 22 | do 23 | python experiments/GEP/gep_pred_topdown.py --task activity --dataset CAD --batch_size 1 --lr 5e-4 --lr_decay 0.8 --epochs ${EPOCHS} --pred_duration ${pred_duration[$pred]} --using_pred_duration ${pred_duration[$pred]} > ${LOG_PATH}/gep_pred${pred_duration[$pred]}_eval.txt 24 | done 25 | -------------------------------------------------------------------------------- /datasets/WNP/wnp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: Watch-n-Patch dataset config 7 | No feature extraction, using kernel descriptor results 8 | 9 | """ 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | def __init__(self): 15 | super(Paths, self).__init__() 16 | self.data_root = self.wnp_root 17 | self.tmp_root = os.path.join(self.tmp_root, 'wnp') 18 | 19 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 20 | if not os.path.exists(self.inter_root): 21 | os.makedirs(self.inter_root) 22 | 23 | self.log_root = os.path.join(self.tmp_root, 'log') 24 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 25 | 26 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 27 | self.prior_root = os.path.join(self.tmp_root, 'prior') 28 | 29 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 30 | if not os.path.exists(self.visualize_root): 31 | os.makedirs(self.visualize_root) 32 | self.metadata_root = os.path.join(self.tmp_root, 'metadata') 33 | 34 | 35 | if __name__ == '__main__': 36 | a = Paths() -------------------------------------------------------------------------------- /models/BiLSTM.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch.nn as nn 10 | import torch.nn.utils.rnn as rnn_utils 11 | 12 | class BiLSTM(nn.Module): 13 | def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate=0.): 14 | super(BiLSTM, self).__init__() 15 | self.hidden_layer = hidden_size 16 | self.num_layers = num_layers 17 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False, bidirectional=True, dropout=dropout_rate) 18 | self.fc = nn.Linear(hidden_size * 2, num_classes) 19 | self.dropout = nn.Dropout(p=dropout_rate) 20 | 21 | def forward(self, features): 22 | # # # Initialize hidden states, 2 for bidirectional RNN 23 | # h0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 24 | # c0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 25 | 26 | # out, _ = self.lstm(features, (h0, c0)) 27 | # packed = rnn_utils.pack_sequence(features)  # unused; the padded batch is fed to the LSTM directly 28 | out, _ = self.lstm(features) 29 | out 
= self.dropout(out) 30 | out = self.fc(out) 31 | return out -------------------------------------------------------------------------------- /models/LSTM_pred.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.utils.rnn as rnn_utils 12 | 13 | 14 | class LSTM_Pred(nn.Module): 15 | def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate=0.): 16 | super(LSTM_Pred, self).__init__() 17 | self.hidden_layer = hidden_size 18 | self.num_layers = num_layers 19 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False, bidirectional=False, dropout=dropout_rate) 20 | self.fc = nn.Linear(hidden_size, num_classes) 21 | self.dropout = nn.Dropout(p=dropout_rate) 22 | 23 | def forward(self, features): 24 | # # # Initialize hidden states, 2 for bidirectional RNN 25 | # h0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 26 | # c0 = torch.zeros(self.num_layers * 2, features.size(1), self.hidden_layer).to(device=features.device) 27 | 28 | # out, _ = self.lstm(features, (h0, c0)) 29 | # packed = rnn_utils.pack_sequence(features)  # unused; the padded batch is fed to the LSTM directly 30 | out, _ = self.lstm(features) 31 | out = self.dropout(out) 32 | out = self.fc(out) 33 | return out -------------------------------------------------------------------------------- /models/grammar_gen.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import subprocess 12 | import tempfile 13 | 14 | 15 | def main(): 16 | project_path = '/media/hdd/home/baoxiong/Projects' 17 | breakfast_path = os.path.join(project_path, 'TPAMI2019', 'tmp', 'breakfast') 18 | corpus_dir = os.path.join(breakfast_path, 'corpus') 19 | grammar_dir = os.path.join(breakfast_path, 'grammar') 20 | madios_path = os.path.join(project_path, 'Tools', 'madios', 'build', 'madios') 21 | 22 | eta = 1 23 | alpha = 0.1 24 | context_size = 2 25 | coverage = 0.5 26 | 27 | if not os.path.exists(grammar_dir): 28 | os.makedirs(grammar_dir) 29 | 30 | for f in os.listdir(corpus_dir): 31 | corpus_path = os.path.join(corpus_dir, f) 32 | grammar_path = os.path.splitext(os.path.join(grammar_dir, f))[0] + '.pcfg' 33 | cmd = '{} {} {} {} {} {}'.format(madios_path, corpus_path, eta, alpha, context_size, coverage) 34 | 35 | grammar = False 36 | with open(grammar_path, 'w') as grammar_file: 37 | for line in os.popen(cmd).readlines(): 38 | if grammar: 39 | if line.strip() != '': 40 | grammar_file.write(line) 41 | if line.startswith('Time'): 42 | grammar = True 43 | print('Finishing {}'.format(corpus_path)) 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | """ 15 | Configuration of data paths 16 | data_root: root folder of all videos and annotations 17 | tmp_root: intermediate result for vcla_gaze dataset 18 | """ 19 | def __init__(self): 20 | super(Paths, self).__init__() 21 | self.data_root = self.vcla_data_root 22 | self.tmp_root = 
os.path.join(self.tmp_root, 'vcla_gaze') 23 | 24 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 25 | if not os.path.exists(self.inter_root): 26 | os.makedirs(self.inter_root) 27 | 28 | self.log_root = os.path.join(self.tmp_root, 'log') 29 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 30 | self.vis_root = os.path.join(self.vis_root, 'vcla_gaze') 31 | if not os.path.exists(self.vis_root): 32 | os.makedirs(self.vis_root) 33 | 34 | self.prior_root = os.path.join(self.tmp_root, 'prior') 35 | if not os.path.exists(self.prior_root): 36 | os.makedirs(self.prior_root) 37 | 38 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 39 | self.label_root = os.path.join(self.data_root, 'labels') 40 | self.metadata_root = os.path.join(self.label_root, 'metadata') 41 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 42 | self.img_root = os.path.join(self.data_root, 'images') 43 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') 44 | -------------------------------------------------------------------------------- /datasets/CAD/cad_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/7/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import config 12 | 13 | class Paths(config.Paths): 14 | """ 15 | Configuration of data paths 16 | data_root: root folder of all videos and annotations 17 | tmp_root: intermediate result for vcla_gaze dataset 18 | """ 19 | def __init__(self): 20 | super(Paths, self).__init__() 21 | self.data_root = self.cad_root 22 | self.tmp_root = os.path.join(self.tmp_root, 'cad') 23 | 24 | self.inter_root = os.path.join(self.tmp_root, 'intermediate') 25 | if not os.path.exists(self.inter_root): 26 | os.makedirs(self.inter_root) 27 | 28 | self.log_root = os.path.join(self.tmp_root, 'log') 29 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 30 | self.vis_root = os.path.join(self.vis_root, 'cad') 31 | if not os.path.exists(self.vis_root): 32 | os.makedirs(self.vis_root) 33 | 34 | self.prior_root = os.path.join(self.tmp_root, 'prior') 35 | if not os.path.exists(self.prior_root): 36 | os.makedirs(self.prior_root) 37 | 38 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 39 | if not os.path.exists(self.visualize_root): 40 | os.makedirs(self.visualize_root) 41 | 42 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 43 | self.label_root = os.path.join(self.data_root, 'labels') 44 | self.metadata_root = os.path.join(self.label_root, 'metadata') 45 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 46 | self.img_root = os.path.join(self.data_root, 'images') 47 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') 48 | -------------------------------------------------------------------------------- /datasets/Breakfast/breakfast_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | import os 12 | import config 13 | 14 | class Paths(config.Paths): 15 | """ 16 | Configuration of data paths 17 | data_root: root folder of all videos and annotations 18 | tmp_root: intermediate result for vcla_gaze dataset 19 | """ 20 | def __init__(self): 21 | super(Paths, self).__init__() 22 | self.data_root = self.breakfast_root 23 | self.tmp_root = os.path.join(self.tmp_root, 'breakfast') 24 | 25 | self.inter_root = 
os.path.join(self.tmp_root, 'intermediate') 26 | if not os.path.exists(self.inter_root): 27 | os.makedirs(self.inter_root) 28 | 29 | self.log_root = os.path.join(self.tmp_root, 'log') 30 | self.checkpoint_root = os.path.join(self.tmp_root, 'checkpoints') 31 | self.vis_root = os.path.join(self.vis_root, 'breakfast') 32 | if not os.path.exists(self.vis_root): 33 | os.makedirs(self.vis_root) 34 | 35 | self.prior_root = os.path.join(self.tmp_root, 'prior') 36 | if not os.path.exists(self.prior_root): 37 | os.makedirs(self.prior_root) 38 | 39 | self.visualize_root = os.path.join(self.tmp_root, 'visualization') 40 | if not os.path.exists(self.visualize_root): 41 | os.makedirs(self.visualize_root) 42 | 43 | self.grammar_root = os.path.join(self.tmp_root, 'grammar') 44 | self.label_root = os.path.join(self.data_root, 'labels') 45 | self.metadata_root = os.path.join(self.label_root, 'metadata') 46 | self.anno_root =os.path.join(self.label_root, 'clean_annotations') 47 | self.img_root = os.path.join(self.data_root, 'images') 48 | self.bbox_root = os.path.join(self.label_root, 'ObjBbox') -------------------------------------------------------------------------------- /datasets/WNP/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | 11 | class WNP_METADATA(Metadata): 12 | def __init__(self): 13 | super(WNP_METADATA, self).__init__() 14 | 15 | self.activities = ['office', 'kitchen'] 16 | self.subactivities = [ 17 | 'null', 18 | 'fetch_from_fridge', 'put_back_to_fridge', 'prepare_food', 'microwaving', 'fetch_from_oven', 19 | 'pouring', 'drinking', 'leave_kitchen', 'fill_kettle', 'plug_in_kettle', 'move_kettle', 20 | 'reading', 'walking', 'leave_office', 'fetch_book', 'put_back_book', 'put_down_item', 21 | 'take_item', 'play_computer', 'turn_on_monitor', 'turn_off_monitor' 22 | ] 23 | self.actions = [ 24 | 'null', 25 | 'fetch_from_fridge', 'put_back_to_fridge', 'prepare_food', 'microwaving', 'fetch_from_oven', 26 | 'pouring', 'drinking', 'leave_kitchen', 'fill_kettle', 'plug_in_kettle', 'move_kettle', 27 | 'reading', 'walking', 'leave_office', 'fetch_book', 'put_back_book', 'put_down_item', 28 | 'take_item', 'play_computer', 'turn_on_monitor', 'turn_off_monitor' 29 | ] 30 | 31 | for a in self.activities: 32 | self.activity_index[a] = self.activities.index(a) 33 | 34 | for s in self.subactivities: 35 | self.subactivity_index[s] = self.subactivities.index(s) 36 | 37 | for a in self.actions: 38 | self.action_index[a] = self.actions.index(a) 39 | 40 | self.ACTIVITY_NUM = len(self.activities) 41 | self.SUBACTIVITY_NUM = len(self.subactivities) 42 | self.ACTION_NUM = len(self.actions) 43 | 44 | 45 | if __name__ == '__main__': 46 | metadata = WNP_METADATA() -------------------------------------------------------------------------------- /visualization/prediction_plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 6/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import re 11 | import numpy as np 12 | import seaborn as sns 13 | rc={'axes.labelsize': 20, 'font.size': 20, 'legend.fontsize': 20.0, 'axes.titlesize': 20, 'xtick.labelsize': 20.0, 'ytick.labelsize': 24.0,} 14 | sns.set(rc=rc) 15 | import pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/log/nn_results' 19 | 
save_path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/fig' 20 | pred_duration = [15, 30, 45, 60, 75, 90, 105, 120, 135, 150] 21 | gep_paths = [os.path.join(path, 'gep_pred{}_eval.txt'.format(i)) for i in pred_duration] 22 | nn_paths = [os.path.join(path, 'pred{}_eval.txt'.format(i)) for i in pred_duration] 23 | 24 | pattern = '[0-9]+.[0-9]+' 25 | 26 | df_columns = ['Prediction duration (s)', 'LSTM + GEP', 'LSTM', 'Random'] 27 | df = [] 28 | for idx, paths in enumerate(zip(gep_paths, nn_paths)): 29 | gep_path, nn_path = paths 30 | with open(gep_path, 'r') as f: 31 | results_gep = f.readlines() 32 | gep_acc = float(re.findall(pattern, results_gep[-1])[-1]) 33 | # gep_acc = 0 34 | print('gep acc', gep_acc) 35 | with open(nn_path, 'r') as f: 36 | results_nn = f.readlines() 37 | nn_acc = float(re.findall(pattern, results_nn[-1])[-1]) 38 | print('nn acc', nn_acc) 39 | df.append([pred_duration[idx] / 15, gep_acc, nn_acc, 0.1]) 40 | df = pd.DataFrame(df, columns=df_columns) 41 | fig, ax = plt.subplots() 42 | plt.axes([0, 0, 1 / 0.618, 1]) 43 | df = pd.melt(df, id_vars=df_columns[0], value_vars=df_columns[1 : ], var_name='Method', value_name='F1 score') 44 | ax = sns.lineplot(x=df_columns[0], y='F1 score', hue='Method', data=df) 45 | ax.lines[2].set_linestyle('--') 46 | ax.set_title(r'Frame prediction over time') 47 | ax.set(xticks = np.array(pred_duration) / 15) 48 | plt.ylim(0, 0.7) 49 | ax.legend(loc='upper right') 50 | print(sns.plotting_context()) 51 | plt.savefig(os.path.join(save_path, 'cad_prediction.pdf'), bbox_inches='tight') 52 | -------------------------------------------------------------------------------- /datasets/CAD/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class CAD_METADATA(Metadata): 11 | def __init__(self): 12 | super(CAD_METADATA, self).__init__() 13 | 14 | self.activities = [ 15 | 'arranging_objects', 'picking_objects', 'taking_medicine', 16 | 'making_cereal', 'cleaning_objects', 'stacking_objects', 'having_meal', 17 | 'microwaving_food', 'unstacking_objects', 'taking_food' 18 | ] 19 | 20 | self.subactivities = [ 21 | 'reaching', 'moving', 'pouring', 'eating', 'drinking', 22 | 'opening', 'placing', 'closing', 'null', 'cleaning', 'prior' 23 | ] 24 | 25 | self.actions = [ 26 | 'reaching', 'moving', 'pouring', 'eating', 'drinking', 27 | 'opening', 'placing', 'closing', 'null', 'cleaning' 28 | ] 29 | 30 | self.objects = ['medcinebox', 'cup', 'bowl', 'box', 'milk', 'book', 'microwave', 'plate', 'remote', 'cloth'] 31 | 32 | self.affordances = [ 33 | 'movable', 'stationary', 'reachable', 'pourable', 'pourto', 'containable', 34 | 'drinkable', 'openable', 'placeable', 'closeable', 'cleanable', 'cleaner' 35 | ] 36 | 37 | for a in self.activities: 38 | self.activity_index[a] = self.activities.index(a) 39 | 40 | for s in self.subactivities: 41 | self.subactivity_index[s] = self.subactivities.index(s) 42 | 43 | for a in self.actions: 44 | self.action_index[a] = self.actions.index(a) 45 | 46 | for o in self.objects: 47 | self.object_index[o] = self.objects.index(o) 48 | 49 | for u in self.affordances: 50 | self.affordance_index[u] = self.affordances.index(u) 51 | 52 | self.ACTIVITY_NUM = len(self.activities) 53 | self.SUBACTIVITY_NUM = len(self.subactivities) 54 | self.ACTION_NUM = len(self.actions) 55 | self.OBJECT_NUM = len(self.objects) 56 | self.AFFORDANCE_NUM = len(self.affordances) 57 | 
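
The `Metadata` subclasses above (see also `basemeta.py`) expose parallel label lists and reverse-index dictionaries that the rest of the code uses to convert between label strings and class indices. A minimal usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
# Minimal usage sketch for the metadata classes (illustration only).
from datasets.CAD.metadata import CAD_METADATA

metadata = CAD_METADATA()

# Label string -> class index, and back again.
idx = metadata.affordance_index['reachable']
assert metadata.affordances[idx] == 'reachable'

# Class counts used to size model outputs (e.g., TaskNet in datasets/CAD/finetune/model.py).
print(metadata.ACTIVITY_NUM, metadata.SUBACTIVITY_NUM, metadata.AFFORDANCE_NUM)  # 10 11 12
```
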
-------------------------------------------------------------------------------- /datasets/CAD/finetune/cad_finetune.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import torch.utils.data 12 | import torch 13 | from random import shuffle 14 | import numpy as np 15 | import datasets.CAD.cad_config as config 16 | from datasets.CAD.metadata import CAD_METADATA 17 | metadata = CAD_METADATA() 18 | 19 | class CAD_FEATURE(torch.utils.data.Dataset): 20 | def __init__(self, paths, sequence_ids, task, verbose=False): 21 | self.root = paths.img_root 22 | self.tmp_root = paths.tmp_root 23 | self.inter_root = paths.inter_root 24 | self.task = task 25 | self.verbose = verbose 26 | self.sequence_ids = sequence_ids 27 | with open(os.path.join(paths.tmp_root, 'features.p'), 'rb') as f: 28 | self.data_list = pickle.load(f) 29 | with open(os.path.join(paths.tmp_root, 'label_list.p'), 'rb') as f: 30 | self.label_list = pickle.load(f) 31 | 32 | # Using framewise information for prediction purposes 33 | def __getitem__(self, index): 34 | sequence_id = self.sequence_ids[index] 35 | video_id, frame = sequence_id.split('$') 36 | label = self.label_list[sequence_id] 37 | sequence_info = self.data_list[video_id][int(frame)] 38 | feature = list() 39 | if self.task == 'affordance': 40 | object_affordance_feature = np.array(sequence_info['o_fea']) 41 | skeleton_object_feature = np.array(sequence_info['s_o_fea']) 42 | feature = np.hstack((object_affordance_feature, skeleton_object_feature)) 43 | else: 44 | feature = np.array(sequence_info['h_fea'])  # use the human feature for the action task 45 | # with open(os.path.join(self.inter_root, 'finetune', 'affordance'), ) 46 | 47 | feature = torch.FloatTensor(feature) 48 | label = torch.LongTensor(label) 49 | return feature, label 50 | 51 | def __len__(self): 52 | return len(self.sequence_ids) 53 | 54 | def main(): 55 | paths = config.Paths() 56 | with open(os.path.join(paths.tmp_root, 'label_list.p'), 'rb') as f: 57 | sequence_ids = pickle.load(f) 58 | train_num = 10 59 | keys = list(sequence_ids.keys()) 60 | shuffle(keys) 61 | train_ids = ['1130144242$4'] 62 | train_set = CAD_FEATURE(paths, train_ids, 'affordance') 63 | feature, label = train_set[0] 64 | print('Finished') 65 | 66 | if __name__ == '__main__': 67 | main() -------------------------------------------------------------------------------- /visualization/detection_plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 6/1/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import re 11 | import os 12 | import glob 13 | import seaborn as sns 14 | rc={'axes.labelsize': 28, 'font.size': 20, 'legend.fontsize': 20.0, 'axes.titlesize': 20, 'xtick.labelsize': 24.0, 'ytick.labelsize': 28.0,} 15 | sns.set(rc=rc) 16 | import pandas as pd 17 | import matplotlib.pyplot as plt 18 | 19 | path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/breakfast/log/' 20 | 21 | save_path = '/media/hdd/home/baoxiong/Projects/TPAMI2019/fig' 22 | subsample_rate = [1, 2, 5, 10, 20, 50] 23 | # trained_epochs = [5, 10, 15, 20, 25, 30, 35, 40] 24 | trained_epochs = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50] 25 | 26 | pattern = '[0-9]+.[0-9]+' 27 | 28 | gep_all_paths = [[glob.glob(os.path.join(path, 'gep_results', 'eval_s{}_*_t{}.txt'.format(s, t)))[0] for t in trained_epochs] for s in subsample_rate] 29 | nn_all_paths = 
[[glob.glob(os.path.join(path, 'nn_results', 'eval_s{}_*_t{}.txt'.format(s, t)))[0] for t in trained_epochs] for s in subsample_rate] 30 | 31 | df_columns = ['Trained epochs', 'Bi-LSTM + GEP', 'Bi-LSTM'] 32 | for s_idx, (gep_paths, nn_paths) in enumerate(zip(gep_all_paths, nn_all_paths)): 33 | df = [] 34 | for t_idx, (gep_path, nn_path) in enumerate(zip(gep_paths, nn_paths)): 35 | with open(gep_path, 'r') as f: 36 | results_gep = f.readlines() 37 | gep_acc = float(re.findall(pattern, results_gep[-3])[0]) 38 | with open(nn_path, 'r') as f: 39 | results_nn = f.readlines() 40 | nn_acc = float(re.findall(pattern, results_nn[-1])[0]) 41 | print(gep_acc, nn_acc) 42 | df.append([trained_epochs[t_idx], gep_acc, nn_acc]) 43 | df = pd.DataFrame(df, columns=df_columns) 44 | fig, ax = plt.subplots() 45 | df = pd.melt(df, id_vars=df_columns[0], value_vars=df_columns[1 :], var_name='Method', value_name='Accuracy') 46 | sns.lineplot(x=df_columns[0], y='Accuracy', hue='Method', data=df) 47 | plt.xticks(trained_epochs) 48 | plt.ylim(0, 0.7) 49 | # plt.title(r'Detection result with {} frame subsample'.format(subsample_rate[s_idx]) if subsample_rate[s_idx] != 1 50 | # else r'Detection result w/o subsample') 51 | plt.legend(loc='lower right') 52 | plt.savefig(os.path.join(save_path, 'breakfast_subsample_{}.pdf'.format(subsample_rate[s_idx])), bbox_inches='tight') 53 | print('Finished for {}'.format(subsample_rate[s_idx])) 54 | 55 | -------------------------------------------------------------------------------- /utils/plyutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import matplotlib.patches as patches 13 | 14 | def visualize_bbox_image(sequence_id, data, objects_dict): 15 | object_labels, object_images = data 16 | for obj_idx, obj_label in enumerate(object_labels): 17 | object_name = objects_dict[np.argmax(obj_label.numpy())] 18 | fig, ax = plt.subplots(1) 19 | plt.title(object_name) 20 | ax.imshow(object_images[obj_idx].permute(1, 2, 0).numpy().astype(np.uint8)) 21 | plt.show() 22 | 23 | def visualize_bbox_rgb(sequence_id, data, objects_dict): 24 | rgb_image, object_pair = data 25 | color = ['r', 'g', 'b', 'y'] 26 | fig, ax = plt.subplots(1) 27 | ax.imshow(rgb_image.numpy().astype(np.uint8)) 28 | plt.title('{} bboxs in rgb'.format(sequence_id)) 29 | for idx, vec in enumerate(object_pair): 30 | object_id = list(vec[:-4]).index(1) 31 | if object_id == 0: 32 | continue 33 | bbox = vec[-4:] 34 | # Code for showing wrong bounding boxes 35 | rect = patches.Rectangle((bbox[1], bbox[0]), bbox[3] - bbox[1], bbox[2] - bbox[0], 36 | linewidth=1, edgecolor=color[idx], facecolor='none') 37 | ax.add_patch(rect) 38 | print(objects_dict[object_id]) 39 | plt.show() 40 | 41 | 42 | def visualize_skeleton_depth(sequence_id, data): 43 | image, skeleton = data 44 | skeleton_depth = skeleton[:, 5 : 7] 45 | line_pairs = [ 46 | (23, 11), (24, 11), (11, 10), (10, 9), (9, 8), (8, 20), # right arm 47 | (21, 7), (22, 7), (7, 6), (6, 5), (5, 4), (4, 20), # left arm 48 | (3, 2), (2, 20), # head 49 | (20, 1), (1, 0), # torso 50 | (19, 18), (18, 17), (17, 16), (16, 0), # right leg 51 | (15, 14), (14, 13), (13, 12), (12, 0) # left leg 52 | ] 53 | fig, ax = plt.subplots(1) 54 | ax.imshow(image.numpy().astype(np.uint8)) 55 | plt.title('{} skeleton in depth'.format(sequence_id)) 56 | for line in line_pairs: 57 | point1 = 
[skeleton_depth[line[0], 0], skeleton_depth[line[0], 1]] 58 | point2 = [skeleton_depth[line[1], 0], skeleton_depth[line[1], 1]] 59 | ax.scatter(point1[0], point1[1], c='y') 60 | ax.scatter(point2[0], point2[1], c='y') 61 | ax.plot([point1[0], point2[0]], [point1[1], point2[1]], 'r') 62 | plt.show() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GEP 2 | 3 | This repo is adapted from the [original GEP repo](https://github.com/SiyuanQi/generalized-earley-parser) and contains code and adjustments for our TPAMI 2020 paper. 4 | 5 | [A Generalized Earley Parser for Human Activity Parsing and Prediction](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9018126) 6 | 7 | Siyuan Qi, Baoxiong Jia, Siyuan Huang, Ping Wei, and Song-Chun Zhu 8 | 9 | *IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)*, 2020 10 | 11 | 12 | # Dependencies 13 | 14 | Please check that all required packages from ```requirements.txt``` are properly installed. 15 | 16 | 17 | # Experiments 18 | 19 | This repo contains code for reproducing the results reported in our TPAMI paper. 20 | 21 | To run the experiments properly, please download the datasets and adjust the path configuration in ```config.py```. 22 | 23 | We provide three example scripts that show how to use this code for activity parsing and future prediction. 24 | 25 | First, ```breakfast_det.sh``` and ```gep_breakfast_det.sh``` show how to run experiments for activity parsing. These two shell scripts run the ```baseline``` and ```gep``` models for recognizing human actions, respectively. Since the Breakfast dataset contains a large number of frames, the frame subsampling rate is treated as a hyper-parameter that can be tuned during experiments. Please change ```LOG_PATH``` to a valid logging path for storing the results before running the scripts. 26 | 27 | Next, for activity prediction, we use prediction on the CAD dataset as an example. As shown in ```cad_pred.sh```, we run baseline training/evaluation as well as GEP prediction. We report and store model performance under different prediction durations, which can be set in the shell script. Please also change ```LOG_PATH``` to a valid logging path for storing the results. 28 | 29 | # Data 30 | For the features and grammar files used to reproduce the experimental results, please see [here](https://drive.google.com/drive/folders/1_3rr3O1AtbZsGHwy33JPkSSQAOzq5Z8j?usp=sharing). Put the unzipped directory at a valid location and fix the path configuration inside ```config.py``` (see the sketch below) to match the feature paths used in ```datasets/{dataset}.py```. 
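
Concretely, pointing the code at your data means editing the dataset roots in ```config.py```; a minimal sketch with placeholder paths (replace them with your own locations):

```python
# config.py -- example path configuration (placeholder paths, adapt to your machine)
class Paths(object):
    def __init__(self):
        self.project_root = '/path/to/TPAMI2019'            # repo root; tmp/, vis/, log/ live here
        self.vcla_data_root = '/path/to/Datasets/VCLA/'
        self.wnp_root = '/path/to/Datasets/Watch-n-Patch/'
        self.cad_root = '/path/to/Datasets/CAD120/'
        self.breakfast_root = '/path/to/Datasets/Breakfast/'
```
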
31 | 32 | 33 | 34 | # Citation 35 | 36 | If you find the paper and/or the code helpful, please cite 37 | ``` 38 | @inproceedings{qi2018future, 39 | title={Generalized Earley Parser: Bridging Symbolic Grammars and Sequence Data for Future Prediction}, 40 | author={Qi, Siyuan and Jia, Baoxiong and Zhu, Song-Chun}, 41 | booktitle={International Conference on Machine Learning (ICML)}, 42 | year={2018} 43 | } 44 | @article{qi2020generalized, 45 | title={A Generalized Earley Parser for Human Activity Parsing and Prediction}, 46 | author={Qi, Siyuan and Jia, Baoxiong and Huang, Siyuan and Wei, Ping and Zhu, Song-Chun}, 47 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 48 | year={2020}, 49 | publisher={IEEE} 50 | } 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /datasets/Breakfast/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 4/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class BREAKFAST_METADATA(Metadata): 11 | def __init__(self): 12 | super(BREAKFAST_METADATA, self).__init__() 13 | 14 | self.activities = [ 15 | "salat", "tea", "coffee", "scrambledegg", "pancake", 16 | "sandwich", "milk", "cereals", "friedegg", "juice" 17 | ] 18 | 19 | self.subactivities = [ 20 | "fry_egg", "add_saltnpepper", "cut_fruit", "pour_milk", "take_cup", "pour_water", 21 | "spoon_flour", "SIL", "stir_coffee", "pour_cereals", "butter_pan", "put_egg2plate", 22 | "take_glass", "pour_sugar", "stir_milk", "take_butter", "peel_fruit", "take_knife", 23 | "stirfry_egg", "pour_oil", "pour_flour", "spoon_powder", "put_pancake2plate", 24 | "stir_fruit", "squeeze_orange", "fry_pancake", "pour_dough2pan", "put_fruit2bowl", 25 | "stir_egg", "take_eggs", "put_bunTogether", "pour_coffee", "smear_butter", 26 | "cut_orange", "take_bowl", "cut_bun", "stir_tea", "take_squeezer", "pour_juice", 27 | "stir_cereals", "pour_egg2pan", "take_topping", "add_teabag", "crack_egg", 28 | "take_plate", "put_toppingOnTop", "stir_dough", "spoon_sugar" 29 | ] 30 | 31 | self.actions = [ 32 | "fry_egg", "add_saltnpepper", "cut_fruit", "pour_milk", "take_cup", "pour_water", 33 | "spoon_flour", "SIL", "stir_coffee", "pour_cereals", "butter_pan", "put_egg2plate", 34 | "take_glass", "pour_sugar", "stir_milk", "take_butter", "peel_fruit", "take_knife", 35 | "stirfry_egg", "pour_oil", "pour_flour", "spoon_powder", "put_pancake2plate", 36 | "stir_fruit", "squeeze_orange", "fry_pancake", "pour_dough2pan", "put_fruit2bowl", 37 | "stir_egg", "take_eggs", "put_bunTogether", "pour_coffee", "smear_butter", 38 | "cut_orange", "take_bowl", "cut_bun", "stir_tea", "take_squeezer", "pour_juice", 39 | "stir_cereals", "pour_egg2pan", "take_topping", "add_teabag", "crack_egg", 40 | "take_plate", "put_toppingOnTop", "stir_dough", "spoon_sugar" 41 | ] 42 | 43 | for a in self.activities: 44 | self.activity_index[a] = self.activities.index(a) 45 | 46 | for s in self.subactivities: 47 | self.subactivity_index[s] = self.subactivities.index(s) 48 | 49 | for a in self.actions: 50 | self.action_index[a] = self.actions.index(a) 51 | 52 | 53 | self.ACTIVITY_NUM = len(self.activities) 54 | self.SUBACTIVITY_NUM = len(self.subactivities) 55 | self.ACTION_NUM = len(self.actions) -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
Created on 10/18/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | from basemeta import Metadata 10 | class VCLA_METADATA(Metadata): 11 | def __init__(self): 12 | super(VCLA_METADATA, self).__init__() 13 | self.activities = [ 14 | 'c01_sweep_floor', 'c02_mop_floor', 'c03_write_on_blackboard', 15 | 'c04_clean_blackboard', 'c05_use_elevator', 'c06_pour_liquid_from_jug', 16 | 'c07_make_coffee', 'c08_read_book', 'c09_throw_trash', 17 | 'c10_heat_food_with_microwave', 'c11_use_computer', 'c12_search_drawer', 18 | 'c13_move_bottle_to_dispenser', 'c14_open_door' 19 | ] 20 | 21 | self.subactivities = [ 22 | 'null', 23 | 'search', 'tear', 'read', 'throw', 'wring', 'open', 'use', 'walk', 'scrub', 'pour', 24 | 'write', 'sweep', 'grab', 'mop', 'close', 'push', 'stand', 'sit', 'grag', 'place', 'prior' 25 | ] 26 | 27 | self.actions = [ 28 | 'null', 29 | 'search', 'tear', 'read', 'throw', 'wring', 'open', 'use', 'walk', 'scrub', 'pour', 30 | 'write', 'sweep', 'grab', 'mop', 'close', 'push', 'stand', 'sit', 'grag', 'place' 31 | ] 32 | 33 | self.objects = [ 34 | 'null', 35 | 'blackboard', 'chair', 'dispenser', 'dustpan', 'eraser', 'cup', 'drawer', 36 | 'bucket', 'microwave', 'broom', 'button', 'handle', 'paper', 'door', 'mop', 37 | 'jug', 'bottle', 'monitor', 'book', 'food', 'can', 'chalk' 38 | ] 39 | 40 | self.affordances = [ 41 | 'null', 42 | 'usable', 'scrubber', 'searchable', 'wringable', 'scrubbable', 43 | 'throwable', 'sittable', 'sweepable', 'pourable', 'pourto', 'writer', 44 | 'writable', 'tearable', 'moppable', 'closeable', 'statuibar', 'placeable', 45 | 'stationary', 'readable', 'grabbable', 'openable', 'pushable' 46 | ] 47 | 48 | for a in self.activities: 49 | self.activity_index[a] = self.activities.index(a) 50 | 51 | for s in self.subactivities: 52 | self.subactivity_index[s] = self.subactivities.index(s) 53 | 54 | for a in self.actions: 55 | self.action_index[a] = self.actions.index(a) 56 | 57 | for o in self.objects: 58 | self.object_index[o] = self.objects.index(o) 59 | 60 | for u in self.affordances: 61 | self.affordance_index[u] = self.affordances.index(u) 62 | 63 | self.ACTIVITY_NUM = len(self.activities) 64 | self.SUBACTIVITY_NUM = len(self.subactivities) 65 | self.ACTION_NUM = len(self.actions) 66 | self.OBJECT_NUM = len(self.objects) 67 | self.AFFORDANCE_NUM = len(self.affordances) 68 | self.MAXIMUM_OBJ_VIDEO = 3 69 | 70 | -------------------------------------------------------------------------------- /datasets/WNP/wnp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import numpy as np 12 | import torch 13 | import torch.utils.data 14 | from datasets.WNP.metadata import WNP_METADATA 15 | 16 | class WNP(torch.utils.data.Dataset): 17 | def __init__(self, paths, mode, task='activity', subsample=None): 18 | self.path = paths.inter_root 19 | self.sequence_ids = list() 20 | with open(os.path.join(self.path, 'features', 'wnp_{}.p'.format(mode)), 'rb') as f: 21 | self.data = pickle.load(f, encoding='latin1') 22 | for key in self.data.keys(): 23 | self.sequence_ids.append(key) 24 | self.task = task 25 | self.mode = mode 26 | 27 | def __getitem__(self, index): 28 | sequence_id = self.sequence_ids[index] 29 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 30 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 31 | 
self.data[sequence_id]['activity'], sequence_id, None 32 | 33 | def __len__(self): 34 | return len(self.sequence_ids) 35 | 36 | @staticmethod 37 | def collate_fn(batch): 38 | metadata = WNP_METADATA() 39 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 40 | feature_size = features.shape[1] 41 | label_num = len(metadata.subactivities) 42 | 43 | max_seq_length = np.max(np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch])) 44 | features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 45 | labels_batch = np.ones((max_seq_length, len(batch))) * -1 46 | probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 47 | total_lengths = np.zeros(len(batch)) 48 | ctc_labels = list() 49 | ctc_lengths = list() 50 | activities = list() 51 | sequence_ids = list() 52 | 53 | for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch): 54 | features_batch[:total_length, batch_i, :] = np.nan_to_num(features) 55 | labels_batch[:total_length, batch_i] = labels 56 | for frame in range(features.shape[0]): 57 | probs_batch[frame, batch_i, int(labels[frame])] = 1.0 58 | 59 | merged_labels = list() 60 | current_label = -1 61 | for label in labels: 62 | if label != current_label: 63 | current_label = label 64 | merged_labels.append(current_label) 65 | ctc_labels.append(merged_labels) 66 | ctc_lengths.append(len(merged_labels)) 67 | total_lengths[batch_i] = total_length 68 | activities.append(activity) 69 | sequence_ids.append(sequence_id) 70 | 71 | features_batch = torch.FloatTensor(features_batch) 72 | labels_batch = torch.LongTensor(labels_batch) 73 | probs_batch = torch.FloatTensor(probs_batch) 74 | total_lengths = torch.IntTensor(total_lengths) 75 | ctc_lengths = torch.IntTensor(ctc_lengths) 76 | 77 | # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional_label 78 | return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None -------------------------------------------------------------------------------- /models/parsegraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | class SParseGraph(object): 11 | def __init__(self, start_frame, end_frame, subactivity=None, action=None, objects=None, affordance_labels=None): 12 | self._start_frame = start_frame 13 | self._end_frame = end_frame 14 | self._action = action 15 | self._subactivity = subactivity 16 | self._objects = objects if objects is not None else list()  # avoid shared mutable default arguments 17 | self._affordance_labels = affordance_labels if affordance_labels is not None else list() 18 | 19 | self._skeletons = None 20 | self._obj_positions = list() 21 | 22 | def __str__(self): 23 | return '{}-{} {} {} {}'.format(self._start_frame, self._end_frame, self._subactivity, 24 | self._objects, self._affordance_labels) 25 | 26 | def __repr__(self): 27 | return self.__str__() 28 | 29 | @property 30 | def id(self): 31 | return self._subactivity 32 | 33 | @property 34 | def subactivity(self): 35 | return self._subactivity 36 | 37 | @property 38 | def action(self): 39 | return self._action 40 | 41 | @property 42 | def start_frame(self): 43 | return self._start_frame 44 | 45 | @property 46 | def end_frame(self): 47 | return self._end_frame 48 | 49 | @property 50 | def objects(self): 51 | return self._objects 52 | 53 | @property 54 | 
def affordance(self): 55 | return self._affordance_labels 56 | 57 | @property 58 | def skeletons(self): 59 | return self._skeletons 60 | 61 | @property 62 | def obj_positions(self): 63 | return self._obj_positions 64 | 65 | def set_skeletons(self, skeletons): 66 | assert self._end_frame - self._start_frame + 1 == skeletons.shape[0] 67 | self._skeletons = skeletons 68 | 69 | def set_obj_positions(self, obj_positions): 70 | for obj in obj_positions: 71 | self._obj_positions.append(obj[self._start_frame : self._end_frame + 1]) 72 | 73 | @subactivity.setter 74 | def subactivity(self, value): 75 | self._subactivity = value 76 | 77 | @end_frame.setter 78 | def end_frame(self, value): 79 | self._end_frame = value 80 | 81 | 82 | class TParseGraph(object): 83 | def __init__(self, activity=None, sequence_id=None, subject=None): 84 | self._activity = activity 85 | self._sequence_id = sequence_id 86 | self._subject = subject 87 | self._terminals = list() 88 | 89 | def __str__(self): 90 | sequence = '{} * '.format(self._sequence_id) 91 | for t in self._terminals: 92 | sequence += t.id + ' ' 93 | sequence += '#' 94 | return sequence 95 | 96 | def __repr__(self): 97 | return self.__str__() 98 | 99 | @property 100 | def activity(self): 101 | return self._activity 102 | 103 | @property 104 | def id(self): 105 | return self._sequence_id 106 | 107 | @property 108 | def subject(self): 109 | return self._subject 110 | 111 | @property 112 | def terminals(self): 113 | return self._terminals 114 | 115 | @property 116 | def length(self): 117 | if len(self._terminals) == 0: 118 | return 0 119 | else: 120 | return self._terminals[-1].end_frame - self._terminals[0].start_frame + 1 121 | 122 | @property 123 | def start_frame(self): 124 | assert len(self._terminals) > 0, 'No spg added' 125 | return self._terminals[0].start_frame 126 | 127 | @activity.setter 128 | def activity(self, value): 129 | self._activity = value 130 | 131 | def append_terminal(self, spg): 132 | self._terminals.append(spg) 133 | -------------------------------------------------------------------------------- /experiments/exp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import torch 10 | 11 | import datasets.VCLA_GAZE.vcla_gaze_config as vcla_gaze_config 12 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 13 | import datasets.VCLA_GAZE.vcla_gaze as vcla_gaze 14 | 15 | import datasets.CAD.cad_config as cad_config 16 | from datasets.CAD.metadata import CAD_METADATA 17 | import datasets.CAD.cad as cad 18 | 19 | import datasets.WNP.wnp_config as wnp_config 20 | from datasets.WNP.metadata import WNP_METADATA 21 | import datasets.WNP.wnp as wnp 22 | 23 | import datasets.Breakfast.breakfast_config as breakfast_config 24 | from datasets.Breakfast.metadata import BREAKFAST_METADATA 25 | import datasets.Breakfast.breakfast as breakfast 26 | 27 | class Experiment(object): 28 | def __init__(self, dataset='VCLA_GAZE'): 29 | self.paths_dict = { 30 | 'WNP': wnp_config.Paths(), 31 | 'VCLA_GAZE': vcla_gaze_config.Paths(), 32 | 'CAD': cad_config.Paths(), 33 | 'Breakfast': breakfast_config.Paths() 34 | } 35 | self.metadata_dict = { 36 | 'WNP': WNP_METADATA(), 37 | 'VCLA_GAZE': VCLA_METADATA(), 38 | 'CAD': CAD_METADATA(), 39 | 'Breakfast': BREAKFAST_METADATA() 40 | } 41 | self.dataset_dict = { 42 | 'WNP': lambda path, mode, task, subsample: wnp.WNP(path, mode, task, subsample), 43 | 'VCLA_GAZE': lambda path, mode, 
task, subsample: vcla_gaze.VCLA_GAZE(path, mode, task, subsample), 44 | 'CAD': lambda path, mode, task, subsample: cad.CAD(path, mode, task, subsample), 45 | 'Breakfast': lambda path, mode, task, subsample: breakfast.Breakfast(path, mode, task, subsample) 46 | } 47 | self.dataset = self.dataset_dict[dataset] 48 | self.paths = self.paths_dict[dataset] 49 | self.metadata = self.metadata_dict[dataset] 50 | 51 | def get_dataset(self, args, save=False): 52 | all_set = None 53 | train_set = self.dataset(args.paths, 'train', args.task, args.subsample) 54 | val_set = self.dataset(args.paths, 'val', args.task, args.subsample) 55 | test_set = self.dataset(args.paths, 'test', args.task, args.subsample) 56 | if save: 57 | all_set = self.dataset(args.paths, 'all', args.task, args.subsample) 58 | all_loader = None 59 | train_loader = torch.utils.data.DataLoader(train_set, collate_fn=train_set.collate_fn, 60 | batch_size=args.batch_size, num_workers=args.workers, 61 | pin_memory=True, shuffle=True) 62 | val_loader = torch.utils.data.DataLoader(val_set, collate_fn=train_set.collate_fn, 63 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 64 | test_loader = torch.utils.data.DataLoader(test_set, collate_fn=train_set.collate_fn, 65 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 66 | if save: 67 | all_loader = torch.utils.data.DataLoader(all_set, collate_fn=train_set.collate_fn, 68 | batch_size=args.batch_size, num_workers=args.workers, pin_memory=True) 69 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = train_set[0] 70 | feature_size = features[0].shape[-1] 71 | return feature_size, train_loader, val_loader, test_loader, all_loader 72 | 73 | def get_label_num(self, args): 74 | if args.task == 'affordance': 75 | return self.metadata.AFFORDANCE_NUM 76 | else: 77 | return self.metadata.ACTION_NUM -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze_prior.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/2/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: Prior calculation for VCLA_GAZE dataset 7 | Need to first generate the activity_corpus.p using dataparser.py 8 | 9 | """ 10 | 11 | import os 12 | import sys 13 | import pickle 14 | import json 15 | 16 | import numpy as np 17 | import scipy.stats 18 | 19 | import datasets.VCLA_GAZE.vcla_gaze_config as config 20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 21 | metadata = VCLA_METADATA() 22 | 23 | def learn_prior(paths): 24 | def normalize_prob(cpt): 25 | for s in range(cpt.shape[0]): 26 | cpt[s, :] = cpt[s, :]/np.sum(cpt[s, :]) 27 | 28 | return cpt 29 | 30 | if not os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')): 31 | sys.exit('Ground truth pickle file not found.') 32 | with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb') as f: 33 | activity_corpus = pickle.load(f) 34 | 35 | action_cpt = np.ones((len(metadata.subactivities), len(metadata.actions))) * 0.3 36 | affordance_cpt = np.ones((len(metadata.subactivities), len(metadata.affordances))) * 0.1 37 | object_cpt = np.ones((len(metadata.subactivities), len(metadata.objects))) * 0.0001 38 | object_affordance_cpt = np.ones((len(metadata.objects), len(metadata.affordances))) * 0.0001 39 | duration_dict = dict() 40 | 41 | for s in metadata.subactivities: 42 | duration_dict[s] = list() 43 | 44 | for activity in activity_corpus: 45 | for tpg in activity_corpus[activity]: 46 
/datasets/VCLA_GAZE/vcla_gaze_prior.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 12/2/18
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description: Prior calculation for VCLA_GAZE dataset
 7 |              Need to first generate the activity_corpus.p using dataparser.py
 8 | 
 9 | """
10 | 
11 | import os
12 | import sys
13 | import pickle
14 | import json
15 | 
16 | import numpy as np
17 | import scipy.stats
18 | 
19 | import datasets.VCLA_GAZE.vcla_gaze_config as config
20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA
21 | metadata = VCLA_METADATA()
22 | 
23 | def learn_prior(paths):
24 |     def normalize_prob(cpt):
25 |         for s in range(cpt.shape[0]):
26 |             cpt[s, :] = cpt[s, :] / np.sum(cpt[s, :])
27 | 
28 |         return cpt
29 | 
30 |     if not os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')):
31 |         sys.exit('Ground truth pickle file not found.')
32 |     with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb') as f:
33 |         activity_corpus = pickle.load(f)
34 | 
35 |     action_cpt = np.ones((len(metadata.subactivities), len(metadata.actions))) * 0.3
36 |     affordance_cpt = np.ones((len(metadata.subactivities), len(metadata.affordances))) * 0.1
37 |     object_cpt = np.ones((len(metadata.subactivities), len(metadata.objects))) * 0.0001
38 |     object_affordance_cpt = np.ones((len(metadata.objects), len(metadata.affordances))) * 0.0001
39 |     duration_dict = dict()
40 | 
41 |     for s in metadata.subactivities:
42 |         duration_dict[s] = list()
43 | 
44 |     for activity in activity_corpus:
45 |         for tpg in activity_corpus[activity]:
46 |             for t in tpg.terminals:
47 |                 s = t.subactivity
48 |                 duration_dict[s].append(t.end_frame - t.start_frame)
49 |                 duration_dict['prior'].append(t.end_frame - t.start_frame)
50 | 
51 |                 a = t.subactivity
52 |                 action_cpt[metadata.subactivity_index[s], metadata.action_index[a]] += 1
53 |                 action_cpt[metadata.subactivity_index['prior'], metadata.action_index[a]] += 1
54 |                 for u in t.affordance:
55 |                     affordance_cpt[metadata.subactivity_index[s], metadata.affordance_index[u]] += 1
56 |                     affordance_cpt[metadata.subactivity_index['prior'], metadata.affordance_index[u]] += 1
57 |                 for io, o in enumerate(t.objects):
58 |                     object_cpt[metadata.subactivity_index[s], metadata.object_index[o]] += 1
59 |                     object_cpt[metadata.subactivity_index['prior'], metadata.object_index[o]] += 1
60 |                     object_affordance_cpt[metadata.object_index[o], metadata.affordance_index[t.affordance[io]]] += 1
61 | 
62 |     object_affordance_cpt[:, -1] = 0
63 |     object_affordance_cpt[:, -1] = np.max(object_affordance_cpt, axis=1)
64 | 
65 |     action_cpt = normalize_prob(action_cpt)
66 |     affordance_cpt = normalize_prob(affordance_cpt)
67 |     object_cpt = normalize_prob(object_cpt)
68 |     object_affordance_cpt = normalize_prob(object_affordance_cpt)
69 |     with open(os.path.join(paths.prior_root, 'action_cpt.json'), 'w') as output_file:
70 |         json.dump(action_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
71 |     with open(os.path.join(paths.prior_root, 'affordance_cpt.json'), 'w') as output_file:
72 |         json.dump(affordance_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
73 |     with open(os.path.join(paths.prior_root, 'object_cpt.json'), 'w') as output_file:
74 |         json.dump(object_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
75 |     with open(os.path.join(paths.prior_root, 'object_affordance_cpt.json'), 'w') as output_file:
76 |         json.dump(object_affordance_cpt.tolist(), output_file, indent=4, separators=(',', ': '))
77 | 
78 |     duration_prior = dict()
79 |     for s, durations in duration_dict.items():
80 |         mu, std = scipy.stats.norm.fit(durations)
81 |         duration_prior[s] = [mu, std]
82 | 
83 |     with open(os.path.join(paths.prior_root, 'duration_prior.json'), 'w') as output_file:
84 |         json.dump(duration_prior, output_file, indent=4, separators=(',', ': '))
85 | 
86 | 
87 | def main():
88 |     paths = config.Paths()
89 |     learn_prior(paths)
90 | 
91 | 
92 | if __name__ == '__main__':
93 |     main()
94 | 
--------------------------------------------------------------------------------
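A sketch of how the fitted Gaussian duration prior could be consumed downstream. Only the duration_prior.json layout of [mu, std] per subactivity comes from learn_prior above; the function name and call site are illustrative, not taken from this repository.

import json
import os
import scipy.stats

def duration_log_likelihood(prior_root, subactivity, duration):
    # duration_prior.json maps each subactivity (plus 'prior') to the fitted [mu, std]
    with open(os.path.join(prior_root, 'duration_prior.json')) as f:
        duration_prior = json.load(f)
    mu, std = duration_prior[subactivity]
    return scipy.stats.norm.logpdf(duration, loc=mu, scale=std)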
/datasets/Breakfast/breakfast.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 4/20/19
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description:
 7 | 
 8 | """
 9 | import os
10 | import pickle
11 | import numpy as np
12 | import torch
13 | import torch.utils.data
14 | from datasets.Breakfast.metadata import BREAKFAST_METADATA
15 | 
16 | class Breakfast(torch.utils.data.Dataset):
17 |     def __init__(self, paths, mode, task='activity', subsample=None):
18 |         self.path = paths.inter_root
19 |         self.sequence_ids = list()
20 |         if subsample != 1:
21 |             with open(os.path.join(self.path, 'features', 'breakfast_{}_0_{}.p'.format(mode, subsample)), 'rb') as f:
22 |                 self.data = pickle.load(f, encoding='latin1')  # 'latin1' decodes pickles written under Python 2
23 |         else:
24 |             with open(os.path.join(self.path, 'features', 'breakfast_{}_0.p'.format(mode)), 'rb') as f:
25 |                 self.data = pickle.load(f, encoding='latin1')
26 |         for key in self.data.keys():
27 |             self.sequence_ids.append(key)
28 |         self.task = task
29 |         self.mode = mode
30 | 
31 |     def __getitem__(self, index):
32 |         sequence_id = self.sequence_ids[index]
33 |         return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \
34 |                self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \
35 |                self.data[sequence_id]['activity'], sequence_id, self.data[sequence_id]['all_labels']
36 | 
37 |     def __len__(self):
38 |         return len(self.sequence_ids)
39 | 
40 |     @staticmethod
41 |     def collate_fn(batch):
42 |         metadata = BREAKFAST_METADATA()
43 |         features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0]
44 |         feature_size = features.shape[1]
45 |         label_num = len(metadata.subactivities)
46 | 
47 |         max_seq_length = np.max(np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
48 |         features_batch = np.zeros((max_seq_length, len(batch), feature_size))
49 |         labels_batch = np.ones((max_seq_length, len(batch))) * -1
50 |         max_all_seq_length = np.max(np.array([len(additional) for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
51 |         all_labels_batch = np.ones((max_all_seq_length, len(batch))) * -1
52 |         probs_batch = np.zeros((max_seq_length, len(batch), label_num))
53 |         total_lengths = np.zeros(len(batch))
54 |         ctc_labels = list()
55 |         ctc_lengths = list()
56 |         activities = list()
57 |         sequence_ids = list()
58 |         all_total_lengths = np.zeros(len(batch))
59 | 
60 |         for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch):
61 |             features_batch[:total_length, batch_i, :] = np.nan_to_num(features)
62 |             labels_batch[:total_length, batch_i] = labels
63 |             all_labels_batch[:len(additional), batch_i] = additional
64 |             all_total_lengths[batch_i] = len(additional)
65 |             for frame in range(features.shape[0]):
66 |                 probs_batch[frame, batch_i, int(labels[frame])] = 1.0
67 | 
68 |             merged_labels = list()
69 |             current_label = -1
70 |             for label in labels:
71 |                 if label != current_label:
72 |                     current_label = label
73 |                     merged_labels.append(current_label)
74 |             ctc_labels.append(merged_labels)
75 |             ctc_lengths.append(len(merged_labels))
76 |             total_lengths[batch_i] = total_length
77 |             activities.append(activity)
78 |             sequence_ids.append(sequence_id)
79 | 
80 |         features_batch = torch.FloatTensor(features_batch)
81 |         labels_batch = torch.LongTensor(labels_batch)
82 |         probs_batch = torch.FloatTensor(probs_batch)
83 |         total_lengths = torch.IntTensor(total_lengths)
84 |         ctc_lengths = torch.IntTensor(ctc_lengths)
85 |         all_labels_batch = torch.LongTensor(all_labels_batch)
86 |         all_total_lengths = torch.IntTensor(all_total_lengths)
87 | 
88 |         # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional
89 |         return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, (all_labels_batch, all_total_lengths)
--------------------------------------------------------------------------------
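The run-length merge inside collate_fn collapses frame-level labels into the token sequence that CTC training expects; a tiny worked example with made-up label ids:

labels = [4, 4, 4, 7, 7, 4]  # hypothetical per-frame subactivity ids
merged = list()
current = -1
for label in labels:
    if label != current:
        current = label
        merged.append(label)
# merged == [4, 7, 4]; its length (3) is what goes into ctc_lengths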
/experiments/STAOG/prob_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on 12/9/18
 3 | 
 4 | @author: Baoxiong Jia
 5 | 
 6 | Description:
 7 | 
 8 | """
 9 | import os
10 | import numpy as np
11 | import json
12 | 
13 | class Prob_Utils(object):
14 | 
15 |     @staticmethod
16 |     def get_likelihood_sum(action_log_likelihood, object_log_likelihood=None, affordance_log_likelihood=None, affordance=True):
17 |         '''
18 |         Precompute the sum of log probabilities in interval [i, j]
19 |         :param action_log_likelihood: action log likelihood
20 |         :param object_log_likelihood: object log likelihood
21 |         :param affordance_log_likelihood: affordance log likelihood
22 |         :return:
23 |             action_log_likelihood_sum: action_label_num x frames x frames
24 |             object_log_likelihood_sum: object_bbox_num x object_label_num x frames x frames
25 |             affordance_log_likelihood_sum: object_bbox_num x affordance_label_num x frames x frames
26 |         '''
27 |         action_log_likelihood_sum = np.zeros(
28 |             action_log_likelihood.shape + (action_log_likelihood.shape[-1],))
29 |         for a in range(action_log_likelihood.shape[0]):
30 |             for i in range(action_log_likelihood.shape[1]):
31 |                 action_log_likelihood_sum[a, i, i] = action_log_likelihood[a, i]
32 |         for a in range(action_log_likelihood.shape[0]):
33 |             for i in range(action_log_likelihood.shape[1]):
34 |                 for j in range(i + 1, action_log_likelihood.shape[1]):
35 |                     action_log_likelihood_sum[a, i, j] = action_log_likelihood_sum[a, i, j - 1] + \
36 |                                                          action_log_likelihood[a, j]
37 | 
38 |         object_log_likelihood_sum = None
39 |         affordance_log_likelihood_sum = None
40 | 
41 |         if affordance:
42 |             object_log_likelihood_sum = np.zeros(object_log_likelihood.shape + (object_log_likelihood.shape[-1],))
43 |             for b in range(object_log_likelihood.shape[0]):
44 |                 for o in range(object_log_likelihood.shape[1]):
45 |                     for i in range(object_log_likelihood.shape[2]):
46 |                         object_log_likelihood_sum[b, o, i, i] = object_log_likelihood[b, o, i]
47 |             for b in range(object_log_likelihood.shape[0]):
48 |                 for o in range(object_log_likelihood.shape[1]):
49 |                     for i in range(object_log_likelihood.shape[2]):
50 |                         for j in range(i + 1, object_log_likelihood.shape[2]):
51 |                             object_log_likelihood_sum[b, o, i, j] = object_log_likelihood_sum[b, o, i, j - 1] + \
52 |                                                                     object_log_likelihood[b, o, j]
53 | 
54 |             affordance_log_likelihood_sum = np.zeros(
55 |                 affordance_log_likelihood.shape + (affordance_log_likelihood.shape[-1],))
56 |             for b in range(affordance_log_likelihood.shape[0]):
57 |                 for a in range(affordance_log_likelihood.shape[1]):
58 |                     for i in range(affordance_log_likelihood.shape[2]):
59 |                         affordance_log_likelihood_sum[b, a, i, i] = affordance_log_likelihood[b, a, i]
60 |             for b in range(affordance_log_likelihood.shape[0]):
61 |                 for a in range(affordance_log_likelihood.shape[1]):
62 |                     for i in range(affordance_log_likelihood.shape[2]):
63 |                         for j in range(i + 1, affordance_log_likelihood.shape[2]):
64 |                             affordance_log_likelihood_sum[b, a, i, j] = affordance_log_likelihood_sum[b, a, i, j - 1] + \
65 |                                                                         affordance_log_likelihood[b, a, j]
66 | 
67 |         return action_log_likelihood_sum, object_log_likelihood_sum, affordance_log_likelihood_sum
68 | 
69 |     @staticmethod
70 |     def combine_cpt(action_log_cpt, object_log_cpt, affordance_log_cpt, affordance=True):
71 |         '''
72 |         Combine action prior, object prior and affordance prior using the log probability
73 |         :param action_log_cpt: action log probability, indexed by (subactivity, action)
74 |         :param object_log_cpt: object log probability, indexed by (subactivity, object)
75 |         :param affordance_log_cpt: affordance log probability, indexed by (subactivity, affordance)
76 |         :return: combined log probability, indexed by (subactivity, action, object, affordance)
77 |         '''
78 |         if affordance:
79 |             combined_log_cpt = np.zeros((action_log_cpt.shape[0], action_log_cpt.shape[1],
80 |                                          object_log_cpt.shape[1], affordance_log_cpt.shape[1]))
81 |         else:
82 |             combined_log_cpt = np.zeros((action_log_cpt.shape[0], action_log_cpt.shape[1]))
83 |         for s in range(combined_log_cpt.shape[0]):
84 |             for a in range(action_log_cpt.shape[1]):
85 |                 if affordance:
86 |                     for o in range(object_log_cpt.shape[1]):
87 |                         for u in range(affordance_log_cpt.shape[1]):
88 |                             combined_log_cpt[s, a, o, u] = action_log_cpt[s, a] + object_log_cpt[s, o] + affordance_log_cpt[s, u]
89 |                 else:
90 |                     combined_log_cpt[s, a] = action_log_cpt[s, a]
91 |         return combined_log_cpt
--------------------------------------------------------------------------------
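A quick sanity check of the interval identity the docstring above describes, namely action_log_likelihood_sum[a, i, j] == sum of action_log_likelihood[a, t] for t in i..j; a minimal sketch exercising only the affordance=False path, with made-up probabilities:

import numpy as np

ll = np.log(np.array([[0.7, 0.2, 0.1],
                      [0.5, 0.4, 0.1]]))  # 2 action labels x 3 frames
ll_sum, _, _ = Prob_Utils.get_likelihood_sum(ll, affordance=False)
# the [0, 0, 2] entry is the cumulative log likelihood of action 0 over frames 0..2
assert np.isclose(ll_sum[0, 0, 2], ll[0, 0] + ll[0, 1] + ll[0, 2])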
/utils/logutils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Created on 10/30/18
  3 | 
  4 | @author: Baoxiong Jia
  5 | 
  6 | Description:
  7 | 
  8 | """
  9 | import os
 10 | import shutil
 11 | import torch
 12 | import numpy as np
 13 | import sklearn.metrics
 14 | 
 15 | rgb_width = 1920
 16 | rgb_height = 1280
 17 | depth_width = 512
 18 | depth_height = 424
 19 | 
 20 | 
 21 | class AverageMeter(object):
 22 |     """Computes and stores the average and current value"""
 23 | 
 24 |     def __init__(self):
 25 |         self.reset()
 26 | 
 27 |     def reset(self):
 28 |         self.val = 0
 29 |         self.avg = 0
 30 |         self.sum = 0
 31 |         self.count = 0
 32 | 
 33 |     def update(self, val, n=1):
 34 |         self.val = val
 35 |         self.sum += val * n
 36 |         self.count += n
 37 |         self.avg = self.sum / self.count
 38 | 
 39 | 
 40 | class MultiAverageMeter(AverageMeter):
 41 |     def __init__(self):
 42 |         super(MultiAverageMeter, self).__init__()
 43 |         self.reset()
 44 | 
 45 |     def reset(self):
 46 |         self.vals = {}
 47 |         self.avgs = {}
 48 |         self.sums = {}
 49 |         self.counts = {}
 50 |         self.val = 0
 51 |         self.avg = 0
 52 | 
 53 |     # Return avg precision for affordance that is not null
 54 |     def update(self, key, val, n=1):
 55 |         if key not in self.vals.keys():
 56 |             self.vals[key] = 0
 57 |             self.avgs[key] = 0
 58 |             self.sums[key] = 0
 59 |             self.counts[key] = 0
 60 |         self.vals[key] = val
 61 |         self.sums[key] += val * n
 62 |         self.counts[key] += n
 63 |         self.avgs[key] = self.sums[key] / self.counts[key]
 64 | 
 65 |         val = 0
 66 |         avg = 0
 67 |         count = 0
 68 |         for key in self.vals:
 69 |             if key != 'null':
 70 |                 val += self.vals[key]
 71 |                 avg += self.avgs[key]
 72 |                 count += 1
 73 |         if count != 0:
 74 |             self.val = val / count
 75 |             self.avg = avg / count
 76 |         else:
 77 |             self.val = -1
 78 |             self.avg = -1
 79 | 
 80 | class Logger(object):
 81 |     """Records useful logging variables for training and validation"""
 82 |     def __init__(self):
 83 |         self.batch_time = AverageMeter()
 84 |         self.data_time = AverageMeter()
 85 |         self.losses = AverageMeter()
 86 |         self.multi_losses = MultiAverageMeter()
 87 |         self.top1 = MultiAverageMeter()
 88 | 
 89 | def compute_accuracy(gt_results, results, labels='all', metric='micro'):
 90 |     if labels == 'all':
 91 |         labels_list = list(set(gt_results + results))
 92 |     else:
 93 |         labels_list = list(set(gt_results + results))
 94 |         labels_list.remove(0)
 95 |     results = sklearn.metrics.precision_recall_fscore_support(gt_results, results, labels=labels_list, average=metric)
 96 |     if metric == 'micro':
 97 |         return results[0]
 98 |     else:
 99 |         return results[0], results[1], results[2]
100 | 
101 | def save_checkpoint(state_dict, is_best, args, filename='checkpoint.pth'):
102 |     if not os.path.exists(args.resume):
103 |         os.makedirs(args.resume)
104 |     torch.save(state_dict, os.path.join(args.resume, filename))
105 |     if is_best:
106 |         shutil.copyfile(os.path.join(args.resume, filename), os.path.join(args.resume, 'model_best.pth'))
107 | 
108 | def save_checkpoint_epoch(state_dict, epoch, args):
109 |     if not os.path.exists(args.resume):
110 |         os.makedirs(args.resume)
111 |     torch.save(state_dict,
os.path.join(args.resume, 'checkpoint_{}.pth'.format(epoch))) 112 | 113 | def load_checkpoint_epoch(args, model, epoch, optimizer=None, scheduler=None): 114 | file_name = os.path.join(args.resume, 'checkpoint_{}.pth'.format(epoch)) 115 | print('Loading {}: {}'.format(file_name, os.path.isfile(file_name))) 116 | if os.path.isfile(file_name): 117 | checkpoint = torch.load(file_name) 118 | print('Best precision:{}'.format(checkpoint['best_prec'])) 119 | args.start_epoch = checkpoint['epoch'] 120 | model.load_state_dict(checkpoint['state_dict']) 121 | if optimizer != None: 122 | optimizer.load_state_dict(checkpoint['optimizer']) 123 | if scheduler != None: 124 | scheduler.load_state_dict(checkpoint['scheduler']) 125 | print('finished loading') 126 | 127 | 128 | def load_checkpoint(args, model, optimizer=None, scheduler=None): 129 | print('Loading {}: {}'.format(os.path.join(args.resume, 'model_best.pth'), os.path.isfile(os.path.join(args.resume, 'model_best.pth')))) 130 | if os.path.isfile(os.path.join(args.resume, 'model_best.pth')): 131 | checkpoint = torch.load(os.path.join(args.resume, 'model_best.pth')) 132 | print('Best precision:{}'.format(checkpoint['best_prec'])) 133 | args.start_epoch = checkpoint['epoch'] 134 | model.load_state_dict(checkpoint['state_dict']) 135 | if optimizer != None: 136 | optimizer.load_state_dict(checkpoint['optimizer']) 137 | if scheduler != None: 138 | scheduler.load_state_dict(checkpoint['scheduler']) 139 | print('finished loading') 140 | 141 | 142 | 143 | # TODO: Fix transform in both 3d and 2d 144 | def transform(skeleton, mean_skeleton, dims, anchor_points=[5, 9, 1]): 145 | aligned_skeleton = skeleton 146 | return aligned_skeleton -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/vcla_gaze.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/3/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import numpy as np 10 | import os 11 | import torch 12 | import torch.utils.data 13 | import pickle 14 | import datasets.VCLA_GAZE.vcla_gaze_config as config 15 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 16 | 17 | class VCLA_GAZE(torch.utils.data.Dataset): 18 | def __init__(self, paths, mode, task, subsample=None): 19 | self.path = paths.inter_root 20 | with open(os.path.join(self.path, 'features', 'vcla_gaze_{}.p'.format(mode)), 'rb') as f: 21 | self.data = pickle.load(f) 22 | self.sequence_ids = list() 23 | for key in self.data.keys(): 24 | self.sequence_ids.append(key) 25 | self.task = task 26 | self.mode = mode 27 | 28 | def __getitem__(self, index): 29 | sequence_id = self.sequence_ids[index] 30 | if self.task == 'affordance': 31 | return self.data[sequence_id]['u_features'], self.data[sequence_id]['u_labels'],\ 32 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 33 | self.data[sequence_id]['activity'], sequence_id, None 34 | else: 35 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 36 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'],\ 37 | self.data[sequence_id]['activity'], sequence_id, None 38 | 39 | def __len__(self): 40 | return len(self.sequence_ids) 41 | 42 | @staticmethod 43 | def collate_fn(batch): 44 | metadata = VCLA_METADATA() 45 | affordance = False 46 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 47 | feature_dim = list(features.shape) 48 | if len(feature_dim) > 2: 49 | 
affordance = True
 50 |         max_seq_length = np.max(
 51 |             np.array([total_length for (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch]))
 52 |         feature_dim[0] = max_seq_length
 53 |         feature_dim.insert(1, len(batch))  # max_length * batch * (obj_num) * feature_size
 54 |         obj_nums = np.zeros(len(batch))
 55 |         if affordance:
 56 |             max_obj_num = metadata.MAXIMUM_OBJ_VIDEO
 57 |             feature_dim[-2] = max_obj_num
 58 |             total_lengths = np.zeros(len(batch) * max_obj_num)
 59 |         else:
 60 |             total_lengths = np.zeros(len(batch))
 61 |         features_batch = np.zeros(feature_dim)
 62 |         labels_batch = np.zeros(feature_dim[: -1])
 63 |         probs_batch = np.zeros(feature_dim[: 2] + [len(metadata.subactivities)])
 64 | 
 65 |         activities = list()
 66 |         sequence_ids = list()
 67 |         ctc_labels = list()
 68 |         ctc_lengths = list()
 69 |         for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch):
 70 |             for frame in range(features.shape[0]):
 71 |                 probs_batch[frame, batch_i, int(labels[frame])] = 1.0
 72 |             merged_labels = list()
 73 |             current_label = -1
 74 |             for label in labels:
 75 |                 if label != current_label:
 76 |                     current_label = label
 77 |                     merged_labels.append(current_label)
 78 |             ctc_labels.append(merged_labels)
 79 |             ctc_lengths.append(len(merged_labels))
 80 | 
 81 |             if affordance:
 82 |                 obj_num = labels.shape[1]
 83 |                 features_batch[:total_length, batch_i, :obj_num, :] = np.nan_to_num(features)
 84 |                 labels_batch[:total_length, batch_i, :obj_num] = labels
 85 |                 for rel_idx in range(3):  # hardcoded 3 assumes max_obj_num == metadata.MAXIMUM_OBJ_VIDEO == 3
 86 |                     total_lengths[batch_i * 3 + rel_idx] = total_length
 87 |                 obj_nums[batch_i] = obj_num
 88 |             else:
 89 |                 features_batch[:total_length, batch_i, :] = np.nan_to_num(features)
 90 |                 labels_batch[:total_length, batch_i] = labels
 91 |                 total_lengths[batch_i] = total_length
 92 |             activities.append(activity)
 93 |             sequence_ids.append(sequence_id)
 94 | 
 95 |         features_batch = torch.FloatTensor(features_batch)
 96 |         labels_batch = torch.LongTensor(labels_batch)
 97 |         total_lengths = torch.IntTensor(total_lengths)
 98 |         obj_nums = torch.IntTensor(obj_nums)
 99 |         ctc_lengths = torch.IntTensor(ctc_lengths)
100 | 
101 |         # Feature_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, all_labels
102 |         return features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, None, None
103 | 
104 | 
105 | def main():
106 |     paths = config.Paths()
107 |     dataset = VCLA_GAZE(paths, 'train', 'affordance')
108 |     data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, num_workers=1,
109 |                                               pin_memory=True, collate_fn=VCLA_GAZE.collate_fn)
110 |     # The ten-field tuple below is what collate_fn returns, so draw one batch from the loader
111 |     features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = next(iter(data_loader))
112 | 
113 |     print('Finished')
114 | 
115 | if __name__ == '__main__':
116 |     main()
--------------------------------------------------------------------------------
/datasets/CAD/finetune/parse_features.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Mar 13, 2017
 3 | 
 4 | @author: Siyuan Qi
 5 | 
 6 | Description of the file.
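                        Parses the CAD-120 ground-truth-segmentation features stored in
                        colon-separated SVM format (object, skeleton, object-object and
                        skeleton-object descriptors) and pickles the collected per-sequence data.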
7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import json 13 | import pickle 14 | 15 | import datasets.CAD.cad_config as config 16 | 17 | 18 | def parse_colon_seperated_features(colon_seperated): 19 | f_list = [int(x.split(':')[1]) for x in colon_seperated] 20 | return f_list 21 | 22 | 23 | def read_features(filename): 24 | data = dict() 25 | with open(filename) as f: 26 | first_line = f.readline().strip() 27 | object_num = int(first_line.split(' ')[0]) 28 | object_object_num = int(first_line.split(' ')[1]) 29 | skeleton_object_num = int(first_line.split(' ')[2]) 30 | 31 | # Object feature 32 | o_aff = [] 33 | o_id = [] 34 | o_fea = [] 35 | for _ in range(object_num): 36 | line = f.readline() 37 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 38 | o_aff.append(int(colon_seperated[0])) 39 | o_id.append(int(colon_seperated[1])) 40 | object_feature = parse_colon_seperated_features(colon_seperated[2:]) 41 | assert len(object_feature) == 180 42 | o_fea.append(object_feature) 43 | data['o_aff'] = o_aff 44 | data['o_id'] = o_id 45 | data['o_fea'] = o_fea 46 | 47 | # Skeleton feature 48 | line = f.readline() 49 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 50 | data['h_act'] = int(colon_seperated[0]) 51 | skeleton_feature = parse_colon_seperated_features(colon_seperated[2:]) 52 | assert len(skeleton_feature) == 630 53 | data['h_fea'] = skeleton_feature 54 | 55 | o_o_id = [] 56 | o_o_fea = [] 57 | # Object-object feature 58 | for _ in range(object_object_num): 59 | line = f.readline() 60 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 61 | o_o_id.append([int(colon_seperated[2]), int(colon_seperated[3])]) 62 | object_object_feature = parse_colon_seperated_features(colon_seperated[4:]) 63 | assert len(object_object_feature) == 200 64 | o_o_fea.append(object_object_feature) 65 | data['o_o_id'] = o_o_id 66 | data['o_o_fea'] = o_o_fea 67 | 68 | s_o_id = [] 69 | s_o_fea = [] 70 | # Skeleton-object feature 71 | for _ in range(skeleton_object_num): 72 | line = f.readline() 73 | colon_seperated = [x.strip() for x in line.strip().split(' ')] 74 | s_o_id.append(int(colon_seperated[2])) 75 | skeleton_object_feature = parse_colon_seperated_features(colon_seperated[3:]) 76 | assert len(skeleton_object_feature) == 400 77 | s_o_fea.append(skeleton_object_feature) 78 | data['s_o_id'] = s_o_id 79 | data['s_o_fea'] = s_o_fea 80 | 81 | for o_id, s_o_id in zip(data['o_id'] , data['s_o_id']): 82 | assert o_id == s_o_id 83 | return data 84 | 85 | 86 | def collect_data(paths): 87 | segments_files_path = os.path.join(paths.data_root, 'features_cad120_ground_truth_segmentation', 'segments_svm_format') 88 | segments_feature_path = os.path.join(paths.data_root, 'features_cad120_ground_truth_segmentation', 'features_binary_svm_format') 89 | 90 | activity_corpus = pickle.load(open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb')) 91 | 92 | subject5_sequences = list() 93 | segment_count_dict = dict() 94 | for activity, tpgs in activity_corpus.items(): 95 | for tpg in tpgs: 96 | segment_count_dict[tpg.id] = len(tpg.terminals) 97 | if tpg.subject == 'Subject5': 98 | subject5_sequences.append(tpg.id) 99 | 100 | data = dict() 101 | for sequence_path_file in os.listdir(segments_files_path): 102 | sequence_id = os.path.splitext(sequence_path_file)[0] 103 | data[sequence_id] = list() 104 | if sequence_id not in segment_count_dict: 105 | continue 106 | 107 | with open(os.path.join(segments_files_path, sequence_path_file)) as f: 108 | first_line = f.readline() 109 | 
segment_feature_num = int(first_line.split(' ')[0]) 110 | # if sequence_id in subject5_sequences: 111 | # print sequence_id, segment_count_dict[sequence_id], segment_feature_num 112 | # assert segment_count_dict[sequence_id] == segment_feature_num 113 | 114 | last_oid = None 115 | for _ in range(segment_feature_num): 116 | segment_feature_filename = f.readline().strip() 117 | segment_data = read_features(os.path.join(segments_feature_path, os.path.basename(segment_feature_filename))) 118 | data[sequence_id].append(segment_data) 119 | if last_oid: 120 | for o_id, s_o_id in zip(last_oid, segment_data['o_id']): 121 | assert o_id == s_o_id 122 | last_oid = segment_data['o_id'] 123 | 124 | with open(os.path.join(paths.tmp_root, 'features.p'), 'wb') as f: 125 | pickle.dump(data, f) 126 | with open(os.path.join(paths.tmp_root, 'features.json'), 'w') as f: 127 | json.dump(data, f, indent=4, separators=(',', ': ')) 128 | with open(os.path.join(paths.tmp_root, 'video_list.p'), 'wb') as f: 129 | pickle.dump(list(data.keys()), f) 130 | 131 | 132 | def main(): 133 | paths = config.Paths() 134 | start_time = time.time() 135 | collect_data(paths) 136 | print('Time elapsed: {}s'.format(time.time() - start_time)) 137 | 138 | 139 | if __name__ == '__main__': 140 | main() 141 | -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/vcla_gaze_finetune.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/19/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import time 11 | import pickle 12 | import numpy as np 13 | import torch 14 | import torchvision 15 | from skimage import io 16 | import glob 17 | import cv2 18 | import datasets.VCLA_GAZE.vcla_gaze_config as vcla_gaze_config 19 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 20 | import utils.plyutils as utils 21 | metadata = VCLA_METADATA() 22 | 23 | def match_path(img_dir, frame): 24 | rgb_name = glob.glob(os.path.join(img_dir, 'raw_rgb_{0:05}_*'.format(frame))) 25 | depth_name = glob.glob(os.path.join(img_dir, 'raw_depth_{0:05}_*'.format(frame))) 26 | aligned_name = glob.glob(os.path.join(img_dir, 'aligned_rgb_{0:05}.png').format(frame)) 27 | return rgb_name[0], depth_name[0], aligned_name[0] 28 | 29 | def get_valid_bbox(bbox): 30 | x_1 = int(bbox[1]) 31 | y_1 = int(bbox[0]) 32 | x_2 = int(bbox[3]) 33 | y_2 = int(bbox[2]) 34 | return x_1, y_1, x_2, y_2 35 | 36 | class VCLA_GAZE_FEATURE(torch.utils.data.Dataset): 37 | def __init__(self, paths, sequence_ids, transform, input_size, name, task, verbose=False): 38 | self.root = paths.img_root 39 | self.tmp_root = paths.tmp_root 40 | self.inter_root = paths.inter_root 41 | self.imsize = input_size 42 | self.name = name 43 | self.transform = transform 44 | self.sequence_ids = sequence_ids 45 | self.task = task 46 | self.verbose = verbose 47 | with open(os.path.join(paths.tmp_root, 'image_data_list.p'), 'rb') as f: 48 | self.data_list = pickle.load(f) 49 | 50 | # Using framewise information for prediction purposes 51 | def __getitem__(self, index): 52 | sequence_id = self.sequence_ids[index] 53 | sequence_info = self.data_list[sequence_id] 54 | 55 | category, video_id, frame = sequence_id.split('$') 56 | frame = int(frame) 57 | 58 | img_dir = os.path.join(self.root, category, video_id, 'TPV') 59 | rgb_name, depth_name, aligned_name = match_path(img_dir, frame) 60 | 61 | rgb_image = torch.FloatTensor(io.imread(rgb_name)) 62 | depth_image = 
torch.FloatTensor(np.array(io.imread(depth_name), dtype=np.double)) 63 | aligned_image = torch.FloatTensor(io.imread(aligned_name)) 64 | 65 | activity = torch.LongTensor([sequence_info['activity_mat']]) 66 | object_pair = sequence_info['object_mat'] 67 | object_labels = torch.LongTensor(object_pair[:, :-4]) 68 | bboxs = object_pair[:, -4:] 69 | object_images = np.empty((1, 3, self.imsize[0], self.imsize[1])) 70 | for idx, bbox in enumerate(bboxs): 71 | object_image = np.zeros((3, self.imsize[0], self.imsize[1]), dtype=np.float) 72 | # Get valid bounding boxes 73 | x_1, y_1, x_2, y_2 = get_valid_bbox(bbox) 74 | if np.sum(bbox) != 0: 75 | bbox_image = rgb_image[y_1 : y_2, x_1 : x_2, :] 76 | object_image = self.transform(cv2.resize(bbox_image.numpy(), self.imsize, interpolation=cv2.INTER_LINEAR)) 77 | object_images = np.vstack((object_images, np.expand_dims(object_image, axis=0))) 78 | object_images = torch.FloatTensor(object_images[1:]) 79 | rgb_image = torch.FloatTensor(self.transform(cv2.resize(rgb_image.numpy(), self.imsize, interpolation=cv2.INTER_LINEAR))) 80 | affordance = torch.LongTensor(sequence_info['affordance_mat']) 81 | skeleton = torch.FloatTensor(sequence_info['skeleton_mat']) 82 | if self.task != 'affordance': 83 | affordance_features = torch.FloatTensor(np.load(os.path.join(self.inter_root, 'finetune', 'affordance', sequence_id + '.npy'))) 84 | assert(affordance_features.shape[0] == 3) 85 | else: 86 | affordance_features = torch.Tensor([0]) 87 | if self.verbose: 88 | return sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 89 | object_images, affordance, skeleton, object_pair 90 | else: 91 | return sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 92 | object_images, affordance, skeleton, affordance_features 93 | def __len__(self): 94 | return len(self.sequence_ids) 95 | 96 | 97 | # For testing purposes 98 | def main(): 99 | paths = vcla_gaze_config.Paths() 100 | start_time = time.time() 101 | with open(os.path.join(paths.tmp_root, 'image_list.p'), 'rb') as f: 102 | video_list = pickle.load(f) 103 | train_ratio = 0.1 104 | sequence_ids = np.random.permutation(video_list) 105 | sequence_ids = sequence_ids[:int(train_ratio * len(sequence_ids))] 106 | 107 | input_imsize = (224, 224) 108 | normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 109 | std=[0.229, 0.224, 0.225]) 110 | transform = torchvision.transforms.Compose([ 111 | torchvision.transforms.ToTensor(), 112 | normalize, 113 | ]) 114 | training_set = VCLA_GAZE_FEATURE(paths, sequence_ids, transform, input_imsize, 'test', 'activity', verbose=True) 115 | 116 | sequence_id, rgb_image, depth_image, aligned_image, activity, object_labels, \ 117 | object_images, affordance, skeleton, object_pair = training_set[0] 118 | utils.visualize_bbox_rgb(sequence_id, (rgb_image.permute(1, 2, 0), object_pair), metadata.objects) 119 | utils.visualize_bbox_image(sequence_id, (object_labels, object_images), metadata.objects) 120 | utils.visualize_skeleton_depth(sequence_id, (aligned_image, skeleton)) 121 | print('Time elapsed: {}s'.format(time.time() - start_time)) 122 | print(sequence_id) 123 | 124 | 125 | if __name__ == '__main__': 126 | main() -------------------------------------------------------------------------------- /datasets/Breakfast/dataparser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 04/20/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import 
time 12 | import json 13 | import glob 14 | import pickle 15 | from random import shuffle 16 | import numpy as np 17 | import sys 18 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 19 | 20 | import datasets.Breakfast.breakfast_config as config 21 | from datasets.Breakfast.metadata import BREAKFAST_METADATA 22 | metadata = BREAKFAST_METADATA() 23 | 24 | 25 | def parse_data(paths, subsample=False): 26 | metadata_path = os.path.join(paths.data_root, 'metadata') 27 | save_path = os.path.join(paths.inter_root, 'features') 28 | 29 | if not os.path.exists(save_path): 30 | os.makedirs(save_path) 31 | 32 | with open(os.path.join(metadata_path, 'sequence_ids.json'), 'r') as f: 33 | sequence_ids = json.load(f) 34 | data_dict = dict() 35 | count = 0 36 | for sequence_id in sequence_ids: 37 | data_dict[sequence_id] = dict() 38 | activity_id, video_id = sequence_id.split('$') 39 | feature_path = os.path.join(paths.data_root, 'fisher_vector', activity_id) 40 | annotation_file = os.path.join(metadata_path, 'annotations', sequence_id + '.p') 41 | feature_files = glob.glob(os.path.join(feature_path, video_id + '*')) 42 | features = None 43 | for feature_file in feature_files: 44 | features = np.loadtxt(feature_file)[:, 1:] 45 | break 46 | 47 | frames = features.shape[0] 48 | subsample_freq = 1000 49 | total_length = features.shape[0] 50 | subsample_indices = None 51 | if subsample: 52 | subsample_indices = np.arange(0, frames, subsample_freq) 53 | features = features[subsample_indices] 54 | labels = np.ones(features.shape[0]) * metadata.action_index['SIL'] 55 | 56 | data_dict[sequence_id]['features'] = features 57 | data_dict[sequence_id]['total_length'] = features.shape[0] 58 | data_dict[sequence_id]['activity'] = activity_id 59 | data_dict[sequence_id]['seg_lengths'] = list() 60 | 61 | with open(annotation_file, 'rb') as f: 62 | start, end, activity = pickle.load(f) 63 | 64 | all_labels = np.ones(total_length) * metadata.action_index['SIL'] 65 | all_segs = list() 66 | if(end[-1] != total_length): 67 | count += 1 68 | if(abs(end[-1] - total_length) > 10): 69 | print('Feature error for {}'.format(sequence_id)) 70 | for s, e, a in zip(start, end, activity): 71 | if (s > e): 72 | print(s, e) 73 | print('Error for {}'.format(sequence_id)) 74 | exit() 75 | e = min(e, total_length) 76 | all_segs.append(e - s + 1) 77 | all_labels[s - 1 : e] = metadata.action_index[a] 78 | 79 | if subsample: 80 | start = 0 81 | end = 0 82 | all_segs = list() 83 | for idx, sub_idx in enumerate(subsample_indices): 84 | if idx == len(subsample_indices) - 1: 85 | all_segs.append(idx - start + 1) 86 | break 87 | if all_labels[sub_idx] == all_labels[subsample_indices[idx + 1]]: 88 | end = end + 1 89 | else: 90 | all_segs.append(end - start + 1) 91 | start = end + 1 92 | end = start 93 | 94 | labels[idx] = all_labels[sub_idx] 95 | else: 96 | labels = all_labels 97 | 98 | data_dict[sequence_id]['labels'] = labels 99 | data_dict[sequence_id]['all_labels'] = all_labels 100 | data_dict[sequence_id]['seg_lengths'] = all_segs 101 | print('Finished processing for {}, from {} to {}'.format(sequence_id, frames, data_dict[sequence_id]['total_length'])) 102 | 103 | 104 | with open(os.path.join(metadata_path, 'train_test_split.json'), 'r') as f: 105 | split = json.load(f) 106 | 107 | for split_idx, ids in enumerate(split): 108 | train_dict = dict() 109 | test_dict = dict() 110 | for other_idx, other_ids in enumerate(split): 111 | for id in other_ids: 112 | if other_idx != split_idx: 113 | train_dict[id] = data_dict[id] 114 | 
else: 115 | test_dict[id] = data_dict[id] 116 | if not subsample: 117 | train_file = 'breakfast_train_{}_ori.p'.format(split_idx) 118 | test_file = 'breakfast_test_{}_ori.p'.format(split_idx) 119 | val_file = 'breakfast_val_{}_ori.p'.format(split_idx) 120 | all_file = 'breakfast_all_{}_ori.p'.format(split_idx) 121 | else: 122 | train_file = 'breakfast_train_{}_{}.p'.format(split_idx, subsample_freq) 123 | test_file = 'breakfast_test_{}_{}.p'.format(split_idx, subsample_freq) 124 | val_file = 'breakfast_val_{}_{}.p'.format(split_idx, subsample_freq) 125 | all_file = 'breakfast_all_{}_{}.p'.format(split_idx, subsample_freq) 126 | 127 | with open(os.path.join(save_path, train_file), 'wb') as f: 128 | pickle.dump(train_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 129 | with open(os.path.join(save_path, test_file), 'wb') as f: 130 | pickle.dump(test_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 131 | with open(os.path.join(save_path, val_file), 'wb') as f: 132 | pickle.dump(test_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 133 | 134 | with open(os.path.join(save_path, all_file), 'wb') as f: 135 | pickle.dump(data_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 136 | 137 | def main(): 138 | paths = config.Paths() 139 | start_time = time.time() 140 | parse_data(paths, subsample=False) 141 | print('Time elapsed: {}'.format(time.time() - start_time)) 142 | 143 | if __name__ == '__main__': 144 | main() -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/dataparser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 11/27/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import pickle 13 | from random import shuffle 14 | import numpy as np 15 | import sys 16 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 17 | 18 | import datasets.VCLA_GAZE.vcla_gaze_config as config 19 | from models import parsegraph as parsegraph 20 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA 21 | metadata = VCLA_METADATA() 22 | 23 | 24 | def parse_data(paths): 25 | activity_feature_path = os.path.join(paths.inter_root, 'finetune', 'activity') 26 | affordance_feature_path = os.path.join(paths.inter_root, 'finetune', 'affordance') 27 | save_path = os.path.join(paths.inter_root, 'features') 28 | 29 | # for STAOG formulation 30 | activity_corpus = dict() 31 | with open(os.path.join(paths.tmp_root, 'video_data_list.p'), 'rb') as f: 32 | data_list = pickle.load(f) 33 | data_dict = dict() 34 | for sequence_id, data in data_list.items(): 35 | names = sequence_id.split("$") 36 | activity_id, video_id = names[0], names[1] 37 | data_dict[sequence_id] = dict() 38 | if activity_id not in activity_corpus.keys(): 39 | activity_corpus[activity_id] = list() 40 | tpg = parsegraph.TParseGraph(activity_id, sequence_id=video_id) 41 | segmentation = data['segment'] 42 | activity = data['activity_mat'] 43 | objects = data['object_mat'] 44 | affordance = data['affordance_mat'] 45 | skeleton = data['skeleton_mat'] 46 | obj_nums = data['object_num'] 47 | data_dict[sequence_id]['total_length'] = activity.shape[0] 48 | data_dict[sequence_id]['labels'] = activity 49 | data_dict[sequence_id]['u_labels'] = affordance[:, : obj_nums] 50 | data_dict[sequence_id]['seg_lengths'] = list() 51 | data_dict[sequence_id]['activity'] = activity_id 52 | 53 | # feature reformat for GEP 54 | activity_features = None 55 | affordance_features = None 56 | 57 | start_ori = segmentation[0][0] 58 | for 
(start, end) in segmentation: 59 | end = end - start_ori 60 | start = start - start_ori 61 | data_dict[sequence_id]['seg_lengths'].append(end - start + 1) 62 | subactivity = metadata.subactivities[int(activity[start])] 63 | object_data = objects[start : end + 1, : obj_nums, :] 64 | obj_positions = [object_data[obj_idx, metadata.OBJECT_NUM : ] for obj_idx in range(obj_nums)] 65 | obj_names = [metadata.objects[np.argmax(object_data[0, obj_idx, : metadata.OBJECT_NUM])] for obj_idx in range(obj_nums)] 66 | affordance_labels = affordance[start, : obj_nums] 67 | affordance_labels = [metadata.affordances[int(affordance_labels[obj_idx])] for obj_idx in range(obj_nums)] 68 | spg = parsegraph.SParseGraph(start, end, subactivity, subactivity, obj_names, affordance_labels) 69 | spg.set_obj_positions(obj_positions) 70 | spg.set_skeletons(skeleton[start : end + 1, :]) 71 | tpg.append_terminal(spg) 72 | 73 | for feature_idx in range(start, end + 1): 74 | image_id = sequence_id + '$' + str(feature_idx + start_ori) 75 | activity_feature = np.load(os.path.join(activity_feature_path, '{}.npy'.format(image_id))) 76 | affordance_feature = np.expand_dims(np.load(os.path.join(affordance_feature_path, 77 | '{}.npy'.format(image_id)))[: obj_nums, :], axis =0) 78 | if activity_features is None: 79 | activity_features = activity_feature 80 | else: 81 | activity_features = np.vstack((activity_features, activity_feature)) 82 | if affordance_features is None: 83 | affordance_features = affordance_feature 84 | else: 85 | affordance_features = np.vstack((affordance_features, affordance_feature)) 86 | 87 | data_dict[sequence_id]['features'] = activity_features 88 | data_dict[sequence_id]['u_features'] = affordance_features 89 | 90 | activity_corpus[activity_id].append(tpg) 91 | print('Finished processing for {}'.format(sequence_id)) 92 | with open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'wb') as f: 93 | pickle.dump(activity_corpus, f) 94 | if not os.path.exists(save_path): 95 | os.makedirs(save_path) 96 | 97 | training_rate = 0.65 98 | validation_rate = 0.15 99 | training_num = training_rate * len(data_dict) 100 | validation_num = (training_rate + validation_rate) * len(data_dict) 101 | keys = list(data_dict.keys()) 102 | shuffle(keys) 103 | 104 | training_dict = dict() 105 | validation_dict = dict() 106 | testing_dict = dict() 107 | 108 | for idx, key in enumerate(keys): 109 | if idx < training_num: 110 | training_dict[key] = data_dict[key] 111 | if idx >= training_num and idx < validation_num: 112 | validation_dict[key] = data_dict[key] 113 | if idx >= validation_num: 114 | testing_dict[key] = data_dict[key] 115 | 116 | with open(os.path.join(save_path, 'vcla_gaze_all.p'), 'wb') as f: 117 | pickle.dump(data_dict, f) 118 | with open(os.path.join(save_path, 'vcla_gaze_train.p'), 'wb') as f: 119 | pickle.dump(training_dict, f) 120 | with open(os.path.join(save_path, 'vcla_gaze_val.p'), 'wb') as f: 121 | pickle.dump(validation_dict, f) 122 | with open(os.path.join(save_path, 'vcla_gaze_test.p'), 'wb') as f: 123 | pickle.dump(testing_dict, f) 124 | 125 | def main(): 126 | paths = config.Paths() 127 | start_time = time.time() 128 | parse_data(paths) 129 | print('Time elapsed: {}'.format(time.time() - start_time)) 130 | 131 | if __name__ == '__main__': 132 | main() -------------------------------------------------------------------------------- /datasets/VCLA_GAZE/finetune/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 10/29/18 3 | 4 | @author: 
Baoxiong Jia
  5 | 
  6 | Description:
  7 | 
  8 | """
  9 | 
 10 | import torch
 11 | import torchvision
 12 | from datasets.VCLA_GAZE.metadata import VCLA_METADATA
 13 | metadata = VCLA_METADATA()
 14 | 
 15 | model_dict = {
 16 |     'resnet': lambda num_classes, feature_dims: ResNet152(num_classes=num_classes, feature_dim=feature_dims),
 17 |     'densenet': lambda num_classes, feature_dims: DenseNet(num_classes=num_classes, feature_dim=feature_dims),
 18 |     'vgg16': lambda num_classes, feature_dims: VGG16(num_classes=num_classes, feature_dim=feature_dims)
 19 | }
 20 | 
 21 | class VGG16(torch.nn.Module):
 22 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 23 |         super(VGG16, self).__init__()
 24 |         self.features = torchvision.models.vgg16(pretrained=True).features
 25 |         self.classifier = torch.nn.Sequential(
 26 |             torch.nn.Linear(512 * 7 * 7, 4096),
 27 |             torch.nn.ReLU(True),
 28 |             torch.nn.Dropout(),
 29 |             torch.nn.Linear(4096, 4096),
 30 |             torch.nn.ReLU(),
 31 |             torch.nn.Dropout(),
 32 |             torch.nn.Linear(4096, feature_dim)
 33 |         )
 34 |         self.last = torch.nn.Linear(feature_dim, num_classes)
 35 |         self._initialize_weights()
 36 | 
 37 |     def forward(self, x):
 38 |         x = self.features(x)
 39 |         x = x.view(x.size(0), -1)
 40 |         x = self.classifier(x)
 41 |         output = self.last(x)
 42 |         return x, output
 43 | 
 44 |     def _initialize_weights(self):
 45 |         for m in self.modules():
 46 |             if isinstance(m, torch.nn.BatchNorm2d):
 47 |                 torch.nn.init.constant_(m.weight, 1)
 48 |                 torch.nn.init.zeros_(m.bias)
 49 |             elif isinstance(m, torch.nn.Linear):
 50 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 51 |                 torch.nn.init.zeros_(m.bias)
 52 | 
 53 | class ResNet152(torch.nn.Module):
 54 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 55 |         super(ResNet152, self).__init__()
 56 |         self.features = torchvision.models.resnet152(pretrained=True)
 57 |         self.fc_ = torch.nn.Linear(1000, feature_dim)
 58 |         self.fc = torch.nn.Linear(feature_dim, num_classes)
 59 |         self._initialize_weights()
 60 | 
 61 |     def forward(self, x):
 62 |         x = self.features(x)
 63 |         x = x.view(x.size(0), -1)
 64 |         x = self.fc_(x)
 65 |         output = self.fc(x)
 66 |         return x, output
 67 | 
 68 |     def _initialize_weights(self):
 69 |         # Note: this matches every nn.Linear module, including the pretrained backbone's own fc layer
 70 |         for m in self.modules():
 71 |             if isinstance(m, torch.nn.Linear):
 72 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 73 |                 torch.nn.init.zeros_(m.bias)
 74 | 
 75 | class DenseNet(torch.nn.Module):
 76 |     def __init__(self, num_classes=metadata.ACTION_NUM, feature_dim=200):
 77 |         super(DenseNet, self).__init__()
 78 |         self.features = torchvision.models.densenet161(pretrained=True)
 79 |         self.fc_ = torch.nn.Linear(1000, feature_dim)
 80 |         self.fc = torch.nn.Linear(feature_dim, num_classes)
 81 |         self._initialize_weights()
 82 | 
 83 |     def forward(self, x):
 84 |         x = self.features(x)
 85 |         x = x.view(x.size(0), -1)
 86 |         x = self.fc_(x)
 87 |         output = self.fc(x)
 88 |         return x, output
 89 | 
 90 |     def _initialize_weights(self):
 91 |         for m in self.modules():
 92 |             if isinstance(m, torch.nn.Linear):
 93 |                 torch.nn.init.normal_(m.weight, 0, 0.01)
 94 |                 torch.nn.init.zeros_(m.bias)
 95 | 
 96 | class AffordanceNet(torch.nn.Module):
 97 |     def __init__(self, num_classes, name='resnet', feature_dim=200):
 98 |         super(AffordanceNet, self).__init__()
 99 |         self.network = model_dict[name](num_classes, feature_dim)
100 | 
101 |     def forward(self, x):
102 |         return self.network(x)
103 | 
104 | class ActivityNet(torch.nn.Module):
105 |     def __init__(self, num_classes, name='resnet', feature_dim=500, obj_feature_dim=200):
106 |         super(ActivityNet, self).__init__()
107 |         self.network = model_dict[name](num_classes, 2 *
feature_dim) 107 | self.pooling = torch.nn.MaxPool2d(3, stride=1, padding=1) 108 | self.fc = torch.nn.Sequential( 109 | torch.nn.Linear(2 * feature_dim + 3 * obj_feature_dim, 2 * feature_dim), 110 | torch.nn.BatchNorm1d(2 * feature_dim), 111 | torch.nn.ReLU(), 112 | torch.nn.Linear(2 * feature_dim, feature_dim) 113 | ) 114 | self.fc_ = torch.nn.Linear(feature_dim, num_classes) 115 | 116 | def forward(self, x, affordance_features): 117 | feature, x = self.network(x) 118 | u_feature = self.pooling(affordance_features).view(affordance_features.size(0), -1) 119 | features = self.fc(torch.cat((feature, u_feature), 1)) 120 | output = self.fc_(features) 121 | return features, output 122 | 123 | class TaskNet(torch.nn.Module): 124 | def __init__(self, task='affordance', name='resnet', feature_dim=1500, obj_feature_dim=1000): 125 | super(TaskNet, self).__init__() 126 | self.task = task 127 | if task == 'affordance': 128 | self.network = AffordanceNet(num_classes=metadata.AFFORDANCE_NUM, name=name, feature_dim=obj_feature_dim) 129 | else: 130 | self.network = ActivityNet(num_classes=metadata.ACTION_NUM, name=name, feature_dim=feature_dim, obj_feature_dim=obj_feature_dim) 131 | 132 | def forward(self, x, features=None): 133 | if self.task == 'affordance': 134 | return self.network(x) 135 | else: 136 | return self.network(x, features) 137 | 138 | # For test purposes only 139 | def main(): 140 | normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 141 | std=[0.229, 0.224, 0.225]) 142 | transform = torchvision.transforms.Compose([ 143 | torchvision.transforms.ToTensor(), 144 | normalize, 145 | ]) 146 | 147 | if __name__ == '__main__': 148 | main() -------------------------------------------------------------------------------- /datasets/CAD/cad.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/9/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | import os 10 | import pickle 11 | import numpy as np 12 | import torch 13 | import torch.utils.data 14 | from datasets.CAD.metadata import CAD_METADATA 15 | class CAD(torch.utils.data.Dataset): 16 | def __init__(self, paths, mode, task, subsample=None): 17 | super(CAD, self).__init__() 18 | self.paths = paths.inter_root 19 | with open(os.path.join(self.paths, 'features', 'cad_{}.p'.format(mode)), 'rb') as f: 20 | self.data = pickle.load(f, encoding='latin1') 21 | self.sequence_ids = list() 22 | for key in self.data.keys(): 23 | self.sequence_ids.append(key) 24 | self.task = task 25 | self.mode = mode 26 | 27 | def __getitem__(self, index): 28 | sequence_id = self.sequence_ids[index] 29 | if self.task == 'affordance': 30 | return self.data[sequence_id]['u_features'], self.data[sequence_id]['u_labels'],\ 31 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'], \ 32 | self.data[sequence_id]['activity'], sequence_id, None 33 | else: 34 | return self.data[sequence_id]['features'], self.data[sequence_id]['labels'], \ 35 | self.data[sequence_id]['seg_lengths'], self.data[sequence_id]['total_length'],\ 36 | self.data[sequence_id]['activity'], sequence_id, None 37 | 38 | def __len__(self): 39 | return len(self.sequence_ids) 40 | 41 | @staticmethod 42 | def collate_fn(batch): 43 | metadata = CAD_METADATA() 44 | features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 45 | feature_size = features[0].shape[1] 46 | label_num = len(metadata.subactivities) 47 | 48 | max_seq_length = np.max(np.array([total_length for 
(features, labels, seg_lengths, total_length, activity, sequence_id, additional) in batch])) 49 | features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 50 | labels_batch = np.ones((max_seq_length, len(batch))) * -1 51 | probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 52 | total_lengths = np.zeros(len(batch)) 53 | ctc_labels = list() 54 | ctc_lengths = list() 55 | activities = list() 56 | sequence_ids = list() 57 | 58 | for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate(batch): 59 | current_len = 0 60 | ctc_labels.append(labels) 61 | ctc_lengths.append(len(labels)) 62 | for seg_i, feature in enumerate(features): 63 | features_batch[current_len:current_len + seg_lengths[seg_i], batch_i, :] = np.repeat(features[seg_i], 64 | seg_lengths[seg_i], 65 | axis=0) 66 | labels_batch[current_len:current_len + seg_lengths[seg_i], batch_i] = labels[seg_i] 67 | probs_batch[current_len:current_len + seg_lengths[seg_i], batch_i, labels[seg_i]] = 1.0 68 | current_len += seg_lengths[seg_i] 69 | total_lengths[batch_i] = total_length 70 | activities.append(activity) 71 | sequence_ids.append(sequence_id) 72 | 73 | features_batch = torch.FloatTensor(features_batch) 74 | labels_batch = torch.LongTensor(labels_batch) 75 | probs_batch = torch.FloatTensor(probs_batch) 76 | total_lengths = torch.IntTensor(total_lengths) 77 | ctc_lengths = torch.IntTensor(ctc_lengths) 78 | 79 | return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None 80 | 81 | # @staticmethod 82 | # def collate_fn(batch): 83 | # metadata = CAD_METADATA() 84 | # features, labels, seg_lengths, total_length, activity, sequence_id, additional = batch[0] 85 | # feature_size = features[0].shape[1] 86 | # label_num = len(metadata.subactivities) 87 | # 88 | # max_seq_length = len(labels) 89 | # features_batch = np.zeros((max_seq_length, len(batch), feature_size)) 90 | # labels_batch = np.ones((max_seq_length, len(batch))) * -1 91 | # probs_batch = np.zeros((max_seq_length, len(batch), label_num)) 92 | # total_lengths = np.zeros(len(batch)) 93 | # ctc_labels = list() 94 | # ctc_lengths = list() 95 | # activities = list() 96 | # sequence_ids = list() 97 | # 98 | # for batch_i, (features, labels, seg_lengths, total_length, activity, sequence_id, additional) in enumerate( 99 | # batch): 100 | # current_len = 0 101 | # ctc_labels.append(labels) 102 | # ctc_lengths.append(len(labels)) 103 | # for seg_i, feature in enumerate(features): 104 | # features_batch[current_len:current_len + seg_lengths[seg_i], batch_i, :] = np.repeat(features[seg_i], 105 | # 1, axis=0) 106 | # labels_batch[current_len:current_len + 1, batch_i] = labels[seg_i] 107 | # probs_batch[current_len:current_len + 1, batch_i, labels[seg_i]] = 1.0 108 | # current_len += 1 109 | # total_lengths[batch_i] = total_length 110 | # activities.append(activity) 111 | # sequence_ids.append(sequence_id) 112 | # 113 | # features_batch = torch.FloatTensor(features_batch) 114 | # labels_batch = torch.LongTensor(labels_batch) 115 | # probs_batch = torch.FloatTensor(probs_batch) 116 | # total_lengths = torch.IntTensor(total_lengths) 117 | # ctc_lengths = torch.IntTensor(ctc_lengths) 118 | # 119 | # return features_batch, labels_batch, activities, sequence_ids, total_lengths, 0, ctc_labels, ctc_lengths, probs_batch, None -------------------------------------------------------------------------------- /models/parser/test.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Created on Jan 25, 2018 3 | 4 | @author: Siyuan Qi 5 | 6 | Description of the file. 7 | 8 | """ 9 | 10 | import os 11 | import time 12 | 13 | import nltk 14 | import numpy as np 15 | 16 | import config 17 | import datasets 18 | import models.parser.grammarutils as grammarutils 19 | import models.parser.GEP as generalizedearley 20 | import models.parser.GEP_online as generalizedearley_online 21 | 22 | 23 | def parsing_examples(): 24 | rules = list() 25 | rules.append("GAMMA -> R [1.0]") 26 | rules.append("R -> N O N [0.2]") 27 | rules.append("R -> N [0.3]") 28 | rules.append("R -> '0' [0.5]") 29 | rules.append("N -> '0' [0.1]") 30 | rules.append("N -> '1' [0.9]") 31 | rules.append("O -> '+' [0.4]") 32 | rules.append("O -> '-' [0.6]") 33 | 34 | grammar_rules = grammarutils.get_pcfg(rules) 35 | grammar = nltk.PCFG.fromstring(grammar_rules) 36 | 37 | sentence = '0' 38 | tokens = sentence.split(' ') 39 | 40 | # earley_parser = nltk.EarleyChartParser(grammar, trace=1) 41 | # e_chart = earley_parser.chart_parse(tokens) 42 | 43 | symbols = ['0', '1', '+', '-'] 44 | symbol_index = dict() 45 | for s in symbols: 46 | symbol_index[s] = symbols.index(s) 47 | grammar_rules = grammarutils.get_pcfg(rules, index=True, mapping=None) 48 | grammar = nltk.PCFG.fromstring(grammar_rules) 49 | 50 | # grammar_file = os.path.join('/media/hdd/home/baoxiong/Projects/TPAMI2019/tmp/cad/grammar/cleaning_objects.pcfg') 51 | # import datasets.CAD.metadata as metadata 52 | # grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=metadata.CAD_METADATA().action_index) 53 | # symbols = metadata.CAD_METADATA().actions 54 | 55 | classifier_output = [ 56 | [0.7, 0.1, 0.1, 0.1], 57 | [0.67, 0.11, 0.20, 0.02], 58 | [0.25, 0.25, 0.49, 0.01], 59 | [0.01, 0.14, 0.49, 0.36], 60 | [0.04, 0.20, 0.7, 0.06], 61 | [0.15, 0.6, 0.20, 0.05], 62 | [0.01, 0.7, 0.19, 0.1], 63 | [0.1, 0.7, 0.1, 0.1], 64 | [0.14, 0.63, 0.1, 0.13], 65 | ] 66 | classifier_output = np.array(classifier_output) 67 | 68 | # # np.random.seed(0) 69 | # classifier_output = np.random.rand(2000, 10) 70 | # classifier_output = classifier_output / np.sum(classifier_output, axis=1)[:, None] # Normalize to probability 71 | 72 | # # print(classifier_output.shape) 73 | gen_earley_parser = generalizedearley.GeneralizedEarley(grammar, mapping=symbol_index) 74 | start_time = time.time() 75 | best_string, prob = gen_earley_parser.parse(classifier_output) 76 | gen_earley_parser.cached_prob_tex() 77 | print('classic gep', best_string, prob, '{}s'.format(time.time()-start_time)) 78 | 79 | print('================================================================') 80 | print('================================================================') 81 | # Online GEP 82 | gen_earley_parser_online = generalizedearley_online.GeneralizedEarley(grammar, len(symbols), mapping=symbol_index) 83 | start_time = time.time() 84 | for t in range(classifier_output.shape[0]): 85 | gen_earley_parser_online.update_prob(classifier_output[t, :]) 86 | best_string, prob = gen_earley_parser_online.parse() 87 | gen_earley_parser_online.future_predict() 88 | print('online gep', best_string, prob) 89 | print('{}s'.format(time.time()-start_time)) 90 | exit() 91 | 92 | # for t : {update gep -> parse} 93 | best_string, prob = gen_earley_parser.parse(classifier_output) 94 | prob_sum = 0 95 | for key, data in gen_earley_parser._cached_log_prob.items(): 96 | 
print('-----------------------------------------------------------') 97 | print('String: {}'.format(key)) 98 | print('Parsing Probability: {}'.format(np.exp(data[-2]))) 99 | print('Grammar Prefix Probability: {}'.format(gen_earley_parser._cached_grammar_prob[key])) 100 | # print(best_string, np.exp(prob)) 101 | 102 | 103 | def test_generalized_earley(grammar, classifier_output): 104 | gen_earley_parser = generalizedearley.GeneralizedEarley(grammar) 105 | best_string, prob = gen_earley_parser.parse(classifier_output) 106 | print('best_string with prob {:.3f}:'.format(prob), best_string) 107 | print(gen_earley_parser.compute_labels()) 108 | print(np.argmax(classifier_output, axis=1)) 109 | 110 | 111 | def test_earley(grammar, tokens): 112 | earley_parser = nltk.EarleyChartParser(grammar, trace=1) 113 | e_chart = earley_parser.chart_parse(tokens) 114 | for edge in e_chart.edges(): 115 | print(edge, edge.end()) 116 | 117 | print(grammarutils.earley_predict(grammar, tokens)) 118 | 119 | 120 | def test_valid(): 121 | paths = config.Paths() 122 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', 'stacking_objects.pcfg') 123 | 124 | # sentence = 'null reaching moving placing' 125 | # grammar = grammarutils.read_grammar(grammar_file, index=False) 126 | # test_earley(grammar, sentence.split()) 127 | 128 | sentence = 'null reaching' 129 | tokens = sentence.split() 130 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 131 | seg_length = 15 132 | correct_prob = 0.8 133 | classifier_output = np.ones((seg_length*2, 10)) * 1e-10 134 | classifier_output[:seg_length, datasets.cad_metadata.subactivity_index[tokens[0]]] = correct_prob 135 | classifier_output[seg_length:, datasets.cad_metadata.subactivity_index[tokens[1]]] = correct_prob 136 | 137 | classifier_output[:seg_length, datasets.cad_metadata.subactivity_index[tokens[0]]+1] = 1 - correct_prob 138 | classifier_output[seg_length:, datasets.cad_metadata.subactivity_index[tokens[1]]+1] = 1 - correct_prob 139 | test_generalized_earley(grammar, classifier_output) 140 | 141 | 142 | def test_time(): 143 | paths = config.Paths() 144 | start_time = time.time() 145 | np.random.seed(int(start_time)) 146 | classifier_output = np.random.rand(100000, 10) 147 | classifier_output = classifier_output / np.sum(classifier_output, axis=1)[:, None] # Normalize to probability 148 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')): 149 | if not pcfg.endswith('.pcfg'): 150 | continue 151 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg) 152 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 153 | test_generalized_earley(grammar, classifier_output) 154 | print('Time elapsed: {}s'.format(time.time() - start_time)) 155 | 156 | 157 | def test_grammar(): 158 | paths = config.Paths() 159 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')): 160 | if not pcfg.endswith('.pcfg'): 161 | continue 162 | grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg) 163 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=datasets.cad_metadata.subactivity_index) 164 | corpus_file = os.path.join(paths.tmp_root, 'corpus', 'cad', pcfg.replace('pcfg', 'txt')) 165 | with open(corpus_file, 'r') as f: 166 | for line in f: 167 | tokens = [str(datasets.cad_metadata.subactivity_index[token]) for token in line.strip(' *#\n').split(' ')] 168 | earley_parser = 
nltk.EarleyChartParser(grammar, trace=0) 169 | e_chart = earley_parser.chart_parse(tokens) 170 | print(e_chart.edges()[-1]) 171 | 172 | 173 | def visualize_grammar(): 174 | paths = config.Paths() 175 | dataset_name = 'wnp' 176 | for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', dataset_name)): 177 | if not pcfg.endswith('.pcfg'): 178 | continue 179 | grammar_file = os.path.join(paths.tmp_root, 'grammar', dataset_name, pcfg) 180 | grammar = grammarutils.read_grammar(grammar_file, insert=False) 181 | dot_filename = os.path.join(paths.tmp_root, 'visualize', 'grammar', dataset_name, pcfg.replace('.pcfg', '.dot')) 182 | pdf_filename = os.path.join(paths.tmp_root, 'visualize', 'grammar', dataset_name, pcfg.replace('.pcfg', '.pdf')) 183 | grammarutils.grammar_to_dot(grammar, dot_filename) 184 | os.system('dot -Tpdf {} -o {}'.format(dot_filename, pdf_filename)) 185 | 186 | 187 | def main(): 188 | # test_grammar() 189 | # test_valid() 190 | # test_time() 191 | # visualize_grammar() 192 | parsing_examples() 193 | 194 | 195 | if __name__ == '__main__': 196 | main() -------------------------------------------------------------------------------- /experiments/GEP/gep_ablation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | # System imports 11 | import sys 12 | sys.path.append('/mnt/hdd/home/baoxiong/Projects/TPAMI2019/src') 13 | 14 | import os 15 | import argparse 16 | import json 17 | from tqdm import tqdm 18 | 19 | # Libraries 20 | import numpy as np 21 | import torch 22 | import torch.nn.functional as F 23 | 24 | # Local imports 25 | import models.parser.GEP_adj as GEP 26 | import models.parser.grammarutils as grammarutils 27 | import utils.logutils as logutils 28 | import utils.evalutils as evalutils 29 | import utils.vizutils as vizutils 30 | import experiments.exp_config as exp_config 31 | 32 | def inference(model_outputs, activities, sequence_ids, args): 33 | model_output_probs = torch.nn.Softmax(dim=-1)(model_outputs) 34 | model_output_probs = model_output_probs.data.cpu().numpy() 35 | batch_earley_pred_labels = list() 36 | batch_tokens = list() 37 | batch_seg_pos = list() 38 | for batch_i in range(model_outputs.size()[1]): 39 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i]+'.pcfg') 40 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.action_index) 41 | gen_earley_parser = GEP.GeneralizedEarley(grammar, args.prior) 42 | best_string, prob = gen_earley_parser.parse(model_output_probs[:, batch_i, :]) 43 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 44 | 45 | # Back trace to get labels of the entire sequence 46 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 47 | batch_earley_pred_labels.append(earley_pred_labels) 48 | batch_tokens.append(tokens) 49 | batch_seg_pos.append(seg_pos) 50 | 51 | _, nn_pred_labels = torch.max(model_outputs, dim=2) 52 | 53 | return nn_pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos 54 | 55 | def validate(data_loader, model, args): 56 | all_gt_detections = list() 57 | all_detections = list() 58 | 59 | task_acc_ratio = logutils.AverageMeter() 60 | task_macro_prec = logutils.AverageMeter() 61 | task_macro_rec = logutils.AverageMeter() 62 | task_macro_f1 = logutils.AverageMeter() 63 | task_acc_ratio_nn = logutils.AverageMeter() 64 | 65 | for batch_idx, data_unit in 
enumerate(tqdm(data_loader, desc='GEP evaluation')): 66 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 67 | epsilon = torch.log(torch.tensor(1e-4)) 68 | maximum = torch.log(torch.tensor(1 - 1e-4 * (len(args.metadata.actions) - 1))) 69 | model_outputs = torch.ones((features_batch.size(0), features_batch.size(1), len(args.metadata.actions))) * epsilon 70 | model_outputs = model_outputs.scatter_(2, labels_batch.type(torch.LongTensor).unsqueeze(1), maximum) 71 | model_outputs = F.softmax(model_outputs / args.temperature, dim=-1) 72 | # model_outputs = torch.ones((features_batch.size(0), features_batch.size(1), len(args.metadata.actions))) / len(args.metadata.actions) 73 | 74 | # Inference 75 | tqdm.write('[{}] Inference'.format(sequence_ids[0])) 76 | _, nn_pred_labels = torch.max(model_outputs, dim=-1) 77 | nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist() 78 | pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args) 79 | # Evaluation 80 | # Frame-wise detection 81 | detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()] 82 | if args.subsample != 1: 83 | all_total_labels, all_total_lengths = additional 84 | gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist() 85 | video_length = len(gt_detections) 86 | 87 | detections = evalutils.upsample(detections, freq=args.subsample, length=video_length) 88 | nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length) 89 | else: 90 | gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist() 91 | detections = detections[:total_lengths[0]] 92 | video_length = len(gt_detections) 93 | 94 | # vizutils.plot_segmentation([gt_detections, nn_detections, detections], video_length, 95 | # filename=os.path.join(args.paths.visualize_root, '{}.jpg'.format(sequence_ids[0])), border=False) 96 | 97 | micro_prec = logutils.compute_accuracy(gt_detections, detections) 98 | micro_prec_nn = logutils.compute_accuracy(gt_detections, nn_detections) 99 | macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_detections, detections, metric='macro') 100 | task_acc_ratio.update(micro_prec, video_length) 101 | task_acc_ratio_nn.update(micro_prec_nn, video_length) 102 | task_macro_prec.update(macro_prec, video_length) 103 | task_macro_rec.update(macro_rec, video_length) 104 | task_macro_f1.update(macro_f1, video_length) 105 | 106 | all_gt_detections.extend(gt_detections) 107 | all_detections.extend(detections) 108 | 109 | micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections) 110 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections, 111 | metric='macro') 112 | tqdm.write('[Evaluation] Micro Prec: {}\t' 113 | 'Macro Precision: {}\t' 114 | 'Macro Recall: {}\t' 115 | 'Macro F-score: {}'.format(micro_prec, macro_prec, macro_recall, macro_fscore)) 116 | 117 | micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections) 118 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections, metric='macro') 119 | tqdm.write('Detection:\n' 120 | 'Micro Prec: {}\t' 121 | 'NN Prec:{}\t' 122 | 'Macro Precision: {}\t' 123 | 'Macro Recall: {}\t' 124 | 'Macro F-score: {}\n\n'.format(micro_prec, task_acc_ratio_nn.avg, macro_prec, macro_recall, macro_fscore)) 125 | 126 | def 
main(args):
127 |     exp_info = exp_config.Experiment(args.dataset)
128 |     paths = exp_info.paths
129 |     args.paths = paths
130 |     args.metadata = exp_info.metadata
131 | 
132 |     np.random.seed(args.seed)
133 |     torch.manual_seed(args.seed)
134 | 
135 |     args.batch_size = 1
136 |     feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
137 | 
138 |     validate(test_loader, None, args=args)
139 | 
140 | 
141 | def parse_args():
142 |     parser = argparse.ArgumentParser()
143 |     def str2bool(v):
144 |         if v.lower() in ('yes', 'true', 't', 'y', '1'):
145 |             return True
146 |         elif v.lower() in ('no', 'false', 'f', 'n', '0'):
147 |             return False
148 |         else:
149 |             raise argparse.ArgumentTypeError('Unsupported value encountered')
150 |     parser.add_argument('--dataset', default='CAD', type=str,
151 |                         help='indicating which dataset to use')
152 |     parser.add_argument('--seed', default=12345, type=int,
153 |                         help='Default seed for all random generators')
154 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
155 |                         help='Option flag for using CUDA training (default: True)')
156 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
157 |                         help='number of data loading workers (default: 1)')
158 |     parser.add_argument('--task', default='activity', type=str,
159 |                         help='Default working task activity/affordance')
160 |     parser.add_argument('--epochs', default=100, type=int, metavar='N',
161 |                         help='number of epochs for training (default: 100)')
162 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
163 |                         help='batch size for training (default: 1)')
164 |     parser.add_argument('--subsample', default=1, type=int,
165 |                         help='subsample frequency for Breakfast dataset')
166 |     parser.add_argument('--temperature', default=1.0, type=float,
167 |                         help='The temperature used for ablative study')
168 |     parser.add_argument('--prior', default=False, type=str2bool,
169 |                         help='Flag indicating prior usage (default: False)')
170 |     args = parser.parse_args()
171 |     return args
172 | 
173 | 
174 | if __name__ == '__main__':
175 |     args = parse_args()
176 |     main(args)
177 | 
--------------------------------------------------------------------------------
/utils/vizutils.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Feb 28, 2017
3 | 
4 | @author: Siyuan Qi
5 | 
6 | Description of the file.
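
A minimal usage sketch for the plotting helpers in this file. The label
tracks and class names below are hypothetical placeholders (not dataset
output), and the import assumes the project src root is on sys.path:

    import numpy as np
    import sklearn.metrics
    import utils.vizutils as vizutils

    gt = np.random.randint(0, 10, 600).tolist()    # per-frame ground-truth labels
    det = np.random.randint(0, 10, 600).tolist()   # per-frame detected labels
    vizutils.plot_segmentation([gt, det], endframe=600, vmax=10, border=False)

    cm = sklearn.metrics.confusion_matrix(gt, det, labels=range(10))
    vizutils.plot_confusion_matrix(cm, [str(c) for c in range(10)], normalize=True)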
7 | 8 | """ 9 | 10 | import os 11 | import itertools 12 | import pickle 13 | 14 | import numpy as np 15 | import matplotlib 16 | import matplotlib.pyplot as plt 17 | import sklearn.metrics 18 | # import tabulate 19 | 20 | import config 21 | import datasets.VCLA_GAZE.metadata as metadata 22 | 23 | 24 | def plot_segmentation(input_labels_list, endframe, vmax=None, filename=None, border=True, cmap=plt.get_cmap('gist_rainbow')): 25 | plt_idx = 0 26 | aspect_ratio = 60 27 | fig = plt.figure(figsize=(28, 3)) 28 | for input_labels in input_labels_list: 29 | seg_image = np.empty((int(endframe/aspect_ratio), endframe)) 30 | 31 | for frame in range(endframe): 32 | seg_image[:, frame] = input_labels[frame] 33 | 34 | plt_idx += 1 35 | ax = plt.subplot(len(input_labels_list), 1, plt_idx) 36 | if not border: 37 | ax.axis('off') 38 | if vmax: 39 | ax.imshow(seg_image, vmin=0, vmax=vmax, cmap=cmap) 40 | else: 41 | ax.imshow(seg_image, cmap=cmap) 42 | ax.set_ylabel('LSTM') 43 | 44 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 45 | if not filename: 46 | plt.show() 47 | else: 48 | plt.savefig(filename) 49 | plt.close() 50 | 51 | 52 | def visualize_tpg_labeling(gt_subactivity, gt_affordance, tpg, obj_num, end_frame): 53 | # Visualization of segmentation and labeling results for subactivity and affordance 54 | start_frame = tpg.terminals[0].start_frame 55 | end_frame = np.min([gt_subactivity.shape[0], tpg.terminals[-1].end_frame-start_frame, end_frame]) 56 | # Get labels for every frame 57 | subactivity_lables = np.empty(end_frame, dtype=int) 58 | affordance_labels = np.empty((obj_num, end_frame), dtype=int) 59 | for spg in tpg.terminals: 60 | # Note: a spg spans [spg.start_frame, spg.end_frame], hence need to +1 in range() 61 | for frame in range(spg.start_frame, spg.end_frame+1): 62 | # print frame, spg.subactivity, metadata.subactivities[spg.subactivity] 63 | if frame >= end_frame + start_frame: 64 | break 65 | subactivity_lables[frame-start_frame] = spg.subactivity 66 | affordance_labels[:, frame-start_frame] = spg.affordance 67 | 68 | # Add labels to the plot list 69 | plot_labels = [gt_subactivity[:end_frame], subactivity_lables, (gt_subactivity[:end_frame]-subactivity_lables) == 0] 70 | for o in range(obj_num): 71 | plot_labels.append(gt_affordance[o, :end_frame]) 72 | plot_labels.append(affordance_labels[o, :]) 73 | plot_labels.append((gt_affordance[o, :end_frame]-affordance_labels[o, :]) == 0) 74 | plot_segmentation(plot_labels, end_frame) 75 | 76 | 77 | def plot_confusion_matrix(cm, classes, filename=None, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues): 78 | """ 79 | This function prints and plots the confusion matrix. 80 | Normalization can be applied by setting `normalize=True`. 81 | """ 82 | if normalize: 83 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 84 | print("Normalized confusion matrix") 85 | else: 86 | print('Confusion matrix, without normalization') 87 | thresh = cm.max() / 2. 
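    # Readability note for the cell annotations drawn below: 'thresh' is half of
    # the largest cell value; cells above it (dark under the default Blues
    # colormap) get white text, the remaining cells get black text.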
88 | 89 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 90 | plt.title(title) 91 | # plt.colorbar() 92 | tick_marks = np.arange(len(classes)) 93 | plt.xticks(tick_marks, classes, rotation=45, ha='right') 94 | plt.yticks(tick_marks, classes) 95 | 96 | ax = plt.gca() 97 | ax.tick_params(axis=u'both', which=u'both', length=0) 98 | # matplotlib.rcParams.update({'font.size': 15}) 99 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 100 | if cm[i, j] != 0: 101 | plt.text(j, i, '{0:.2f}'.format(cm[i, j]), verticalalignment='center', horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") 102 | 103 | plt.tight_layout() 104 | # plt.ylabel('True label') 105 | # plt.xlabel('Predicted label') 106 | if not filename: 107 | plt.show() 108 | else: 109 | plt.savefig(filename) 110 | plt.close() 111 | 112 | 113 | def save_results(paths, results): 114 | result_folder = os.path.join(paths.tmp_root, 'results') 115 | if not os.path.exists(result_folder): 116 | os.makedirs(result_folder) 117 | os.makedirs(os.path.join(result_folder, 'figs')) 118 | 119 | with open(os.path.join(result_folder, 'labels.p'), 'wb') as f: 120 | pickle.dump(results, f) 121 | 122 | 123 | def load_results(paths): 124 | with open(os.path.join(paths.tmp_root, 'results', 'labels.p'), 'rb') as f: 125 | results = pickle.load(f) 126 | return results 127 | 128 | ''''' 129 | def print_latex_table(data, row_labels, col_labels): 130 | data = data * 100 131 | row_labels = np.array(row_labels) 132 | row_labels = np.reshape(row_labels, [row_labels.shape[0], 1]) 133 | data = np.hstack((row_labels, data)) 134 | print 135 | print(tabulate.tabulate(data, tablefmt="latex", floatfmt=".1f", numalign="center", headers=col_labels)) 136 | ''''' 137 | 138 | def analyze_results(paths): 139 | def get_f1_score(precision, recall): 140 | return 2 * (precision * recall) / (precision + recall) 141 | 142 | def format_table(predict_frame): 143 | data = np.empty((2, 8)) 144 | data[0, 0:3] = 1.0/len(metadata.subactivities[:-1]) 145 | data[0, 3] = get_f1_score(data[0, 0], data[0, 0]) 146 | data[0, 4:7] = 1.0/len(metadata.affordances) 147 | data[0, 7] = get_f1_score(data[0, 4], data[0, 4]) 148 | 149 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities)-1), average='micro') 150 | data[1, 0] = precision 151 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities)-1), average='macro') 152 | data[1, 1] = precision 153 | data[1, 2] = recall 154 | data[1, 3] = get_f1_score(precision, recall) 155 | 156 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances)), average='micro') 157 | data[1, 4] = precision 158 | precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances)), average='macro') 159 | data[1, 5] = precision 160 | data[1, 6] = recall 161 | data[1, 7] = get_f1_score(precision, recall) 162 | 163 | print_latex_table(data, methods, metrics) 164 | 165 | # ====================== Function starts here ====================== 166 | # fig_folder = os.path.join(paths.tmp_root, 'results', 'figs') 167 | fig_folder = os.path.join(paths.project_root, 'fig', 'raw') 168 | if not 
os.path.exists(fig_folder): 169 | os.makedirs(fig_folder) 170 | 171 | seg_gt_s, seg_pred_s, seg_gt_u, seg_pred_u, gt_s, pred_s, gt_u, pred_u, gt_e, pred_e = load_results(paths) 172 | 173 | methods = ['chance', 'ours'] 174 | metrics = ['P/R', 'Prec.', 'Recall', 'F1-score', 'P/R', 'Prec.', 'Recall', 'F1-score'] 175 | # Evaluation 176 | # TODO: see if need to exclude "null" class 177 | # Online detection 178 | predict_frame = 0 179 | format_table(predict_frame) 180 | 181 | # Future detection 182 | predict_frame = 40 183 | for i in range(predict_frame): 184 | gt_s[predict_frame].extend(gt_s[i]) 185 | pred_s[predict_frame].extend(pred_s[i]) 186 | gt_u[predict_frame].extend(gt_u[i]) 187 | pred_u[predict_frame].extend(pred_u[i]) 188 | format_table(predict_frame) 189 | 190 | # Plot confusion matrices 191 | predict_frame = 0 192 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_u[predict_frame], pred_u[predict_frame], labels=range(len(metadata.affordances))) 193 | plot_confusion_matrix(confusion_matrix, metadata.affordances, normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_affordance.pdf')) 194 | 195 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_s[predict_frame], pred_s[predict_frame], labels=range(len(metadata.subactivities) - 1)) 196 | plot_confusion_matrix(confusion_matrix, metadata.subactivities[:-1], normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_subactivity.pdf')) 197 | 198 | confusion_matrix = sklearn.metrics.confusion_matrix(gt_e, pred_e, labels=range(len(metadata.activities))) 199 | plot_confusion_matrix(confusion_matrix, metadata.activities, normalize=True, title='', filename=os.path.join(fig_folder, 'confusion_event.pdf')) 200 | 201 | 202 | def main(): 203 | paths = config.Paths() 204 | analyze_results(paths) 205 | pass 206 | 207 | 208 | if __name__ == '__main__': 209 | main() 210 | -------------------------------------------------------------------------------- /experiments/GEP/gep_seg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | # System imports 12 | import sys 13 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 14 | 15 | import os 16 | import argparse 17 | import json 18 | from tqdm import tqdm 19 | 20 | # Libraries 21 | import numpy as np 22 | import torch 23 | 24 | # Local imports 25 | from models.BiLSTM import BiLSTM 26 | from models.LSTM_pred import LSTM_Pred 27 | import models.parser.GEP_online as GEP 28 | import models.parser.grammarutils as grammarutils 29 | import utils.logutils as logutils 30 | import utils.evalutils as evalutils 31 | import experiments.exp_config as exp_config 32 | 33 | def predict(detection_outputs, activities, total_lengths, args): 34 | detection_outputs_probs = torch.nn.Softmax(dim=-1)(detection_outputs) 35 | detection_outputs_probs = detection_outputs_probs.data.cpu().numpy() 36 | class_num = detection_outputs_probs.shape[2] 37 | pred_probs = np.empty_like(detection_outputs_probs) 38 | for batch_i in range(detection_outputs_probs.shape[1]): 39 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i] + '.pcfg') 40 | grammar = grammarutils.read_grammar(grammar_file, index=True) 41 | gen_earley_parser = GEP.GeneralizedEarley(grammar, class_num, mapping=args.metadata.action_index) 42 | for frame in range(total_lengths[batch_i]): 43 | gen_earley_parser.update_prob(detection_outputs_probs[frame, batch_i, :]) 44 | 
gen_earley_parser.parse() 45 | pred_probs[frame, batch_i, :] = gen_earley_parser.future_predict(args.epsilon) 46 | return pred_probs 47 | 48 | def get_gt_pred(labels, total_lengths): 49 | all_gt_pred_labels = list() 50 | for i_batch in range(labels.size(1)): 51 | gt_pred_labels = list() 52 | seg_length = int(total_lengths[i_batch]) 53 | current_label = int(labels[0, i_batch]) 54 | for f in range(seg_length): 55 | if int(labels[f, i_batch]) != current_label: 56 | current_label = int(labels[f, i_batch]) 57 | gt_pred_labels.extend([current_label for _ in range(f-len(gt_pred_labels)-1)]) 58 | gt_pred_labels.extend([int(labels[seg_length-1, i_batch]) for _ in range(seg_length-len(gt_pred_labels))]) 59 | all_gt_pred_labels.extend(gt_pred_labels) 60 | return all_gt_pred_labels 61 | 62 | def validate(data_loader, detection_model, prediction_model, args): 63 | all_gt_segment_predictions = list() 64 | all_segment_predictions = list() 65 | all_nn_segment_predictions = list() 66 | 67 | task_acc_ratio = logutils.AverageMeter() 68 | task_macro_prec = logutils.AverageMeter() 69 | task_macro_rec = logutils.AverageMeter() 70 | task_macro_f1 = logutils.AverageMeter() 71 | task_acc_ratio_nn = logutils.AverageMeter() 72 | 73 | # switch to evaluate mode 74 | detection_model.eval() 75 | prediction_model.eval() 76 | 77 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 78 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 79 | 80 | prediction_output = prediction_model(features_batch) 81 | detection_output = detection_model(features_batch) 82 | 83 | pred_probs = predict(detection_output, activities, total_lengths, args) 84 | pred_labels = np.argmax(pred_probs * prediction_output.data.cpu().numpy(), axis=-1).flatten().tolist() 85 | 86 | _, nn_pred_labels = torch.max(prediction_output, dim=-1) 87 | gt_pred_labels = get_gt_pred(labels_batch, total_lengths) 88 | video_length = len(gt_pred_labels) 89 | nn_pred_labels = nn_pred_labels.cpu().data.numpy().flatten().tolist() 90 | 91 | micro_prec = logutils.compute_accuracy(gt_pred_labels, pred_labels) 92 | nn_micro_prec = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels) 93 | macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels, metric='macro') 94 | task_acc_ratio.update(micro_prec, video_length) 95 | task_acc_ratio_nn.update(nn_micro_prec, video_length) 96 | task_macro_prec.update(macro_prec, video_length) 97 | task_macro_rec.update(macro_rec, video_length) 98 | task_macro_f1.update(macro_f1, video_length) 99 | 100 | all_gt_segment_predictions.extend(gt_pred_labels) 101 | all_segment_predictions.extend(pred_labels) 102 | all_nn_segment_predictions.extend(nn_pred_labels) 103 | 104 | tqdm.write('Task {} {} Batch [{}/{}]\t' 105 | 'Acc {top1.val:.4f} ({top1.avg:.4f})\t' 106 | 'NN Acc {nn.val:.4f} ({nn.avg:.4f})\t' 107 | 'Prec {prec.val:.4f} ({prec.avg:.4f})\t' 108 | 'Recall {recall.val:.4f} ({recall.avg:.4f})\t' 109 | 'F1 {f1.val:.4f} ({f1.avg:.4f})'.format( 110 | args.task, 'test', batch_idx, len(data_loader), top1=task_acc_ratio, nn=task_acc_ratio_nn, 111 | prec=task_macro_prec, recall=task_macro_rec, f1=task_macro_f1)) 112 | 113 | micro_prec = logutils.compute_accuracy(all_gt_segment_predictions, all_segment_predictions) 114 | nn_micro_prec = logutils.compute_accuracy(all_gt_segment_predictions, all_nn_segment_predictions) 115 | macro_prec, macro_recall, macro_fscore = 
logutils.compute_accuracy(all_gt_segment_predictions, all_segment_predictions, metric='macro')
116 |     tqdm.write('[Evaluation] Micro Prec: {}\t'
117 |                'NN Micro Prec: {}\t'
118 |                'Macro Precision: {}\t'
119 |                'Macro Recall: {}\t'
120 |                'Macro F-score: {}'.format(micro_prec, nn_micro_prec, macro_prec, macro_recall, macro_fscore))
121 | 
122 | def main(args):
123 |     exp_info = exp_config.Experiment(args.dataset)
124 |     paths = exp_info.paths
125 |     args.paths = paths
126 |     args.metadata = exp_info.metadata
127 | 
128 |     np.random.seed(args.seed)
129 |     torch.manual_seed(args.seed)
130 | 
131 |     batch_size = args.batch_size
132 |     args.batch_size = 1
133 |     feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
134 |     label_num = exp_info.get_label_num(args)
135 | 
136 |     hidden_size = 256
137 |     hidden_layers = 2
138 |     args.save_path = os.path.join(paths.inter_root, 'likelihood', args.task)
139 |     args.resume = os.path.join(paths.checkpoint_root,
140 |                                'detection_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
141 |                                                                                  args.lr, args.batch_size,
142 |                                                                                  args.lr_decay,
143 |                                                                                  1 if not args.subsample else args.subsample,
144 |                                                                                  args.dropout_rate))
145 |     detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
146 |     detection_model = torch.nn.DataParallel(detection_model)
147 |     logutils.load_checkpoint(args, detection_model)
148 | 
149 |     args.resume = os.path.join(paths.checkpoint_root,
150 |                                'segment_prediction_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
151 |                                                                                           args.lr, args.batch_size,
152 |                                                                                           args.lr_decay,
153 |                                                                                           1 if not args.subsample else args.subsample,
154 |                                                                                           args.dropout_rate))
155 |     prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
156 |     prediction_model = torch.nn.DataParallel(prediction_model)
157 |     logutils.load_checkpoint(args, prediction_model)
158 | 
159 |     validate(test_loader, detection_model, prediction_model, args=args)
160 | 
161 | if __name__ == '__main__':
162 |     parser = argparse.ArgumentParser()
163 |     parser.add_argument('--dataset', default='VCLA_GAZE', type=str,
164 |                         help='indicating which dataset to use')
165 |     parser.add_argument('--seed', default=12345, type=int,
166 |                         help='Default seed for all random generators')
167 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
168 |                         help='Option flag for using CUDA training (default: True)')
169 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
170 |                         help='number of data loading workers (default: 1)')
171 |     parser.add_argument('--task', default='activity', type=str,
172 |                         help='Default working task activity/affordance')
173 |     parser.add_argument('--epochs', default=50, type=int, metavar='N',
174 |                         help='number of epochs for training (default: 50)')
175 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
176 |                         help='batch size for training (default: 1)')
177 |     parser.add_argument('--lr', default=1e-4, type=float,
178 |                         help='learning rate for the feature extraction process (default: 1e-4)')
179 |     parser.add_argument('--lr_decay', default=1., type=float,
180 |                         help='decay rate of learning rate (default: between 0.01 and 1)')
181 |     parser.add_argument('--lr_freq', default=25, type=float,
182 |                         help='learning rate decay frequency while updating')
183 |     parser.add_argument('--subsample', default=None, type=int,
184 |                         help='subsample frequency for Breakfast dataset')
185 |     parser.add_argument('--dropout_rate', default=0, type=float,
186 |                         help='Dropout rate for
LSTM training') 187 | parser.add_argument('--epsilon', default=1e-10, type=float, 188 | help='Balance between top-down bottom-up prediction') 189 | args = parser.parse_args() 190 | main(args) 191 | -------------------------------------------------------------------------------- /experiments/GEP/gep_pred_parse_prediction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 5/21/19 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | 11 | # System imports 12 | import sys 13 | sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src') 14 | 15 | import os 16 | import argparse 17 | import json 18 | from tqdm import tqdm 19 | 20 | # Libraries 21 | import numpy as np 22 | import torch 23 | 24 | # Local imports 25 | from models.LSTM_pred import LSTM_Pred 26 | from models.BiLSTM import BiLSTM 27 | from models.MLP import MLP 28 | import models.parser.GEP_old as GEP 29 | import models.parser.grammarutils as grammarutils 30 | import utils.logutils as logutils 31 | import experiments.exp_config as exp_config 32 | 33 | def inference(prob_mat, activity, sequence_id, args): 34 | grammar_file = os.path.join(args.paths.grammar_root, activity+'.pcfg') 35 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.subactivity_index) 36 | gen_earley_parser = GEP.GeneralizedEarley(grammar) 37 | best_string, prob = gen_earley_parser.parse(prob_mat) 38 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 39 | 40 | # Back trace to get labels of the entire sequence 41 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 42 | nn_pred_labels = np.argmax(prob_mat, axis=1) 43 | return nn_pred_labels, earley_pred_labels, tokens, seg_pos 44 | 45 | def predict(): 46 | return 47 | 48 | def validate(data_loader, detection_model, prediction_model, args): 49 | all_gt_frame_predictions = list() 50 | all_frame_predictions = list() 51 | all_nn_frame_predictions = list() 52 | 53 | task_acc_ratio = logutils.AverageMeter() 54 | task_acc_ratio_nn = logutils.AverageMeter() 55 | 56 | # switch to evaluate mode 57 | detection_model.eval() 58 | prediction_model.eval() 59 | 60 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 61 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 62 | detection_likelihood = torch.nn.Softmax(dim=-1)(detection_model(features_batch)).data.cpu().numpy() 63 | 64 | padding = features_batch[0, :, :].repeat(args.using_pred_duration - 1, 1, 1) 65 | prediction_features = torch.cat((padding, features_batch), dim=0) 66 | prediction_output = prediction_model(prediction_features) 67 | prediction_likelihood = torch.nn.Softmax(dim=-1)(prediction_output).data.cpu().numpy() 68 | 69 | for batch_i in range(features_batch.size(1)): 70 | _, pred_labels = torch.max(prediction_output[:total_lengths[batch_i] - 1, batch_i, :], dim=-1) 71 | prediction_likelihood = prediction_likelihood[:total_lengths[batch_i] - 1, batch_i, :] 72 | 73 | skip_size = args.using_pred_duration - args.pred_duration 74 | 75 | # for frame in range(0, total_lengths[batch_i]-1, skip_size): 76 | for frame in range(0, total_lengths[batch_i] - args.using_pred_duration, skip_size): 77 | det = detection_likelihood[:frame + 1, batch_i, :] 78 | # det = detection_likelihood[:frame+1+args.using_pred_duration, batch_i, :] 79 | gt_det = torch.zeros(det.shape) 80 | gt_det.scatter_(1, 
labels_batch[:frame+1,batch_i].unsqueeze(1), 1) 81 | gt_det = gt_det * 0.95 + (0.05/10) * torch.ones(det.shape) 82 | gt_det = gt_det.numpy() 83 | 84 | pred = prediction_likelihood[frame:frame+args.using_pred_duration, :] 85 | prob_mat = np.concatenate((det, pred), axis=0) 86 | pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(prob_mat, activities[batch_i], 87 | sequence_ids[batch_i], args) 88 | 89 | # Testing 90 | gep_predictions = batch_earley_pred_labels[frame+1:frame+args.using_pred_duration+1] 91 | all_frame_predictions.extend(gep_predictions) 92 | nn_frame_predictions = pred_labels[frame+1:frame+args.using_pred_duration+1] 93 | all_nn_frame_predictions.extend(nn_frame_predictions) 94 | gt_frame_predictions = labels_batch[frame+1:frame + args.using_pred_duration + 1, 95 | batch_i].cpu().numpy().tolist() 96 | all_gt_frame_predictions.extend(gt_frame_predictions) 97 | 98 | video_length = len(gt_frame_predictions) 99 | micro_prec_nn = logutils.compute_accuracy(gt_frame_predictions, nn_frame_predictions) 100 | task_acc_ratio_nn.update(micro_prec_nn, video_length) 101 | 102 | continue 103 | micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions) 104 | nn_mirco_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_nn_frame_predictions) 105 | macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_frame_predictions, 106 | all_frame_predictions, 107 | metric='macro') 108 | tqdm.write('[Evaluation] Micro Prec: {}\t' 109 | 'NN Precision: {}\t' 110 | 'Macro Precision: {}\t' 111 | 'Macro Recall: {}\t' 112 | 'Macro F-score: {}'.format(micro_prec, nn_mirco_prec, macro_prec, macro_recall, macro_fscore)) 113 | 114 | 115 | def main(args): 116 | exp_info = exp_config.Experiment(args.dataset) 117 | paths = exp_info.paths 118 | args.paths = paths 119 | args.metadata = exp_info.metadata 120 | 121 | np.random.seed(args.seed) 122 | torch.manual_seed(args.seed) 123 | 124 | batch_size = args.batch_size 125 | args.batch_size = 1 126 | feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True) 127 | label_num = exp_info.get_label_num(args) 128 | 129 | hidden_size = 256 130 | hidden_layers = 2 131 | 132 | args.resume = os.path.join(paths.checkpoint_root, 'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.model, args.epochs, 133 | args.lr, args.batch_size, args.lr_decay, 134 | 1 if not args.subsample else args.subsample, 135 | args.dropout_rate)) 136 | if args.model == 'lstm': 137 | detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num) 138 | else: 139 | detection_model = MLP(feature_size, hidden_size, label_num) 140 | detection_model = torch.nn.DataParallel(detection_model) 141 | logutils.load_checkpoint(args, detection_model) 142 | 143 | args.resume = os.path.join(paths.checkpoint_root, 144 | 'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(args.task, args.model, args.epochs, 145 | args.lr, args.batch_size, 146 | args.lr_decay, 147 | 1 if not args.subsample else args.subsample, 148 | args.dropout_rate, 149 | args.using_pred_duration)) 150 | if args.model == 'lstm': 151 | prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num) 152 | else: 153 | prediction_model = MLP(feature_size, hidden_size, label_num) 154 | prediction_model = torch.nn.DataParallel(prediction_model) 155 | logutils.load_checkpoint(args, prediction_model) 156 | 157 | validate(test_loader, detection_model, prediction_model, args=args) 
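

# A compact restatement of the stitching step buried inside validate() above:
# at an anchor frame t, GEP parses the observed detection likelihoods
# concatenated with the predicted future likelihoods, and only the future span
# of the resulting parse is scored against the ground truth. This helper is an
# illustrative sketch that is not called anywhere in the codebase; det_probs
# and pred_probs are assumed to be [num_frames x num_classes] numpy arrays.
def _gep_future_sketch(det_probs, pred_probs, t, horizon, activity, sequence_id, args):
    det = det_probs[:t + 1, :]                      # frames 0..t, observed
    fut = pred_probs[t:t + horizon, :]              # frames t+1..t+horizon, predicted
    prob_mat = np.concatenate((det, fut), axis=0)   # the matrix GEP actually parses
    _, earley_labels, _, _ = inference(prob_mat, activity, sequence_id, args)
    return earley_labels[t + 1:t + horizon + 1]     # grammar-refined future labels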
158 | 
159 | if __name__ == '__main__':
160 |     parser = argparse.ArgumentParser()
161 |     parser.add_argument('--dataset', default='CAD', type=str,
162 |                         help='indicating which dataset to use')
163 |     parser.add_argument('--model', default='lstm', type=str,
164 |                         help='Model for classification (default: LSTM)')
165 |     parser.add_argument('--seed', default=12345, type=int,
166 |                         help='Default seed for all random generators')
167 |     parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
168 |                         help='Option flag for using CUDA training (default: True)')
169 |     parser.add_argument('--workers', default=1, type=int, metavar='N',
170 |                         help='number of data loading workers (default: 1)')
171 |     parser.add_argument('--task', default='activity', type=str,
172 |                         help='Default working task activity/affordance')
173 |     parser.add_argument('--epochs', default=100, type=int, metavar='N',
174 |                         help='number of epochs for training (default: 100)')
175 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N',
176 |                         help='batch size for training (default: 1)')
177 |     parser.add_argument('--lr', default=1e-4, type=float,
178 |                         help='learning rate for the feature extraction process (default: 1e-4)')
179 |     parser.add_argument('--lr_decay', default=1., type=float,
180 |                         help='decay rate of learning rate (default: between 0.01 and 1)')
181 |     parser.add_argument('--lr_freq', default=25, type=float,
182 |                         help='learning rate decay frequency while updating')
183 |     parser.add_argument('--subsample', default=None, type=int,
184 |                         help='subsample frequency for Breakfast dataset')
185 |     parser.add_argument('--dropout_rate', default=0, type=float,
186 |                         help='Dropout rate for LSTM training')
187 |     parser.add_argument('--pred_duration', default=45, type=int,
188 |                         help='length of frame prediction')
189 |     parser.add_argument('--using_pred_duration', default=55, type=int,
190 |                         help='prediction duration that the loaded prediction model was trained with')
191 |     args = parser.parse_args()
192 |     main(args)
193 | 
--------------------------------------------------------------------------------
/datasets/CAD/dataparser.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Feb 17, 2017
3 | 
4 | @author: Siyuan Qi
5 | 
6 | Description of the file.
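
Field layout of the two annotation files consumed by parse_data() below, as
read by the parsing code (fields shown as <?> are present but unused here):

    activityLabel.txt : <sequence_id>,<?>,<?>,<obj_id>:<obj_name>,...
    labeling.txt      : <sequence_id>,<start_frame>,<end_frame>,<subactivity>,<affordance_1>,...,<affordance_K>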
7 | 8 | """ 9 | 10 | import os 11 | import time 12 | import fnmatch 13 | import pickle 14 | import json 15 | 16 | import numpy as np 17 | import cv2 18 | 19 | # Local imports 20 | import datasets.CAD.cad_config as config 21 | from datasets.CAD.metadata import CAD_METADATA 22 | from models import parsegraph 23 | metadata = CAD_METADATA() 24 | 25 | 26 | def save_activity_corpus(paths, activity_corpus): 27 | if not os.path.exists(os.path.join(paths.tmp_root, 'corpus')): 28 | os.makedirs(os.path.join(paths.tmp_root, 'corpus')) 29 | 30 | for event, tpgs in activity_corpus.items(): 31 | corpus_filename = os.path.join(paths.tmp_root, 'corpus', event+'.txt') 32 | with open(corpus_filename, 'w') as f: 33 | for tpg in tpgs: 34 | f.write(str(tpg)+'\n') 35 | 36 | 37 | def save_action_gt(paths, skeletons, skeleton_labels): 38 | action_gt = dict() 39 | for s in skeletons: 40 | assert skeletons[s].shape[0] == len(skeleton_labels[s]) 41 | skeletons[s] = skeletons[s].tolist() 42 | action_gt['skeletons'] = skeletons 43 | action_gt['skeleton_labels'] = skeleton_labels 44 | 45 | with open(os.path.join(paths.tmp_root, 'action.json'), 'w') as f: 46 | json.dump(action_gt, f, indent=4, separators=(',', ': ')) 47 | 48 | 49 | def get_position_indices(): 50 | start = 1 51 | position_indices = list() 52 | for i in range(11): 53 | position_indices.extend(range(start+i*14+10, start+i*14+13)) 54 | start += 11*14 55 | for i in range(4): 56 | position_indices.extend(range(start+i*4, start+i*4+3)) 57 | return position_indices 58 | 59 | 60 | def get_left_handed_indices(): 61 | flipped_joint_indices = [0, 1, 2, 5, 6, 3, 4, 9, 10, 7, 8, 12, 11, 14, 13] 62 | left_handed_indices = list() 63 | for joint in flipped_joint_indices: 64 | left_handed_indices.extend([3*joint, 3*joint+1, 3*joint+2]) 65 | return left_handed_indices 66 | 67 | 68 | def get_skeletons(skeletons, eventdir, sequence_ids, left_handed=False): 69 | position_indices = get_position_indices() 70 | left_handed_indices = get_left_handed_indices() 71 | x_indices = [i*3 for i in range(15)] 72 | for sequence_id in sequence_ids: 73 | raw_skeleton_data = np.genfromtxt(os.path.join(eventdir, sequence_id+'.txt'), delimiter=',', skip_footer=1, usecols=range(171)) 74 | assert raw_skeleton_data[-1, 0] == raw_skeleton_data.shape[0] 75 | joint_positions = raw_skeleton_data[:, position_indices]/1000.0 76 | if left_handed: 77 | joint_positions[:, x_indices] = -joint_positions[:, x_indices] 78 | joint_positions = joint_positions[:, left_handed_indices] 79 | skeletons[sequence_id] = joint_positions 80 | 81 | return skeletons 82 | 83 | 84 | def get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed=False): 85 | # Intrinsic camera parameters 86 | fx = 525.0 # focal length x 87 | fy = 525.0 # focal length y 88 | cx = 319.5 # optical center x 89 | cy = 239.5 # optical center y 90 | z_scale = 12.5 91 | 92 | for sequence_id in sequence_ids: 93 | print('get_obj_positions', sequence_id) 94 | obj_positions[sequence_id] = list() 95 | for filename in sorted(os.listdir(eventdir)): 96 | if fnmatch.fnmatch(filename, '{}_obj*.txt'.format(sequence_id)): 97 | print(filename) 98 | position_sequence = list() 99 | 100 | with open(os.path.join(eventdir, filename)) as f: 101 | last_image_bbx = None 102 | for line in f: 103 | line = line.split(',') 104 | frame = line[0] 105 | # if not os.path.exists(os.path.join(eventdir.replace('annotations', 'rgbd_images'), sequence_id, 'Depth_{}.png'.format(frame))): 106 | # exit(1) 107 | depth = cv2.imread(os.path.join(eventdir.replace('annotations', 
'rgbd_images'), sequence_id, 'Depth_{}.png'.format(frame)), -1) 108 | depth = depth.astype(float) / z_scale 109 | 110 | image_bbx = [int(c) for c in line[2:6]] 111 | if not (0 < image_bbx[0] < 640 and 0 < image_bbx[2] < 640 and 0 < image_bbx[1] < 480 and 0 < image_bbx[3] < 480): 112 | if last_image_bbx: 113 | image_bbx = last_image_bbx 114 | else: 115 | continue 116 | else: 117 | last_image_bbx = image_bbx 118 | # image_bbx = [0, 0, depth.shape[1], depth.shape[0]] 119 | 120 | step = 10 121 | # # If the object is invisible, this will be an empty array 122 | positions = np.empty((len(range(image_bbx[0], image_bbx[2], step))*len(range(image_bbx[1], image_bbx[3], step)), 3)) 123 | pt_count = 0 124 | for u in range(image_bbx[0], image_bbx[2], step): 125 | for v in range(image_bbx[1], image_bbx[3], step): 126 | z = depth[v, u] 127 | if z == 0: 128 | continue 129 | if not left_handed: 130 | x = (u - cx) * z / fx 131 | else: 132 | x = -(u - cx) * z / fx 133 | y = -(v - cy) * z / fy # Note: need to flip y to align depth with skeleton 134 | 135 | positions[pt_count, :] = np.array((x, y, z)) 136 | pt_count += 1 137 | positions = positions[:pt_count, :] 138 | position_sequence.append(positions) 139 | # if np.isnan(np.mean(positions, axis=0)[0]): 140 | # print image_bbx, pt_count, positions.shape, positions 141 | # exit(1) 142 | # position_sequence.append(np.mean(positions, 0)) 143 | # position_sequence.append(np.ones((10, 3))) 144 | 145 | obj_positions[sequence_id].append(position_sequence) 146 | 147 | return obj_positions 148 | 149 | 150 | def parse_data(paths): 151 | if os.path.exists(os.path.join(paths.tmp_root, 'activity_corpus.p')): 152 | activity_corpus = pickle.load(open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'rb')) 153 | else: 154 | activity_corpus = dict() 155 | label_list = dict() 156 | skeletons = dict() 157 | skeleton_labels = dict() 158 | obj_positions = dict() 159 | for datadir in os.listdir(os.path.join(paths.data_root)): 160 | datadir = os.path.join(paths.data_root, datadir) 161 | if os.path.isdir(datadir) and datadir.endswith('annotations'): 162 | subject = os.path.split(datadir)[1].strip('_annotations') 163 | print(subject) 164 | left_handed = subject == 'Subject3' 165 | for event in os.listdir(datadir): 166 | # if event != 'stacking_objects': 167 | # continue 168 | if event not in activity_corpus: 169 | activity_corpus[event] = list() 170 | eventdir = os.path.join(datadir, event) 171 | 172 | sequence_objects = dict() 173 | sequence_ids = list() 174 | with open(os.path.join(eventdir, 'activityLabel.txt')) as f: 175 | for line in f: 176 | activity_labels = line.strip(',\n').split(',') 177 | sequence_ids.append(activity_labels[0]) 178 | activity_corpus[event].append(parsegraph.TParseGraph(event, activity_labels[0], subject)) 179 | sequence_objects[activity_labels[0]] = [o.split(':')[-1] for o in activity_labels[3:]] 180 | 181 | get_skeletons(skeletons, eventdir, sequence_ids, left_handed) 182 | get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed) 183 | get_obj_positions(obj_positions, eventdir, sequence_ids, left_handed) 184 | 185 | # Parse data into spatial-temporal parse graphs 186 | with open(os.path.join(eventdir, 'labeling.txt')) as f: 187 | rel_idx = 0 188 | for line in f: 189 | sequence_labeling = line.strip().split(',') 190 | sequence_id = sequence_labeling[0] 191 | tpg = next(tpg for tpg in activity_corpus[event] if tpg.id == sequence_id) 192 | start_frame = int(sequence_labeling[1]) 193 | end_frame = int(sequence_labeling[2]) 194 | subactivity = 
sequence_labeling[3]
195 |                     affordance_labels = sequence_labeling[4:]
196 | 
197 |                     # Create ground truth for action recognition
198 |                     if sequence_id not in skeleton_labels:
199 |                         skeleton_labels[sequence_id] = ['null' for _ in range(skeletons[sequence_id].shape[0])]
200 | 
201 |                     for frame in range(start_frame-1, end_frame):
202 |                         if frame >= skeletons[sequence_id].shape[0]:
203 |                             break
204 |                         skeleton_labels[sequence_id][frame] = subactivity
205 | 
206 |                     # Create ground truth ST-pgs
207 |                     label_list[sequence_id + '$' + str(rel_idx)] = dict()
208 |                     label_list[sequence_id + '$' + str(rel_idx)]['activity'] = metadata.action_index[subactivity]
209 |                     label_list[sequence_id + '$' + str(rel_idx)]['affordance'] = affordance_labels
210 |                     rel_idx += 1
211 |                     spg = parsegraph.SParseGraph(start_frame - 1, end_frame - 1, subactivity, subactivity, sequence_objects.get(sequence_id), affordance_labels)
212 |                     spg.set_skeletons(skeletons[sequence_id][start_frame - 1:end_frame, :])
213 |                     spg.set_obj_positions(obj_positions[sequence_id])
214 |                     tpg.append_terminal(spg)
215 | 
216 |         pickle.dump(label_list, open(os.path.join(paths.tmp_root, 'label_list.p'), 'wb'))
217 |         pickle.dump(activity_corpus, open(os.path.join(paths.tmp_root, 'activity_corpus.p'), 'wb'))
218 |         pickle.dump(skeletons, open(os.path.join(paths.tmp_root, 'skeletons.p'), 'wb'))
219 |         pickle.dump(obj_positions, open(os.path.join(paths.tmp_root, 'obj_positions.p'), 'wb'))
220 | 
221 | 
222 | def main():
223 |     paths = config.Paths()
224 |     start_time = time.time()
225 | 
226 |     parse_data(paths)
227 | 
228 |     print('Time elapsed: {}'.format(time.time() - start_time))
229 | 
230 | 
231 | if __name__ == '__main__':
232 |     main()
233 | 
--------------------------------------------------------------------------------
/datasets/helmert.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on 10/30/18
3 | 
4 | @author: Baoxiong Jia
5 | 
6 | Description:
7 | 
8 | """
9 | import numpy as np
10 | 
11 | def helmert_affine_3d(datum1, datum2):
12 |     '''
13 |     :param datum1: n*3 matrix
14 |     :param datum2: n*3 matrix
15 |     :return:
16 |     '''
17 |     s1 = datum1.shape
18 |     s2 = datum2.shape
19 |     N = s1[0]
20 | 
21 |     G = np.zeros((3 * N, 12), dtype=float)
22 |     E1 = np.ones((N, 1), dtype=float)
23 |     Z1 = np.zeros((N, 1), dtype=float)
24 |     z3 = np.zeros(N, dtype=float)
25 |     # TODO: build the affine design matrix G and solve; unfinished (see helmert_3d)
26 | 
27 | def helmert_3d(datum1, datum2, type='7p', without_scale=0, approx=np.zeros(3, dtype=float)):
28 |     '''
29 |     :param datum1: n*3 matrix
30 |     :param datum2: n*3 matrix
31 |     :param type: '7p' or '10p'
32 |     :param without_scale: 0
33 |     :param approx: (3,) vector
34 |     :return:
35 |     '''
36 |     sof = 1
37 |     # Check parameter validity
38 |     assert len(datum1.shape) == 2 and len(datum2.shape) == 2, 'datum1 and datum2 should be matrices'
39 |     assert approx.shape == (3,), 'the approx vector should be of shape (3,)'
40 |     assert isinstance(type, str), 'type parameter should be a string'
41 |     if type == '7p':
42 |         rc = np.zeros(3, dtype=float)
43 |     else:
44 |         # Case '10p'
45 |         rc = np.mean(datum1, axis=0)
46 | 
47 |     # Check data validity
48 |     N1 = datum1.shape[0]
49 |     N2 = datum2.shape[0]
50 |     N = N1
51 |     assert N1 == N2, 'datum1 and datum2 should have the same dimension'
52 |     assert datum1.shape[1] == 3 and datum2.shape[1] == 3, 'both datum matrix should be of N*3'
53 | 
54 |     # naeh should be (7,) vector, set the naeh vector
55 |     naeh = np.concatenate((np.zeros(3, dtype=float), approx, np.array([1], dtype=float)))
56 |     if
np.array_equal(approx, np.zeros(3, dtype=float)) and N > 3:
57 |         # TODO: add helmert_affine_3d transformation and debug
58 |         pass
59 |     if without_scale != 0:
60 |         naeh[6] = without_scale
61 | 
62 |     wert_A = np.array([1e-8, 1e-8])
63 |     zaehl = 0
64 |     x0 = naeh[0]
65 |     y0 = naeh[1]
66 |     z0 = naeh[2]
67 |     ex = naeh[3]
68 |     ey = naeh[4]
69 |     ez = naeh[5]
70 |     m = naeh[6]
71 |     tp = np.array([x0, y0, z0, ex, ey, ez, m])
72 |     qbb = np.eye(3 * N)
73 |     while True:
74 |         A = np.zeros((3 * N, 7), dtype=float)
75 |         w = np.zeros((3 * N, 1), dtype=float)
76 |         for i in range(N):
77 |             A[i * 3][0] = -1
78 |             A[i * 3 + 1][1] = -1
79 |             A[i * 3 + 2][2] = -1
80 |             A[i * 3][3] = -m * ((np.cos(ex) * np.sin(ey) * np.cos(ez) - np.sin(ex) * np.sin(ez)) * (datum1[i][1] - rc[1])
81 |                                 + (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
82 |             A[i * 3][4] = -m * ((-np.sin(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
83 |                                 (np.sin(ex) * np.cos(ey) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
84 |                                 (-np.cos(ex) * np.cos(ey) * np.cos(ez)) * (datum1[i][2] - rc[2]))
85 |             A[i * 3][5] = -m * ((-np.cos(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
86 |                                 (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
87 |                                 (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
88 |             A[i * 3][6] = -((np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
89 |                             (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
90 |                             (-np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
91 |             A[i * 3 + 1][3] = -m * ((-np.cos(ex) * np.sin(ey) * np.sin(ez) - np.sin(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
92 |                                     (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
93 |             A[i * 3 + 1][4] = -m * ((np.sin(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
94 |                                     (-np.sin(ex) * np.cos(ey) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
95 |                                     (np.cos(ex) * np.cos(ey) * np.sin(ez)) * (datum1[i][2] - rc[2]))
96 |             A[i * 3 + 1][5] = -m * ((-np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
97 |                                     (-np.sin(ex) * np.sin(ey) * np.cos(ez) - np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
98 |                                     (np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
99 |             A[i * 3 + 1][6] = -((-np.cos(ey) * np.sin(ez)) * (datum1[i][0] - rc[0]) +
100 |                                 (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) +
101 |                                 (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2]))
102 |             A[i * 3 + 2][3] = -m * ((-np.cos(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) +
103 |                                     (-np.sin(ex) * np.cos(ey)) * (datum1[i][2] - rc[2]))
104 |             A[i * 3 + 2][4] = -m * ((np.cos(ey)) * (datum1[i][0] - rc[0]) +
105 |                                     (np.sin(ex) * np.sin(ey)) * (datum1[i][1] - rc[1]) +
106 |                                     (-np.cos(ex) * np.sin(ey)) * (datum1[i][2] - rc[2]))
107 |             A[i * 3 + 2][5] = 0
108 |             A[i * 3 + 2][6] = -((np.sin(ey)) * (datum1[i][0] - rc[0]) +
109 |                                 (-np.sin(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) +
110 |                                 (np.cos(ex) * np.cos(ey)) * (datum1[i][2] - rc[2]))
111 |             w[i * 3][0] = -rc[0] + datum2[i][0] - x0 - m * ((np.cos(ey) * np.cos(ez)) * (datum1[i][0] - rc[0]) +
112 |                                 (np.sin(ex) * np.sin(ey) * np.cos(ez) + np.cos(ex) * np.sin(ez)) * (datum1[i][1] - rc[1]) +
113 |                                 (-np.cos(ex) * np.sin(ey) * np.cos(ez) + np.sin(ex) * np.sin(ez)) * (datum1[i][2] - rc[2]))
114 |             w[i * 3 + 1][0] = -rc[1] + datum2[i][1] - y0 - m * ((-np.cos(ey) * np.sin(ez)) *
(datum1[i][0] - rc[0]) + 115 | (-np.sin(ex) * np.sin(ey) * np.sin(ez) + np.cos(ex) * np.cos(ez)) * (datum1[i][1] - rc[1]) + 116 | (np.cos(ex) * np.sin(ey) * np.sin(ez) + np.sin(ex) * np.cos(ez)) * (datum1[i][2] - rc[2])) 117 | w[i * 3 + 2][0] = -rc[2] + datum2[i][2] - z0 - m * ((np.sin(ey))*(datum1[i][0] - rc[0]) + 118 | (-np.sin(ex) * np.cos(ey)) * (datum1[i][1] - rc[1]) + 119 | (np.cos(ex) * np.cos(ey)) * (datum1[i][2] - rc[2])) 120 | if without_scale != 0: 121 | A = A[:, : -1] 122 | 123 | w = -1. * w 124 | r = A.shape[0] - A.shape[1] 125 | pbb = np.linalg.inv(qbb) 126 | quadra_A = np.matmul(np.matmul(A.T, pbb), A) 127 | inv_quadra_A = np.linalg.inv(quadra_A) 128 | delta_x = np.matmul(inv_quadra_A, np.matmul(np.matmul(A.T, pbb), w)) 129 | v = np.matmul(A, delta_x) - w 130 | quadra_v = np.matmul(np.matmul(v.T, pbb), v) 131 | sig0p = np.sqrt(quadra_v / r) 132 | qxxda = inv_quadra_A 133 | kxxda = sig0p ** 2 * qxxda 134 | ac = np.sqrt(np.diag(kxxda)) 135 | 136 | delta_x = delta_x.reshape((-1, )) # reshape to row vector 137 | testv = np.sqrt((delta_x[0] ** 2 + delta_x[1] ** 2 + delta_x[2] ** 2) / 3.) 138 | testd = np.sqrt((delta_x[3] ** 2 + delta_x[4] ** 2 + delta_x[5] ** 2) / 3.) 139 | zaehl = zaehl + 1 140 | x0 = x0 + delta_x[0] 141 | y0 = y0 + delta_x[1] 142 | z0 = z0 + delta_x[2] 143 | ex = ex + delta_x[3] 144 | ey = ey + delta_x[4] 145 | ez = ez + delta_x[5] 146 | if without_scale == 0 and (m + delta_x[6]) > 1e-15: # This condition is to prevent numerical problems with m-->0 147 | m = m + delta_x[6] 148 | tp = np.array([x0, y0, z0, ex, ey, ez, m]) 149 | if abs(testv) < wert_A[0] and abs(testd) < wert_A[1]: 150 | break 151 | elif zaehl > 1000: 152 | sof = 0 153 | print('Iteration Limit Warning: Calculation not converging after 1000 iterations. I am aborting. Results may be inaccurate.') 154 | break 155 | 156 | if len(np.argwhere(np.abs(tp[3:6]) > 2 * np.pi)) > 0: 157 | print('Approximate Accuracy Warning: Rotation angles seem to be big. A better approximation is regarded. 
Results will be inaccurate.') 158 | 159 | idz = np.zeros_like(datum1) 160 | for i in range(N): 161 | idz[i][1] = rc[1] + tp[1] + tp[6] * ((-np.cos(tp[4]) * np.sin(tp[5])) * (datum1[i][0] - rc[0]) + 162 | (-np.sin(tp[3]) * np.sin(tp[4]) * np.sin(tp[5]) + np.cos(tp[3]) * np.cos(tp[5])) * (datum1[i][1] - rc[1]) + 163 | (np.cos(tp[3]) * np.sin(tp[4]) * np.sin(tp[5]) + np.sin(tp[3]) * np.cos(tp[5]))*(datum1[i][2] - rc[2])) 164 | idz[i][0] = rc[0] + tp[0] + tp[6] * ((np.cos(tp[4]) * np.cos(tp[5])) * (datum1[i][0] - rc[0]) + 165 | (np.sin(tp[3]) * np.sin(tp[4]) * np.cos(tp[5]) + np.cos(tp[3]) * np.sin(tp[5])) *(datum1[i][1] - rc[1]) + 166 | (-np.cos(tp[3]) * np.sin(tp[4]) * np.cos(tp[5]) + np.sin(tp[3]) * np.sin(tp[5])) * (datum1[i][2] - rc[2])) 167 | idz[i][2] = rc[2] + tp[2] + tp[6] * ((np.sin(tp[4])) * (datum1[i][0] - rc[0]) + 168 | (-np.sin(tp[3]) * np.cos(tp[4])) * (datum1[i][1] - rc[1]) + 169 | (np.cos(tp[3]) * np.cos(tp[4])) * (datum1[i][2] - rc[2])) 170 | tr = datum2 - idz 171 | return tp, rc, ac, tr, sof 172 | 173 | def helmert_2d(): 174 | pass 175 | 176 | def test(): 177 | cases = [ 178 | ( 179 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 180 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 181 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 182 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]), 183 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 184 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 185 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 186 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]) 187 | ), 188 | ( 189 | np.array([[0.0343117800000000, 0.219011300000000, 1.65202900000000], 190 | [0.144150100000000, 0.265266300000000, 1.86847700000000], 191 | [-0.174984600000000, -0.176452300000000, 1.83610600000000], 192 | [0.0246211000000000, -0.278663500000000, 1.68597900000000]]), 193 | np.array([[0.0304347500000000, 0.271670000000000, 1.67570700000000], 194 | [0.140380900000000, 0.314954300000000, 1.89607300000000], 195 | [-0.153808100000000, -0.135794000000000, 1.85765100000000], 196 | [0.0416980000000000, -0.239627400000000, 1.69971600000000]]) 197 | ) 198 | ] 199 | for case in cases: 200 | output = helmert_3d(case[0], case[1]) 201 | if __name__ == '__main__': 202 | test() -------------------------------------------------------------------------------- /experiments/GEP/gep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/11/18 3 | 4 | @author: Baoxiong Jia 5 | 6 | Description: 7 | 8 | """ 9 | 10 | # System imports 11 | import sys 12 | sys.path.append('/mnt/hdd/home/baoxiong/Projects/TPAMI2019/src') 13 | 14 | import os 15 | import argparse 16 | import json 17 | from tqdm import tqdm 18 | 19 | # Libraries 20 | import numpy as np 21 | import torch 22 | 23 | # Local imports 24 | import models.BiLSTM as lstm_model 25 | import models.MLP as mlp_model 26 | import models.parser.GEP_adj as GEP 27 | import models.parser.grammarutils as grammarutils 28 | import utils.logutils as logutils 29 | import utils.evalutils as evalutils 30 | import utils.vizutils as vizutils 31 | import experiments.exp_config as exp_config 32 | 33 | def inference(model_outputs, activities, sequence_ids, args): 34 | model_output_probs = torch.nn.Softmax(dim=-1)(model_outputs) 35 | model_output_probs = model_output_probs.data.cpu().numpy() 36 | batch_earley_pred_labels = list() 37 | batch_tokens = list() 38 | batch_seg_pos = list() 
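    # Per sequence in the batch: load the grammar induced for its activity
    # class, let the generalized Earley parser search the [frames x classes]
    # softmax matrix for the most probable grammatical sentence, then
    # back-trace the best parse to per-frame labels (earley_pred_labels), the
    # token sequence and the segment boundaries (seg_pos).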
39 | for batch_i in range(model_outputs.size()[1]): 40 | grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i]+'.pcfg') 41 | grammar = grammarutils.read_grammar(grammar_file, index=True, mapping=args.metadata.action_index) 42 | gen_earley_parser = GEP.GeneralizedEarley(grammar) 43 | best_string, prob = gen_earley_parser.parse(model_output_probs[:, batch_i, :]) 44 | # print([int(s) for s in best_string.split()], "{:.2e}".format(decimal.Decimal(prob))) 45 | 46 | # Back trace to get labels of the entire sequence 47 | earley_pred_labels, tokens, seg_pos = gen_earley_parser.compute_labels() 48 | batch_earley_pred_labels.append(earley_pred_labels) 49 | batch_tokens.append(tokens) 50 | batch_seg_pos.append(seg_pos) 51 | 52 | _, nn_pred_labels = torch.max(model_outputs, dim=2) 53 | 54 | return nn_pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos 55 | 56 | def validate(data_loader, model, args): 57 | all_gt_detections = list() 58 | all_detections = list() 59 | 60 | task_acc_ratio = logutils.AverageMeter() 61 | task_macro_prec = logutils.AverageMeter() 62 | task_macro_rec = logutils.AverageMeter() 63 | task_macro_f1 = logutils.AverageMeter() 64 | task_acc_ratio_nn = logutils.AverageMeter() 65 | 66 | # switch to evaluate mode 67 | model.eval() 68 | 69 | for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')): 70 | features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit 71 | print(os.path.join(args.save_path, '{}_out_s{}_b{}_c{}.npy'.format(sequence_ids[0], 72 | args.subsample, args.using_batch_size, args.trained_epochs))) 73 | # exit() 74 | model_outputs = torch.tensor(np.load(os.path.join(args.save_path, '{}_out_s{}_b{}_c{}.npy'.format(sequence_ids[0], 75 | args.subsample, args.using_batch_size, args.trained_epochs)))).unsqueeze(1) 76 | 77 | # Inference 78 | tqdm.write('[{}] Inference'.format(sequence_ids[0])) 79 | 80 | seg_path = os.path.join(args.paths.inter_root, 'segmentation') 81 | if not os.path.exists(seg_path): 82 | os.makedirs(seg_path) 83 | 84 | # # If no prior model outputs are provided 85 | # if not os.path.isfile(os.path.join(seg_path, '{}.npy'.format(sequence_ids[0]))): 86 | # _, nn_pred_labels = torch.max(model_outputs, dim=-1) 87 | # nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist() 88 | # pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args) 89 | # 90 | # # Evaluation 91 | # # Frame-wise detection 92 | # detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()] 93 | # if args.subsample != 1: 94 | # all_total_labels, all_total_lengths = additional 95 | # gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist() 96 | # video_length = len(gt_detections) 97 | # 98 | # detections = evalutils.upsample(detections, freq=args.subsample, length=video_length) 99 | # nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length) 100 | # else: 101 | # gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist() 102 | # detections = detections[:total_lengths[0]] 103 | # np.save(os.path.join(args.paths.inter_root, 'segmentation', '{}.npy'.format(sequence_ids[0])), 104 | # [gt_detections, nn_detections, detections]) 105 | # else: 106 | # results = np.load(os.path.join(seg_path, '{}.npy'.format(sequence_ids[0]))) 107 | # gt_detections, nn_detections, detections = 
        _, nn_pred_labels = torch.max(model_outputs, dim=-1)
        nn_detections = nn_pred_labels.cpu().data.numpy().flatten().tolist()
        pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(model_outputs, activities, sequence_ids, args)

        # Evaluation
        # Frame-wise detection
        detections = [l for pred_labels in batch_earley_pred_labels for l in pred_labels.tolist()]
        if args.subsample != 1:
            all_total_labels, all_total_lengths = additional
            gt_detections = all_total_labels[:all_total_lengths[0]].flatten().tolist()
            video_length = len(gt_detections)

            detections = evalutils.upsample(detections, freq=args.subsample, length=video_length)
            nn_detections = evalutils.upsample(nn_detections, freq=args.subsample, length=video_length)
        else:
            gt_detections = labels_batch[:total_lengths[0]].cpu().data.numpy().flatten().tolist()
            detections = detections[:total_lengths[0]]
            video_length = len(gt_detections)

        # # Visualization code for figures
        # vizutils.plot_segmentation([gt_detections, nn_detections, detections], video_length,
        #                            filename=os.path.join(args.paths.visualize_root, '{}.jpg'.format(sequence_ids[0])), border=False)

        micro_prec = logutils.compute_accuracy(gt_detections, detections)
        micro_prec_nn = logutils.compute_accuracy(gt_detections, nn_detections)
        macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_detections, detections, metric='macro')
        task_acc_ratio.update(micro_prec, video_length)
        task_acc_ratio_nn.update(micro_prec_nn, video_length)
        task_macro_prec.update(macro_prec, video_length)
        task_macro_rec.update(macro_rec, video_length)
        task_macro_f1.update(macro_f1, video_length)

        all_gt_detections.extend(gt_detections)
        all_detections.extend(detections)

    micro_prec = logutils.compute_accuracy(all_gt_detections, all_detections)
    macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_detections, all_detections,
                                                                       metric='macro')
    tqdm.write('[Evaluation] Detection:\n'
               'Micro Prec: {}\t'
               'NN Prec: {}\t'
               'Macro Precision: {}\t'
               'Macro Recall: {}\t'
               'Macro F-score: {}\n\n'.format(micro_prec, task_acc_ratio_nn.avg,
                                              macro_prec, macro_recall, macro_fscore))

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2
    if args.model == 'lstm':
        parsing_model = lstm_model.BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    else:
        parsing_model = mlp_model.MLP(feature_size, hidden_size, label_num)
    parsing_model = torch.nn.DataParallel(parsing_model)
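    # The detection checkpoints are saved with the subsample flag fixed to 1,
    # so temporarily override args.subsample while composing the resume path
    # and restore it afterwards.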
    prev = args.subsample
    args.subsample = 1
    args.save_path = os.path.join(paths.inter_root, 'likelihood', args.task, args.model)
    args.resume = os.path.join(paths.checkpoint_root,
                               'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.model, args.epochs,
                                                                                    args.lr, args.using_batch_size, args.lr_decay,
                                                                                    1 if not args.subsample else args.subsample,
                                                                                    args.dropout_rate))
    args.subsample = prev
    logutils.load_checkpoint(args, parsing_model)
    validate(test_loader, parsing_model, args=args)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='CAD', type=str,
                        help='indicating which dataset to use')
    parser.add_argument('--model', default='lstm', type=str,
                        help='Model for classification (default: LSTM)')
    parser.add_argument('--seed', default=12345, type=int,
                        help='Default seed for all random generators')
    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
                        help='Option flag for using CUDA training (default: True)')
    parser.add_argument('--workers', default=1, type=int, metavar='N',
                        help='number of data loading workers (default: 1)')
    parser.add_argument('--task', default='activity', type=str,
                        help='Default working task activity/affordance')
    parser.add_argument('--epochs', default=100, type=int, metavar='N',
                        help='number of epochs for training (default: 100)')
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size for training (default: 1)')
    parser.add_argument('--using_batch_size', default=1, type=int, metavar='N',
                        help='using model trained on args.using_batch_size')
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='learning rate for the feature extraction process (default: 1e-4)')
    parser.add_argument('--lr_decay', default=1, type=float,
                        help='decay rate of learning rate (default: between 0.01 and 1)')
    parser.add_argument('--lr_freq', default=25, type=float,
                        help='learning rate decay frequency while updating')
    parser.add_argument('--subsample', default=1, type=int,
                        help='subsample frequency for Breakfast dataset')
    parser.add_argument('--dropout_rate', default=0, type=float,
                        help='Dropout rate for LSTM training')
    parser.add_argument('--trained_epochs', default=100, type=int,
                        help='The number of iterations for trained model')
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/experiments/GEP/gep_pred_topdown.py:
--------------------------------------------------------------------------------
"""
Created on 5/21/19

@author: Baoxiong Jia

Description:

"""

# System imports
import sys
sys.path.append('/media/hdd/home/baoxiong/Projects/TPAMI2019/src')

import os
import argparse
import json
import time
import copy

# Libraries
from tqdm import tqdm
import numpy as np
import torch

# Local imports
from models.BiLSTM import BiLSTM
from models.LSTM_pred import LSTM_Pred
import models.parser.GEP_online as GEP
import models.parser.grammarutils as grammarutils
import utils.logutils as logutils
import experiments.exp_config as exp_config

def predict(parser, detection_output, duration_prior, record, frame, args, epsilon=1e-5):
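    """Top-down prediction of the next args.using_pred_duration frame labels.

    The online parser is updated with the current frame's class probabilities;
    the current action is assumed to persist for the remainder of its prior
    mean duration, after which a copy of the parser is repeatedly advanced and
    queried for the most likely next symbol until the horizon is filled.
    """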
    detection_output_prob = torch.nn.Softmax(dim=-1)(detection_output).data.cpu().numpy()
    parser.update_prob(detection_output_prob)
    best_l, _ = parser.parse()
    current_token = args.metadata.action_index[best_l.split()[-1]]
    if 'last' not in record.keys() or current_token != record['last']:
        record['last'] = current_token
        record['start'] = frame

    pred_duration = args.using_pred_duration
    pred_labels = list()
    predict_parser = copy.deepcopy(parser)
    mu, sigma = duration_prior[args.metadata.actions[current_token]]
    current_duration = max(0, int(mu) - (frame - record['start'] + 1))
    pred_labels.extend([current_token for _ in range(current_duration)])
    pred_duration -= current_duration
    while pred_duration > 0:
        prob = np.ones(len(args.metadata.actions)) * epsilon
        prob[current_token] = 1.0
        prob = prob / np.sum(prob)
        for _ in range(current_duration):
            predict_parser.update_prob(prob)
            predict_parser.parse()
        predict_mat = predict_parser.future_predict()
        current_token = np.argmax(predict_mat, axis=-1)
        mu, sigma = duration_prior[args.metadata.actions[current_token]]
        # Guard against zero-duration priors, which would otherwise keep
        # predicting the same token without consuming the prediction horizon
        current_duration = max(1, int(mu))
        pred_duration -= current_duration
        pred_labels.extend([current_token for _ in range(current_duration)])
    pred_labels = pred_labels[:args.using_pred_duration]
    return pred_labels

def validate(data_loader, detection_model, prediction_model, args):
    all_gt_frame_predictions = list()
    all_frame_predictions = list()
    all_nn_frame_predictions = list()

    task_acc_ratio = logutils.AverageMeter()
    task_macro_prec = logutils.AverageMeter()
    task_macro_rec = logutils.AverageMeter()
    task_macro_f1 = logutils.AverageMeter()
    task_acc_ratio_nn = logutils.AverageMeter()

    # switch to evaluate mode
    detection_model.eval()
    prediction_model.eval()

    for batch_idx, data_unit in enumerate(tqdm(data_loader, desc='GEP evaluation')):
        features_batch, labels_batch, activities, sequence_ids, total_lengths, obj_nums, ctc_labels, ctc_lengths, probs_batch, additional = data_unit

        padding = features_batch[0, :, :].repeat(args.using_pred_duration - 1, 1, 1)
        prediction_features = torch.cat((padding, features_batch), dim=0)
        prediction_output = prediction_model(prediction_features)
        detection_output = detection_model(features_batch)

        _, detection_labels = torch.max(detection_output, dim=-1)
        detection_labels = detection_labels.cpu().numpy()

        for batch_i in range(detection_output.size(1)):

            gt_all_pred_labels = labels_batch[1: total_lengths[batch_i], batch_i].cpu().numpy().tolist()
            _, nn_all_pred_labels = torch.max(prediction_output[:total_lengths[batch_i] - 1, batch_i, :], dim=-1)
            nn_all_pred_labels = nn_all_pred_labels.cpu().numpy().tolist()

            # Initialization of Earley Parser
            class_num = detection_output.shape[2]
            grammar_file = os.path.join(args.paths.grammar_root, activities[batch_i] + '.pcfg')
            grammar = grammarutils.read_grammar(grammar_file, index=True)
            gen_earley_parser = GEP.GeneralizedEarley(grammar, class_num, mapping=args.metadata.action_index)
            with open(os.path.join(args.paths.prior_root, 'duration_prior.json')) as f:
                duration_prior = json.load(f)

            record = dict()

            start_time = time.time()
            for frame in range(total_lengths[batch_i] - args.using_pred_duration):
                nn_pred_labels = nn_all_pred_labels[frame: frame + args.using_pred_duration]
                gt_pred_labels = gt_all_pred_labels[frame: frame + args.using_pred_duration]
                update_length = len(nn_pred_labels)

                pred_labels = predict(gen_earley_parser, detection_output[frame, batch_i, :],
                                      duration_prior, record, frame, args)
                # gt = torch.ones(detection_output.size(2)) * 1e-5
                # gt[labels_batch[frame, batch_i]] = 1
                # gt = torch.log(gt / torch.sum(gt))
                # pred_labels = predict(gen_earley_parser, gt,
                #                       duration_prior, record, frame, args)
                # print(frame)
                # print('detection_labels', detection_labels[max(0, frame - 44): frame + 1, batch_i].tolist())
                # print('gt_detect labels', labels_batch[max(0, frame - 44): frame + 1, batch_i].cpu().numpy().tolist())
                # print('gt_predic_labels', gt_pred_labels)
                # print('nn_predic_labels', nn_pred_labels)
                # print('xx_predic_labels', pred_labels)

                micro_prec = logutils.compute_accuracy(gt_pred_labels, pred_labels)
                nn_micro_prec = logutils.compute_accuracy(gt_pred_labels, nn_pred_labels)
                macro_prec, macro_rec, macro_f1 = logutils.compute_accuracy(gt_pred_labels, pred_labels,
                                                                            metric='macro')
                task_acc_ratio.update(micro_prec, update_length)
                task_acc_ratio_nn.update(nn_micro_prec, update_length)
                task_macro_prec.update(macro_prec, update_length)
                task_macro_rec.update(macro_rec, update_length)
                task_macro_f1.update(macro_f1, update_length)

                all_gt_frame_predictions.extend(gt_pred_labels)
                all_frame_predictions.extend(pred_labels)
                all_nn_frame_predictions.extend(nn_pred_labels)

            print('Sequence parsing time: {:.3f}s'.format(time.time() - start_time))

        tqdm.write('Task {} {} Batch [{}/{}]\t'
                   'Acc {top1.val:.4f} ({top1.avg:.4f})\t'
                   'NN Acc {nn.val:.4f} ({nn.avg:.4f})\t'
                   'Prec {prec.val:.4f} ({prec.avg:.4f})\t'
                   'Recall {recall.val:.4f} ({recall.avg:.4f})\t'
                   'F1 {f1.val:.4f} ({f1.avg:.4f})'.format(
                       args.task, 'test', batch_idx, len(data_loader), top1=task_acc_ratio, nn=task_acc_ratio_nn,
                       prec=task_macro_prec, recall=task_macro_rec, f1=task_macro_f1))

    micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions)
    nn_micro_prec = logutils.compute_accuracy(all_gt_frame_predictions, all_nn_frame_predictions)
    macro_prec, macro_recall, macro_fscore = logutils.compute_accuracy(all_gt_frame_predictions, all_frame_predictions,
                                                                       metric='macro')
    tqdm.write('[Evaluation] Micro Prec: {}\t'
               'NN Micro Prec: {}\t'
               'Macro Precision: {}\t'
               'Macro Recall: {}\t'
               'Macro F-score: {}'.format(micro_prec, nn_micro_prec, macro_prec, macro_recall, macro_fscore))

def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(args, save=True)
    label_num = exp_info.get_label_num(args)

    hidden_size = 256
    hidden_layers = 2

    args.resume = os.path.join(paths.checkpoint_root,
                               'detection_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(args.task, args.epochs,
                                                                                 args.lr, args.batch_size, args.lr_decay,
                                                                                 1 if not args.subsample else args.subsample,
                                                                                 args.dropout_rate))
    detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    args.resume = os.path.join(paths.checkpoint_root,
                               'frame_prediction_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(args.task, args.epochs,
                                                                                             args.lr, args.batch_size,
                                                                                             args.lr_decay,
                                                                                             1 if not args.subsample else args.subsample,
                                                                                             args.dropout_rate,
                                                                                             args.using_pred_duration))
    prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    validate(test_loader, detection_model, prediction_model, args=args)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='VCLA_GAZE', type=str,
                        help='indicating which dataset to use')
    parser.add_argument('--seed', default=12345, type=int,
                        help='Default seed for all random generators')
    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool,
                        help='Option flag for using CUDA training (default: True)')
    parser.add_argument('--workers', default=1, type=int, metavar='N',
                        help='number of data loading workers (default: 1)')
    parser.add_argument('--task', default='activity', type=str,
                        help='Default working task activity/affordance')
    parser.add_argument('--epochs', default=50, type=int, metavar='N',
                        help='number of epochs for training (default: 50)')
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size for training (default: 1)')
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='learning rate for the feature extraction process (default: 1e-4)')
    parser.add_argument('--lr_decay', default=1, type=float,
                        help='decay rate of learning rate (default: between 0.01 and 1)')
    parser.add_argument('--lr_freq', default=25, type=float,
                        help='learning rate decay frequency while updating')
    parser.add_argument('--subsample', default=None, type=int,
                        help='subsample frequency for Breakfast dataset')
    parser.add_argument('--dropout_rate', default=0, type=float,
                        help='Dropout rate for LSTM training')
    parser.add_argument('--using_pred_duration', default=45, type=int,
                        help='prediction horizon (in frames) the prediction model was trained for (default: 45)')
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/models/parser/GEP_old.py:
--------------------------------------------------------------------------------
"""
Created on Jan 25, 2018

@author: Siyuan Qi

Description of the file.
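A generalized Earley parser that searches the language of a probabilistic
grammar directly over frame-wise classifier probabilities, returning the most
likely grammar-compatible label sequence via best-first prefix expansion.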

"""

import queue as Queue

import numpy as np
import nltk.grammar


class State(object):
    def __init__(self, r, dot, i, j, prefix, prob):
        self._r = r
        self._dot = dot
        self._i = i
        self._j = j
        self._prefix = prefix
        self._prob = prob

    def is_complete(self):
        return self._dot == len(self._r.rhs())

    def next_symbol(self):
        if self.is_complete():
            return None
        return self._r.rhs()[self._dot]

    def __repr__(self):
        rhs = [str(n) for n in self._r.rhs()]
        rhs = ' '.join(rhs[:self._dot]) + " * " + ' '.join(rhs[self._dot:])
        return '[{}:{}:{}] {} -> {} : {:.3f} "{}"'\
            .format(self._dot, self._i, self._j, self._r.lhs(), rhs, self._prob, ' '.join(self._prefix))

    @property
    def r(self): return self._r

    @property
    def dot(self): return self._dot

    @property
    def i(self): return self._i

    @property
    def j(self): return self._j

    @property
    def prefix(self): return self._prefix

    @property
    def prob(self): return self._prob

    def prefix_str(self):
        return ' '.join(self._prefix)


class GeneralizedEarley(object):
    def __init__(self, grammar):
        self._grammar = grammar
        self._classifier_output = None
        self._total_frame = 0
        self._cached_prob = None
        self._state_set = None
        self._queue = None
        self._prefix_queue = None
        self._max_prob = None
        self._best_l = None
        self._parse_init()

    def _parse_init(self, classifier_output=None):
        self._queue = Queue.PriorityQueue()
        self._prefix_queue = Queue.PriorityQueue()
        self._state_set = [[[]]]
        for r in self._grammar.productions():
            if str(r.lhs()) == 'GAMMA':
                self._state_set[0][0].append(State(r, 0, 0, 0, [], 0.0))
                break
        self._queue.put((1.0 - 1.0, (0, 0, '', self._state_set[0][0])))
        self._max_prob = -np.inf

        if classifier_output is not None:
            if len(classifier_output.shape) != 2:
                raise ValueError('Classifier output shape not recognized, expecting (frame_num, class_num).')
            self._classifier_output = classifier_output
            self._cached_prob = dict()
            self._total_frame = self._classifier_output.shape[0]
            self._class_num = self._classifier_output.shape[1]
            self._cached_prob[''] = np.ones(self._total_frame + 1) * np.finfo('d').min
            self._cached_prob[''][self._total_frame] = 0.0

    def parse(self, classifier_output):
        self._parse_init(classifier_output)
        count = 0
        while not self._queue.empty():
            count += 1
            # print(count)
            _, (m, n, set_l, current_set) = self._queue.get()
            # print(set_l)
            branch_probs = dict()
            branch_probs[set_l] = self._cached_prob[set_l][self._total_frame - 1]
            for s in current_set:
                l = ' '.join(s.prefix)
                if self._cached_prob[l][self._total_frame - 1] > self._max_prob:
                    self._max_prob = self._cached_prob[l][self._total_frame - 1]
                    self._best_l = l

                if s.is_complete():
                    self.complete(m, n, s)
                elif nltk.grammar.is_nonterminal(s.next_symbol()):
                    self.predict(m, n, s)
                elif nltk.grammar.is_terminal(s.next_symbol()):
                    if m == self._total_frame:
                        continue
                    new_l = self.scan(m, n, s)
                    branch_probs[new_l] = self._cached_prob[new_l][self._total_frame]
                else:
                    raise ValueError('No operation (predict, scan, complete) applies to state {}'.format(s))

            # Early stop
            if not self._queue.empty():
                _, best_prefix_string = self._prefix_queue.get()
                max_prefix_prob = self._cached_prob[best_prefix_string][self._total_frame]
            else:
                max_prefix_prob = -np.inf
            max_branch_prob = max([val for key, val in branch_probs.items()])
            if branch_probs[set_l] == max_branch_prob:
                if max_branch_prob > self._max_prob:
                    self._best_l, self._max_prob = set_l, max_branch_prob
                if self._max_prob > max_prefix_prob:
                    # print('Find best parse before exhausting all strings.')  # TODO: check validity
                    return self._best_l, self._max_prob
        return self._best_l, self._max_prob

    def get_log_prob_sum(self):
        log_prob = np.log(self._classifier_output).transpose()
        log_prob_sum = np.zeros((self._class_num, self._total_frame, self._total_frame))
        for c in range(self._class_num):
            for b in range(self._total_frame):
                log_prob_sum[c, b, b] = log_prob[c, b]
        for c in range(self._class_num):
            for b in range(self._total_frame):
                for e in range(b + 1, self._total_frame):
                    log_prob_sum[c, b, e] = log_prob_sum[c, b, e - 1] + log_prob[c, e]
        return log_prob, log_prob_sum

    def compute_labels(self):
        log_prob, log_prob_sum = self.get_log_prob_sum()

        tokens = [int(token) for token in self._best_l.split(' ')]
        dp_tables = np.zeros((len(tokens), self._total_frame))
        traces = np.zeros_like(dp_tables)

        for end in range(0, self._total_frame):
            dp_tables[0, end] = log_prob_sum[tokens[0], 0, end]

        for token_i, token in enumerate(tokens):
            if token_i == 0:
                continue
            for end in range(token_i, self._total_frame):
                max_log_prob = -np.inf
                for begin in range(token_i, end + 1):
                    check_prob = dp_tables[token_i - 1, begin - 1] + log_prob_sum[token, begin, end]
                    if check_prob > max_log_prob:
                        max_log_prob = check_prob
                        traces[token_i, end] = begin - 1
                dp_tables[token_i, end] = max_log_prob

        # Back tracing
        token_pos = [-1 for _ in tokens]
        token_pos[-1] = self._total_frame - 1
        for token_i in reversed(range(len(tokens) - 1)):
            token_pos[token_i] = int(traces[token_i + 1, token_pos[token_i + 1]])

        labels = -np.ones(self._total_frame, dtype=int)
        labels[:token_pos[0] + 1] = tokens[0]
        for token_i in range(1, len(tokens)):
            labels[token_pos[token_i - 1] + 1:token_pos[token_i] + 1] = tokens[token_i]

        return labels, self._best_l.split(' '), token_pos

    def complete(self, m, n, s):
        for back_s in self._state_set[s.i][s.j]:
            if str(back_s.next_symbol()) == str(s.r.lhs()):
                new_s = State(back_s.r, back_s.dot + 1, back_s.i, back_s.j, s.prefix, s.prob)
                # # if str(new_s.r.lhs()) == 'GAMMA':
                # #     print(new_s.prefix)

                # # For grammars that don't have recursive rules
                # self._state_set[m][n].append(new_s)

                # For grammars that have recursive rules
                state_exist = False
                for exist_s in self._state_set[m][n]:
                    if str(exist_s) == str(new_s):
                        state_exist = True
                        break
                if not state_exist:
                    # print 'complete: S[{}, {}]'.format(m, n), new_s
                    self._state_set[m][n].append(new_s)

    def predict(self, m, n, s):
        expand_symbol = str(s.next_symbol())
        for r in self._grammar.productions():
            if expand_symbol == str(r.lhs()):
                new_s = State(r, 0, m, n, s.prefix, s.prob)

                # # For grammars that don't have recursive rules
                # self._state_set[m][n].append(new_s)

                # For grammars that have recursive rules
                state_exist = False
                for exist_s in self._state_set[m][n]:
                    if str(exist_s) == str(new_s):
                        state_exist = True
                        break
                if not state_exist:
                    # print 'predict: S[{}, {}]'.format(m, n), new_s
                    self._state_set[m][n].append(new_s)

    def scan(self, m, n, s):
        new_prefix = s.prefix[:]
        new_prefix.append(str(s.next_symbol()))
        prob = self.compute_prob(new_prefix)
        new_s = State(s.r, s.dot + 1, s.i, s.j, new_prefix, prob)
        if m == len(self._state_set) - 1:
            new_n = 0
            self._state_set.append([])
        else:
            new_n = len(self._state_set[m + 1])

        # To eliminate same prefix branches
        state_exist = False
        for state_set in self._state_set[m + 1]:
            exist_s = state_set[0]
            if exist_s.prefix_str() == new_s.prefix_str():
                state_exist = True
                break

        new_prefix_str = ' '.join(new_prefix)
        if not state_exist:
            # print 'scan: S[{}, {}]'.format(m+1, new_n), new_s
            self._state_set[m + 1].append([])
            self._state_set[m + 1][new_n].append(new_s)
            self._queue.put((1.0 - prob, (m + 1, new_n, new_prefix_str, self._state_set[m + 1][new_n])))
            self._prefix_queue.put((1.0 - prob, new_prefix_str))

        return new_prefix_str

    def update_prob(self, prefix):
        pass

    def compute_prob(self, prefix):
        l = ' '.join(prefix)
        if l not in self._cached_prob:
            k = int(prefix[-1])
            l_minus = ' '.join(prefix[:-1])
            self._cached_prob[l] = np.ones(self._total_frame + 1) * np.finfo('d').min
            if len(prefix) == 1:
                self._cached_prob[l][0] = np.log(self._classifier_output[0, k])

            # Compute p(l)
            for t in range(1, self._total_frame):
                max_log = max(self._cached_prob[l][t - 1], self._cached_prob[l_minus][t - 1])
                self._cached_prob[l][t] = np.log(self._classifier_output[t, k]) + max_log \
                    + np.log(np.exp(self._cached_prob[l][t - 1] - max_log)
                             + np.exp(self._cached_prob[l_minus][t - 1] - max_log))

            # Compute p(l...)
            if self._total_frame == 1:
                max_log = self._cached_prob[l][0]
            else:
                max_log = max(self._cached_prob[l][0], np.max(self._cached_prob[l_minus][0:self._total_frame - 1]))
            self._cached_prob[l][self._total_frame] = np.exp(self._cached_prob[l][0] - max_log)
            for t in range(1, self._total_frame):
                self._cached_prob[l][self._total_frame] += self._classifier_output[t, k] * np.exp(self._cached_prob[l_minus][t - 1] - max_log)
            self._cached_prob[l][self._total_frame] = np.log(self._cached_prob[l][self._total_frame]) + max_log
            # TODO: change self._cached_prob[l][self._total_frame] to be self.prefix_prob[l]
            # TODO: update self._cached_prob[l] from [0, self._total_frame - 1] so that it can be adjusted online
        # Search according to prefix probability (the prefix probability is stored in the last dimension!)
        return self._cached_prob[l][self._total_frame]


def main():
    pass


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------