├── GCN_lib
    ├── Rs_GCN.py
    └── __init__.py
├── README.md
├── __init__.py
├── coco-caption
    ├── LICENSE
    ├── pyciderevalcap
    │   ├── __init__.py
    │   ├── cider
    │   │   ├── __init__.py
    │   │   ├── cider.py
    │   │   └── cider_scorer.py
    │   ├── ciderD
    │   │   ├── __init__.py
    │   │   ├── ciderD.py
    │   │   └── ciderD_scorer.py
    │   ├── eval.py
    │   └── tokenizer
    │   │   ├── __init__.py
    │   │   ├── ptbtokenizer.py
    │   │   └── stanford-corenlp-3.4.1.jar
    ├── pycocoevalcap
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   └── __init__.cpython-36.pyc
    │   ├── bleu
    │   │   ├── LICENSE
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-35.pyc
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── bleu.cpython-35.pyc
    │   │   │   ├── bleu.cpython-36.pyc
    │   │   │   ├── bleu_scorer.cpython-35.pyc
    │   │   │   └── bleu_scorer.cpython-36.pyc
    │   │   ├── bleu.py
    │   │   └── bleu_scorer.py
    │   ├── cider
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-35.pyc
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── cider.cpython-35.pyc
    │   │   │   ├── cider.cpython-36.pyc
    │   │   │   ├── cider_scorer.cpython-35.pyc
    │   │   │   └── cider_scorer.cpython-36.pyc
    │   │   └── cider.py
    │   ├── eval.py
    │   ├── meteor
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-35.pyc
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── meteor.cpython-35.pyc
    │   │   │   └── meteor.cpython-36.pyc
    │   │   ├── meteor-1.5.jar
    │   │   └── meteor.py
    │   ├── rouge
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-35.pyc
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── rouge.cpython-35.pyc
    │   │   │   └── rouge.cpython-36.pyc
    │   │   └── rouge.py
    │   └── tokenizer
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │       ├── __init__.cpython-35.pyc
    │   │       ├── __init__.cpython-36.pyc
    │   │       ├── ptbtokenizer.cpython-35.pyc
    │   │       └── ptbtokenizer.cpython-36.pyc
    │   │   ├── ptbtokenizer.py
    │   │   └── stanford-corenlp-3.4.1.jar
    └── pycocotools
    │   ├── __init__.py
    │   ├── _mask.c
    │   ├── _mask.pyx
    │   ├── coco.py
    │   ├── cocoeval.py
    │   └── mask.py
├── cocoapi-master
    ├── LuaAPI
    │   ├── CocoApi.lua
    │   ├── MaskApi.lua
    │   ├── cocoDemo.lua
    │   ├── env.lua
    │   ├── init.lua
    │   └── rocks
    │   │   └── coco-scm-1.rockspec
    ├── MatlabAPI
    │   ├── CocoApi.m
    │   ├── CocoEval.m
    │   ├── CocoUtils.m
    │   ├── MaskApi.m
    │   ├── cocoDemo.m
    │   ├── evalDemo.m
    │   ├── gason.m
    │   └── private
    │   │   ├── gasonMex.cpp
    │   │   ├── gasonMex.mexa64
    │   │   ├── gasonMex.mexmaci64
    │   │   ├── getPrmDflt.m
    │   │   └── maskApiMex.c
    ├── PythonAPI
    │   ├── Makefile
    │   ├── build
    │   │   ├── common
    │   │   │   └── maskApi.o
    │   │   ├── lib.linux-x86_64-2.7
    │   │   │   └── pycocotools
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── _mask.so
    │   │   │   │   ├── coco.py
    │   │   │   │   ├── cocoeval.py
    │   │   │   │   └── mask.py
    │   │   └── temp.linux-x86_64-2.7
    │   │   │   └── pycocotools
    │   │   │       └── _mask.o
    │   ├── dist
    │   │   └── pycocotools-2.0-py2.7-linux-x86_64.egg
    │   ├── pycocotools.egg-info
    │   │   ├── PKG-INFO
    │   │   ├── SOURCES.txt
    │   │   ├── dependency_links.txt
    │   │   ├── requires.txt
    │   │   └── top_level.txt
    │   ├── pycocotools
    │   │   ├── __init__.py
    │   │   ├── _mask.c
    │   │   ├── _mask.pyx
    │   │   ├── _mask.so
    │   │   ├── coco.py
    │   │   ├── cocoeval.py
    │   │   └── mask.py
    │   └── setup.py
    ├── README.txt
    ├── common
    │   ├── gason.cpp
    │   ├── gason.h
    │   ├── maskApi.c
    │   └── maskApi.h
    ├── license.txt
    └── results
    │   ├── captions_val2014_fakecap_results.json
    │   ├── instances_val2014_fakebbox100_results.json
    │   ├── instances_val2014_fakesegm100_results.json
    │   ├── person_keypoints_val2014_fakekeypoints100_results.json
    │   └── val2014_fake_eval_res.txt
├── data.py
├── evaluate_models.py
├── evaluation.py
├── evaluation_models.py
├── fig
    ├── Q_i2t.png
    ├── Q_t2i_2.png
    ├── model.png
    └── teaser.png
├── misc
    ├── __init__.py
    ├── cocoeval.py
    ├── rewards.py
    └── utils.py
├── model.py
├── models
    ├── Attention.py
    ├── DecoderRNN.py
    ├── EncoderRNN.py
    ├── S2VTAttModel.py
    ├── S2VTModel.py
    └── __init__.py
├── opts.py
├── requirement.txt
├── train.py
├── vocab.py
└── vocab
    ├── 10crop_precomp_vocab.pkl
    ├── coco_precomp_vocab.pkl
    ├── coco_vocab.pkl
    ├── f30k_precomp_vocab.pkl
    ├── f30k_vocab.pkl
    ├── f8k_precomp_vocab.pkl
    └── f8k_vocab.pkl


/GCN_lib/Rs_GCN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | 
 5 | 
 6 | 
 7 | class Rs_GCN(nn.Module):
 8 | 
 9 |     def __init__(self, in_channels, inter_channels, bn_layer=True):
10 |         super(Rs_GCN, self).__init__()
11 | 
12 |         self.in_channels = in_channels
13 |         self.inter_channels = inter_channels
14 | 
15 |         if self.inter_channels is None:
16 |             self.inter_channels = in_channels // 2
17 |             if self.inter_channels == 0:
18 |                 self.inter_channels = 1
19 | 
20 | 
21 |         conv_nd = nn.Conv1d
22 |         max_pool = nn.MaxPool1d
23 |         bn = nn.BatchNorm1d
24 | 
25 |         self.g = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
26 |                          kernel_size=1, stride=1, padding=0)
27 | 
28 |         if bn_layer:
29 |             self.W = nn.Sequential(
30 |                 conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels,
31 |                         kernel_size=1, stride=1, padding=0),
32 |                 bn(self.in_channels)
33 |             )
34 |             nn.init.constant(self.W[1].weight, 0)
35 |             nn.init.constant(self.W[1].bias, 0)
36 |         else:
37 |             self.W = conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels,
38 |                              kernel_size=1, stride=1, padding=0)
39 |             nn.init.constant(self.W.weight, 0)
40 |             nn.init.constant(self.W.bias, 0)
41 | 
42 |         self.theta = None
43 |         self.phi = None
44 | 
45 | 
46 |         self.theta = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
47 |                              kernel_size=1, stride=1, padding=0)
48 |         self.phi = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
49 |                            kernel_size=1, stride=1, padding=0)
50 | 
51 | 
52 | 
53 | 
54 |     def forward(self, v):
55 |         '''
56 |         :param v: (B, D, N)
57 |         :return:
58 |         '''
59 |         batch_size = v.size(0)
60 | 
61 |         g_v = self.g(v).view(batch_size, self.inter_channels, -1)
62 |         g_v = g_v.permute(0, 2, 1)
63 | 
64 |         theta_v = self.theta(v).view(batch_size, self.inter_channels, -1)
65 |         theta_v = theta_v.permute(0, 2, 1)
66 |         phi_v = self.phi(v).view(batch_size, self.inter_channels, -1)
67 |         R = torch.matmul(theta_v, phi_v)
68 |         N = R.size(-1)
69 |         R_div_C = R / N
70 | 
71 |         y = torch.matmul(R_div_C, g_v)
72 |         y = y.permute(0, 2, 1).contiguous()
73 |         y = y.view(batch_size, self.inter_channels, *v.size()[2:])
74 |         W_y = self.W(y)
75 |         v_star = W_y + v
76 | 
77 |         return v_star
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/GCN_lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Visual Semantic Reasoning for Image-Text Matching (VSRN)
 2 | PyTorch code for VSRN described in the paper "Visual Semantic Reasoning for Image-Text Matching". The paper will appear in ICCV 2019 as oral presentation. It is built on top of the [VSE++](https://github.com/fartashf/vsepp).
 3 | 
 4 | [Kunpeng Li](https://kunpengli1994.github.io/), [Yulun Zhang](http://yulunzhang.com/), [Kai Li](http://kailigo.github.io/), Yuanyuan Li and [Yun Fu](http://www1.ece.neu.edu/~yunfu/). "Visual Semantic Reasoning for Image-Text Matching", ICCV, 2019. [[pdf](https://arxiv.org/pdf/1909.02701.pdf)]
 5 | 
 6 | ## Introduction
 7 | Image-text matching has been a hot research topic bridging the vision and language areas. It remains challenging because the current representation of image usually lacks global semantic concepts as in its corresponding text caption. To address this issue, we propose a simple and interpretable reasoning model to generate visual representation that captures key objects and semantic concepts of a scene. Specifically, we first build up connections between image regions and perform reasoning with Graph Convolutional Networks to generate features with semantic relationships. Then, we propose to use the gate and memory mechanism to perform global semantic reasoning on these relationship-enhanced features, select the discriminative information and gradually generate the representation for the whole scene. 
 8 | 
 9 | Experiments validate that our method achieves a new state-of-the-art for the image-text matching on MS-COCO and Flickr30K datasets. It outperforms the current best method SCAN by 6.8\% relatively for image retrieval and 4.8\% relatively for caption retrieval on MS-COCO (Recall@1 using 1K test set). On Flickr30K, our model improves image retrieval by 12.6\% relatively and caption retrieval by 5.8\% relatively (Recall@1). 
10 | 
11 | Besides, since our method only relies on the simple inner product as the similarity function, it is quite efficient at the inference stage. It is around 30 times faster than the current best method SCAN when tested on MS-COCO 1K dataset.
12 | 
13 | ![model](/fig/model.png)
14 | 
15 | ## Requirements 
16 | We recommended the following dependencies.
17 | 
18 | * Python 2.7 
19 | * [PyTorch](http://pytorch.org/) (0.4.1)
20 | * [NumPy](http://www.numpy.org/) (>1.12.1)
21 | * [TensorBoard](https://github.com/TeamHG-Memex/tensorboard_logger)
22 | * [pycocotools](https://github.com/cocodataset/cocoapi)
23 | * [torchvision]()
24 | * [matplotlib]()
25 | 
26 | 
27 | * Punkt Sentence Tokenizer:
28 | ```python
29 | import nltk
30 | nltk.download()
31 | > d punkt
32 | ```
33 | 
34 | ## Download data
35 | 
36 | Download the dataset files and pre-trained models. We use splits produced by [Andrej Karpathy](http://cs.stanford.edu/people/karpathy/deepimagesent/). 
37 | 
38 | We follow [bottom-up attention model](https://github.com/peteanderson80/bottom-up-attention) and [SCAN](https://github.com/kuanghuei/SCAN) to obtain image features for fair comparison. More details about data pre-processing (optional) can be found [here](https://github.com/kuanghuei/SCAN/blob/master/README.md#data-pre-processing-optional). All the data needed for reproducing the experiments in the paper, including image features and vocabularies, can be downloaded from [SCAN](https://github.com/kuanghuei/SCAN) by using:
39 | 
40 | ```bash
41 | wget https://scanproject.blob.core.windows.net/scan-data/data.zip
42 | ```
43 | 
44 | You can also get the data from google drive: https://drive.google.com/drive/u/1/folders/1os1Kr7HeTbh8FajBNegW8rjJf6GIhFqC. We refer to the path of extracted files for `data.zip` as `$DATA_PATH`. 
45 | 
46 | ## Evaluate pre-trained models
47 | Modify the model_path and data_path in the evaluation_models.py file. Then Run `evaluation_models.py`:
48 | 
49 | ```bash
50 | python evaluation_models.py
51 | ```
52 | 
53 | To do cross-validation on MSCOCO 1K test set (5 folders average), pass `fold5=True`. Pass `fold5=False` for evaluation on MSCOCO 5K test set. Pretrained models for MSCOCO and Flickr30K can be downloaded from https://drive.google.com/file/d/1y8Ywa2vrPB7m_Q_Ku69z7EdwsLB9gsJW/view?usp=sharing 
54 | 
55 | You can also use the following code to evaluate each single model on Flickr30K, MSCOCO 1K and MSCOCO 5K :
56 | 
57 | ```python
58 | from vocab import Vocabulary
59 | import evaluation
60 | evaluation.evalrank("pretrain_model/flickr/model_fliker_1.pth.tar", data_path="$DATA_PATH", split="test", fold5=False)'
61 | evaluation.evalrank("pretrain_model/coco/model_coco_1.pth.tar", data_path="$DATA_PATH", split="testall", fold5=True)'
62 | evaluation.evalrank("pretrain_model/coco/model_coco_1.pth.tar", data_path="$DATA_PATH", split="testall", fold5=False)'
63 | ```
64 | 
65 | ## Training new models
66 | Run `train.py`:
67 | 
68 | For MSCOCO:
69 | 
70 | ```bash
71 | python train.py --data_path $DATA_PATH --data_name coco_precomp --logger_name runs/coco_VSRN --max_violation
72 | ```
73 | 
74 | For Flickr30K:
75 | 
76 | ```bash
77 | python train.py --data_path $DATA_PATH --data_name f30k_precomp --logger_name runs/flickr_VSRN --max_violation --lr_update 10  --max_len 60
78 | ```
79 | 
80 | 
81 | ## Reference
82 | 
83 | If you found this code useful, please cite the following paper:
84 | 
85 |     @inproceedings{li2019vsrn,
86 |       title={Visual semantic reasoning for image-text matching},
87 |       author={Li, Kunpeng and Zhang, Yulun and Li, Kai and Li, Yuanyuan and Fu, Yun},
88 |       booktitle={ICCV},
89 |       year={2019}
90 |     }
91 | 
92 | ## License
93 | 
94 | [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0)
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/coco-caption/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 DingXia
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/cider/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/cider/cider.py:
--------------------------------------------------------------------------------
 1 | # Filename: cider.py
 2 | #
 3 | #
 4 | # Description: Describes the class to compute the CIDEr
 5 | # (Consensus-Based Image Description Evaluation) Metric
 6 | #          by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
 7 | #
 8 | # Creation Date: Sun Feb  8 14:16:54 2015
 9 | #
10 | # Authors: Ramakrishna Vedantam <vrama91@vt.edu> and
11 | # Tsung-Yi Lin <tl483@cornell.edu>
12 | 
13 | from cider_scorer import CiderScorer
14 | 
15 | 
16 | class Cider:
17 |     """
18 |     Main Class to compute the CIDEr metric
19 | 
20 |     """
21 |     def __init__(self, n=4, df="corpus"):
22 |         """
23 |         Initialize the CIDEr scoring function
24 |         : param n (int): n-gram size
25 |         : param df (string): specifies where to get the IDF values from
26 |                     takes values 'corpus', 'coco-train'
27 |         : return: None
28 |         """
29 |         # set cider to sum over 1 to 4-grams
30 |         self._n = n
31 |         self._df = df
32 |         self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
33 | 
34 |     def compute_score(self, gts, res):
35 |         """
36 |         Main function to compute CIDEr score
37 |         : param  gts (dict) : {image:tokenized reference sentence}
38 |         : param res (dict)  : {image:tokenized candidate sentence}
39 |         : return: cider (float) : computed CIDEr score for the corpus
40 |         """
41 | 
42 |         # clear all the previous hypos and refs
43 |         self.cider_scorer.clear()
44 | 
45 |         for res_id in res:
46 | 
47 |             hypo = res_id['caption']
48 |             ref = gts[res_id['image_id']]
49 | 
50 |             # Sanity check.
51 |             assert(type(hypo) is list)
52 |             assert(len(hypo) == 1)
53 |             assert(type(ref) is list)
54 |             assert(len(ref) > 0)
55 |             self.cider_scorer += (hypo[0], ref)
56 | 
57 |         (score, scores) = self.cider_scorer.compute_score()
58 | 
59 |         return score, scores
60 | 
61 |     def method(self):
62 |         return "CIDEr"
63 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/cider/cider_scorer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Tsung-Yi Lin <tl483@cornell.edu>
  3 | # Ramakrishna Vedantam <vrama91@vt.edu>
  4 | 
  5 | import copy
  6 | import pickle
  7 | from collections import defaultdict
  8 | import numpy as np
  9 | import math
 10 | import os
 11 | 
 12 | def precook(s, n=4, out=False):
 13 |     """
 14 |     Takes a string as input and returns an object that can be given to
 15 |     either cook_refs or cook_test. This is optional: cook_refs and cook_test
 16 |     can take string arguments as well.
 17 |     :param s: string : sentence to be converted into ngrams
 18 |     :param n: int    : number of ngrams for which representation is calculated
 19 |     :return: term frequency vector for occuring ngrams
 20 |     """
 21 |     words = s.split()
 22 |     counts = defaultdict(int)
 23 |     for k in xrange(1,n+1):
 24 |         for i in xrange(len(words)-k+1):
 25 |             ngram = tuple(words[i:i+k])
 26 |             counts[ngram] += 1
 27 |     return counts
 28 | 
 29 | def cook_refs(refs, n=4): ## lhuang: oracle will call with "average"
 30 |     '''Takes a list of reference sentences for a single segment
 31 |     and returns an object that encapsulates everything that BLEU
 32 |     needs to know about them.
 33 |     :param refs: list of string : reference sentences for some image
 34 |     :param n: int : number of ngrams for which (ngram) representation is calculated
 35 |     :return: result (list of dict)
 36 |     '''
 37 |     return [precook(ref, n) for ref in refs]
 38 | 
 39 | def cook_test(test, n=4):
 40 |     '''Takes a test sentence and returns an object that
 41 |     encapsulates everything that BLEU needs to know about it.
 42 |     :param test: list of string : hypothesis sentence for some image
 43 |     :param n: int : number of ngrams for which (ngram) representation is calculated
 44 |     :return: result (dict)
 45 |     '''
 46 |     return precook(test, n, True)
 47 | 
 48 | class CiderScorer(object):
 49 |     """CIDEr scorer.
 50 |     """
 51 | 
 52 |     def copy(self):
 53 |         ''' copy the refs.'''
 54 |         new = CiderScorer(n=self.n)
 55 |         new.ctest = copy.copy(self.ctest)
 56 |         new.crefs = copy.copy(self.crefs)
 57 |         return new
 58 | 
 59 |     def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0):
 60 |         ''' singular instance '''
 61 |         self.n = n
 62 |         self.sigma = sigma
 63 |         self.crefs = []
 64 |         self.ctest = []
 65 |         self.df_mode = df_mode
 66 |         if self.df_mode != "corpus":
 67 |             self.document_frequency = pickle.load(open(os.path.join('data', df_mode + '.p'),'r'))       
 68 |         self.cook_append(test, refs)
 69 |         self.ref_len = None
 70 |     
 71 |     def clear(self):
 72 |         self.crefs = []
 73 |         self.ctest = []
 74 | 
 75 |     def cook_append(self, test, refs):
 76 |         '''called by constructor and __iadd__ to avoid creating new instances.'''
 77 | 
 78 |         if refs is not None:
 79 |             self.crefs.append(cook_refs(refs))
 80 |             if test is not None:
 81 |                 self.ctest.append(cook_test(test)) ## N.B.: -1
 82 |             else:
 83 |                 self.ctest.append(None) # lens of crefs and ctest have to match
 84 | 
 85 |     def size(self):
 86 |         assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
 87 |         return len(self.crefs)
 88 | 
 89 |     def __iadd__(self, other):
 90 |         '''add an instance (e.g., from another sentence).'''
 91 | 
 92 |         if type(other) is tuple:
 93 |             ## avoid creating new CiderScorer instances
 94 |             self.cook_append(other[0], other[1])
 95 |         else:
 96 |             self.ctest.extend(other.ctest)
 97 |             self.crefs.extend(other.crefs)
 98 | 
 99 |         return self
100 |     def compute_doc_freq(self):
101 |         '''
102 |         Compute term frequency for reference data.
103 |         This will be used to compute idf (inverse document frequency later)
104 |         The term frequency is stored in the object
105 |         :return: None
106 |         '''
107 |         for refs in self.crefs:
108 |             # refs, k ref captions of one image
109 |             for ngram in set([ngram for ref in refs for (ngram,count) in ref.iteritems()]):
110 |                 self.document_frequency[ngram] += 1
111 |             # maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
112 | 
113 |     def compute_cider(self):
114 |         def counts2vec(cnts):
115 |             """
116 |             Function maps counts of ngram to vector of tfidf weights.
117 |             The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights.
118 |             The n-th entry of array denotes length of n-grams.
119 |             :param cnts:
120 |             :return: vec (array of dict), norm (array of float), length (int)
121 |             """
122 |             vec = [defaultdict(float) for _ in range(self.n)]
123 |             length = 0
124 |             norm = [0.0 for _ in range(self.n)]
125 |             for (ngram,term_freq) in cnts.iteritems():
126 |                 # give word count 1 if it doesn't appear in reference corpus
127 |                 df = np.log(max(1.0, self.document_frequency[ngram]))
128 |                 # ngram index
129 |                 n = len(ngram)-1
130 |                 # tf (term_freq) * idf (precomputed idf) for n-grams
131 |                 vec[n][ngram] = float(term_freq)*(self.ref_len - df)
132 |                 # compute norm for the vector.  the norm will be used for
133 |                 # computing similarity
134 |                 norm[n] += pow(vec[n][ngram], 2)
135 | 
136 |                 if n == 1:
137 |                     length += term_freq
138 |             norm = [np.sqrt(n) for n in norm]
139 |             return vec, norm, length
140 | 
141 |         def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
142 |             '''
143 |             Compute the cosine similarity of two vectors.
144 |             :param vec_hyp: array of dictionary for vector corresponding to hypothesis
145 |             :param vec_ref: array of dictionary for vector corresponding to reference
146 |             :param norm_hyp: array of float for vector corresponding to hypothesis
147 |             :param norm_ref: array of float for vector corresponding to reference
148 |             :param length_hyp: int containing length of hypothesis
149 |             :param length_ref: int containing length of reference
150 |             :return: array of score for each n-grams cosine similarity
151 |             '''
152 |             delta = float(length_hyp - length_ref)
153 |             # measure consine similarity
154 |             val = np.array([0.0 for _ in range(self.n)])
155 |             for n in range(self.n):
156 |                 # ngram
157 |                 for (ngram,count) in vec_hyp[n].iteritems():
158 |                     val[n] += vec_hyp[n][ngram] * vec_ref[n][ngram]
159 | 
160 |                 if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
161 |                     val[n] /= (norm_hyp[n]*norm_ref[n])
162 | 
163 |                 assert(not math.isnan(val[n]))
164 |             return val
165 | 
166 |         # compute log reference length
167 |         if self.df_mode == "corpus":
168 |             self.ref_len = np.log(float(len(self.crefs)))
169 |         elif self.df_mode == "coco-val":
170 |             # if coco option selected, use length of coco-val set
171 |             self.ref_len = np.log(float(40504))
172 | 
173 |         scores = []
174 |         for test, refs in zip(self.ctest, self.crefs):
175 |             # compute vector for test captions
176 |             vec, norm, length = counts2vec(test)
177 |             # compute vector for ref captions
178 |             score = np.array([0.0 for _ in range(self.n)])
179 |             for ref in refs:
180 |                 vec_ref, norm_ref, length_ref = counts2vec(ref)
181 |                 score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
182 |             # change by vrama91 - mean of ngram scores, instead of sum
183 |             score_avg = np.mean(score)
184 |             # divide by number of references
185 |             score_avg /= len(refs)
186 |             # multiply score by 10
187 |             score_avg *= 10.0
188 |             # append score of an image to the score list
189 |             scores.append(score_avg)
190 |         return scores
191 | 
192 |     def compute_score(self, option=None, verbose=0):
193 |         # compute idf
194 |         if self.df_mode == "corpus":
195 |             self.document_frequency = defaultdict(float)
196 |             self.compute_doc_freq()
197 |             # assert to check document frequency
198 |             assert(len(self.ctest) >= max(self.document_frequency.values()))
199 |             # import json for now and write the corresponding files
200 |         # compute cider score
201 |         score = self.compute_cider()
202 |         # debug
203 |         # print score
204 |         return np.mean(np.array(score)), np.array(score)
205 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/ciderD/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/ciderD/ciderD.py:
--------------------------------------------------------------------------------
 1 | # Filename: ciderD.py
 2 | #
 3 | # Description: Describes the class to compute the CIDEr-D (Consensus-Based Image Description Evaluation) Metric
 4 | #               by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
 5 | #
 6 | # Creation Date: Sun Feb  8 14:16:54 2015
 7 | #
 8 | # Authors: Ramakrishna Vedantam <vrama91@vt.edu> and Tsung-Yi Lin <tl483@cornell.edu>
 9 | 
10 | from .ciderD_scorer import CiderScorer
11 | 
12 | 
13 | class CiderD:
14 |     """
15 |     Main Class to compute the CIDEr metric
16 | 
17 |     """
18 |     def __init__(self, n=4, sigma=6.0, df="corpus"):
19 |         # set cider to sum over 1 to 4-grams
20 |         self._n = n
21 |         # set the standard deviation parameter for gaussian penalty
22 |         self._sigma = sigma
23 |         # set which where to compute document frequencies from
24 |         self._df = df
25 |         self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
26 | 
27 |     def compute_score(self, gts, res):
28 |         """
29 |         Main function to compute CIDEr score
30 |         :param  hypo_for_image (dict) : dictionary with key <image> and value <tokenized hypothesis / candidate sentence>
31 |                 ref_for_image (dict)  : dictionary with key <image> and value <tokenized reference sentence>
32 |         :return: cider (float) : computed CIDEr score for the corpus
33 |         """
34 | 
35 |         # clear all the previous hypos and refs
36 |         self.cider_scorer.clear()
37 |         for res_id in res:
38 | 
39 |             hypo = res_id['caption']
40 |             ref = gts[res_id['image_id']]
41 | 
42 |             # Sanity check.
43 |             assert(type(hypo) is list)
44 |             assert(len(hypo) == 1)
45 |             assert(type(ref) is list)
46 |             assert(len(ref) > 0)
47 |             self.cider_scorer += (hypo[0], ref)
48 | 
49 |         (score, scores) = self.cider_scorer.compute_score()
50 | 
51 |         return score, scores
52 | 
53 |     def method(self):
54 |         return "CIDEr-D"
55 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/ciderD/ciderD_scorer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Tsung-Yi Lin <tl483@cornell.edu>
  3 | # Ramakrishna Vedantam <vrama91@vt.edu>
  4 | 
  5 | import copy
  6 | from collections import defaultdict
  7 | import numpy as np
  8 | import pdb
  9 | import math
 10 | import pickle
 11 | import os
 12 | 
 13 | def precook(s, n=4, out=False):
 14 |     """
 15 |     Takes a string as input and returns an object that can be given to
 16 |     either cook_refs or cook_test. This is optional: cook_refs and cook_test
 17 |     can take string arguments as well.
 18 |     :param s: string : sentence to be converted into ngrams
 19 |     :param n: int    : number of ngrams for which representation is calculated
 20 |     :return: term frequency vector for occuring ngrams
 21 |     """
 22 |     words = s.split()
 23 |     counts = defaultdict(int)
 24 |     for k in range(1,n+1):
 25 |         for i in range(len(words)-k+1):
 26 |             ngram = tuple(words[i:i+k])
 27 |             counts[ngram] += 1
 28 |     return counts
 29 | 
 30 | def cook_refs(refs, n=4): ## lhuang: oracle will call with "average"
 31 |     '''Takes a list of reference sentences for a single segment
 32 |     and returns an object that encapsulates everything that BLEU
 33 |     needs to know about them.
 34 |     :param refs: list of string : reference sentences for some image
 35 |     :param n: int : number of ngrams for which (ngram) representation is calculated
 36 |     :return: result (list of dict)
 37 |     '''
 38 |     return [precook(ref, n) for ref in refs]
 39 | 
 40 | def cook_test(test, n=4):
 41 |     '''Takes a test sentence and returns an object that
 42 |     encapsulates everything that BLEU needs to know about it.
 43 |     :param test: list of string : hypothesis sentence for some image
 44 |     :param n: int : number of ngrams for which (ngram) representation is calculated
 45 |     :return: result (dict)
 46 |     '''
 47 |     return precook(test, n, True)
 48 | 
 49 | class CiderScorer(object):
 50 |     """CIDEr scorer.
 51 |     """
 52 | 
 53 |     def copy(self):
 54 |         ''' copy the refs.'''
 55 |         new = CiderScorer(n=self.n)
 56 |         new.ctest = copy.copy(self.ctest)
 57 |         new.crefs = copy.copy(self.crefs)
 58 |         return new
 59 | 
 60 |     def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0):
 61 |         ''' singular instance '''
 62 |         self.n = n
 63 |         self.sigma = sigma
 64 |         self.crefs = []
 65 |         self.ctest = []
 66 |         self.df_mode = df_mode
 67 |         self.ref_len = None
 68 |         if self.df_mode != "corpus":
 69 |             pkl_file = pickle.load(open(os.path.join('data', df_mode + '.p'),'rb'))
 70 |             self.ref_len = pkl_file['ref_len']
 71 |             self.document_frequency = pkl_file['document_frequency']
 72 |         self.cook_append(test, refs)
 73 |     
 74 |     def clear(self):
 75 |         self.crefs = []
 76 |         self.ctest = []
 77 | 
 78 |     def cook_append(self, test, refs):
 79 |         '''called by constructor and __iadd__ to avoid creating new instances.'''
 80 | 
 81 |         if refs is not None:
 82 |             self.crefs.append(cook_refs(refs))
 83 |             if test is not None:
 84 |                 self.ctest.append(cook_test(test)) ## N.B.: -1
 85 |             else:
 86 |                 self.ctest.append(None) # lens of crefs and ctest have to match
 87 | 
 88 |     def size(self):
 89 |         assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
 90 |         return len(self.crefs)
 91 | 
 92 |     def __iadd__(self, other):
 93 |         '''add an instance (e.g., from another sentence).'''
 94 | 
 95 |         if type(other) is tuple:
 96 |             ## avoid creating new CiderScorer instances
 97 |             self.cook_append(other[0], other[1])
 98 |         else:
 99 |             self.ctest.extend(other.ctest)
100 |             self.crefs.extend(other.crefs)
101 | 
102 |         return self
103 |     def compute_doc_freq(self):
104 |         '''
105 |         Compute term frequency for reference data.
106 |         This will be used to compute idf (inverse document frequency later)
107 |         The term frequency is stored in the object
108 |         :return: None
109 |         '''
110 |         for refs in self.crefs:
111 |             # refs, k ref captions of one image
112 |             for ngram in set([ngram for ref in refs for (ngram,count) in ref.items()]):
113 |                 self.document_frequency[ngram] += 1
114 |             # maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
115 | 
116 |     def compute_cider(self):
117 |         def counts2vec(cnts):
118 |             """
119 |             Function maps counts of ngram to vector of tfidf weights.
120 |             The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights.
121 |             The n-th entry of array denotes length of n-grams.
122 |             :param cnts:
123 |             :return: vec (array of dict), norm (array of float), length (int)
124 |             """
125 |             vec = [defaultdict(float) for _ in range(self.n)]
126 |             length = 0
127 |             norm = [0.0 for _ in range(self.n)]
128 |             for (ngram,term_freq) in cnts.items():
129 |                 # give word count 1 if it doesn't appear in reference corpus
130 |                 df = np.log(max(1.0, self.document_frequency[ngram]))
131 |                 # ngram index
132 |                 n = len(ngram)-1
133 |                 # tf (term_freq) * idf (precomputed idf) for n-grams
134 |                 vec[n][ngram] = float(term_freq)*(self.ref_len - df)
135 |                 # compute norm for the vector.  the norm will be used for computing similarity
136 |                 norm[n] += pow(vec[n][ngram], 2)
137 | 
138 |                 if n == 1:
139 |                     length += term_freq
140 |             norm = [np.sqrt(n) for n in norm]
141 |             return vec, norm, length
142 | 
143 |         def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
144 |             '''
145 |             Compute the cosine similarity of two vectors.
146 |             :param vec_hyp: array of dictionary for vector corresponding to hypothesis
147 |             :param vec_ref: array of dictionary for vector corresponding to reference
148 |             :param norm_hyp: array of float for vector corresponding to hypothesis
149 |             :param norm_ref: array of float for vector corresponding to reference
150 |             :param length_hyp: int containing length of hypothesis
151 |             :param length_ref: int containing length of reference
152 |             :return: array of score for each n-grams cosine similarity
153 |             '''
154 |             delta = float(length_hyp - length_ref)
155 |             # measure consine similarity
156 |             val = np.array([0.0 for _ in range(self.n)])
157 |             for n in range(self.n):
158 |                 # ngram
159 |                 for (ngram,count) in vec_hyp[n].items():
160 |                     # vrama91 : added clipping
161 |                     val[n] += min(vec_hyp[n][ngram], vec_ref[n][ngram]) * vec_ref[n][ngram]
162 | 
163 |                 if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
164 |                     val[n] /= (norm_hyp[n]*norm_ref[n])
165 | 
166 |                 assert(not math.isnan(val[n]))
167 |                 # vrama91: added a length based gaussian penalty
168 |                 val[n] *= np.e**(-(delta**2)/(2*self.sigma**2))
169 |             return val
170 | 
171 |         # compute log reference length
172 |         if self.df_mode == "corpus":
173 |             self.ref_len = np.log(float(len(self.crefs)))
174 |         #elif self.df_mode == "coco-val":
175 |             # if coco option selected, use length of coco-val set
176 |         #    self.ref_len = np.log(float(40504))
177 | 
178 |         scores = []
179 |         for test, refs in zip(self.ctest, self.crefs):
180 |             # compute vector for test captions
181 |             vec, norm, length = counts2vec(test)
182 |             # compute vector for ref captions
183 |             score = np.array([0.0 for _ in range(self.n)])
184 |             for ref in refs:
185 |                 vec_ref, norm_ref, length_ref = counts2vec(ref)
186 |                 score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
187 |             # change by vrama91 - mean of ngram scores, instead of sum
188 |             score_avg = np.mean(score)
189 |             # divide by number of references
190 |             score_avg /= len(refs)
191 |             # multiply score by 10
192 |             score_avg *= 10.0
193 |             # append score of an image to the score list
194 |             scores.append(score_avg)
195 |         return scores
196 | 
197 |     def compute_score(self, option=None, verbose=0):
198 |         # compute idf
199 |         if self.df_mode == "corpus":
200 |             self.document_frequency = defaultdict(float)
201 |             self.compute_doc_freq()
202 |             # assert to check document frequency
203 |             assert(len(self.ctest) >= max(self.document_frequency.values()))
204 |             # import json for now and write the corresponding files
205 |         # compute cider score
206 |         score = self.compute_cider()
207 |         # debug
208 |         # print score
209 |         return np.mean(np.array(score)), np.array(score)
210 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/eval.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'rama'
 2 | from tokenizer.ptbtokenizer import PTBTokenizer
 3 | from cider.cider import Cider
 4 | from ciderD.ciderD import CiderD
 5 | 
 6 | 
 7 | class CIDErEvalCap:
 8 |     def __init__(self, gts, res, df):
 9 |         print 'tokenization...'
10 |         tokenizer = PTBTokenizer('gts')
11 |         _gts = tokenizer.tokenize(gts)
12 |         print 'tokenized refs'
13 |         tokenizer = PTBTokenizer('res')
14 |         _res = tokenizer.tokenize(res)
15 |         print 'tokenized cands'
16 | 
17 |         self.gts = _gts
18 |         self.res = _res
19 |         self.df = df
20 | 
21 |     def evaluate(self):
22 |         # =================================================
23 |         # Set up scorers
24 |         # =================================================
25 | 
26 |         print 'setting up scorers...'
27 |         scorers = [
28 |             (Cider(df=self.df), "CIDEr"), (CiderD(df=self.df), "CIDErD")
29 |         ]
30 | 
31 |         # =================================================
32 |         # Compute scores
33 |         # =================================================
34 |         metric_scores = {}
35 |         for scorer, method in scorers:
36 |             print 'computing %s score...' % (scorer.method())
37 |             score, scores = scorer.compute_score(self.gts, self.res)
38 |             print "Mean %s score: %0.3f" % (method, score)
39 |             metric_scores[method] = list(scores)
40 |         return metric_scores
41 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'hfang'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/tokenizer/ptbtokenizer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # 
 3 | # File Name : ptbtokenizer.py
 4 | #
 5 | # Description : Do the PTB Tokenization and remove punctuations.
 6 | #
 7 | # Creation Date : 29-12-2014
 8 | # Last Modified : Thu Mar 19 09:53:35 2015
 9 | # Authors : Hao Fang <hfang@uw.edu> and Tsung-Yi Lin <tl483@cornell.edu>
10 | 
11 | import os
12 | import pdb # python debugger
13 | import sys
14 | import subprocess
15 | import re
16 | import tempfile
17 | import itertools
18 | 
19 | # path to the stanford corenlp jar
20 | STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar'
21 | 
22 | # punctuations to be removed from the sentences
23 | PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \
24 |         ".", "?", "!", ",", ":", "-", "--", "...", ";"] 
25 | 
26 | class PTBTokenizer:
27 |     """Python wrapper of Stanford PTBTokenizer"""
28 |     def __init__(self, _source='gts'):
29 |         self.source = _source
30 | 
31 |     def tokenize(self, captions_for_image):
32 |         cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR, \
33 |                 'edu.stanford.nlp.process.PTBTokenizer', \
34 |                 '-preserveLines', '-lowerCase']
35 | 
36 |         # ======================================================
37 |         # prepare data for PTB Tokenizer
38 |         # ======================================================
39 | 
40 |         if self.source == 'gts':
41 |             image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
42 |             sentences = '\n'.join([c['caption'].replace('\n', ' ') for k, v in captions_for_image.items() for c in v])
43 |             final_tokenized_captions_for_image = {}
44 | 
45 |         elif self.source == 'res':
46 |             index = [i for i, v in enumerate(captions_for_image)]
47 |             image_id = [v["image_id"] for v in captions_for_image]
48 |             sentences = '\n'.join(v["caption"].replace('\n', ' ') for v in captions_for_image )
49 |             final_tokenized_captions_for_index = []
50 | 
51 |         # ======================================================
52 |         # save sentences to temporary file
53 |         # ======================================================
54 |         path_to_jar_dirname=os.path.dirname(os.path.abspath(__file__))
55 |         tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dirname)
56 |         tmp_file.write(sentences)
57 |         tmp_file.close()
58 | 
59 |         # ======================================================
60 |         # tokenize sentence
61 |         # ======================================================
62 |         cmd.append(os.path.basename(tmp_file.name))
63 |         p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname, \
64 |                 stdout=subprocess.PIPE)
65 |         token_lines = p_tokenizer.communicate(input=sentences.rstrip())[0]
66 |         lines = token_lines.split('\n')
67 |         # remove temp file
68 |         os.remove(tmp_file.name)
69 | 
70 |         # ======================================================
71 |         # create dictionary for tokenized captions
72 |         # ======================================================
73 |         if self.source == 'gts':
74 |             for k, line in zip(image_id, lines):
75 |                 if not k in final_tokenized_captions_for_image:
76 |                     final_tokenized_captions_for_image[k] = []
77 |                 tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') \
78 |                         if w not in PUNCTUATIONS])
79 |                 final_tokenized_captions_for_image[k].append(tokenized_caption)
80 | 
81 |             return final_tokenized_captions_for_image
82 | 
83 |         elif self.source == 'res':
84 |             for k, img, line in zip(index, image_id, lines):
85 |                 tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') \
86 |                         if w not in PUNCTUATIONS])
87 |                 final_tokenized_captions_for_index.append({'image_id': img, 'caption': [tokenized_caption]})
88 | 
89 |             return final_tokenized_captions_for_index
90 | 


--------------------------------------------------------------------------------
/coco-caption/pyciderevalcap/tokenizer/stanford-corenlp-3.4.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pyciderevalcap/tokenizer/stanford-corenlp-3.4.1.jar


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/bleu.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/bleu.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/bleu.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/bleu.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/bleu.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # 
 3 | # File Name : bleu.py
 4 | #
 5 | # Description : Wrapper for BLEU scorer.
 6 | #
 7 | # Creation Date : 06-01-2015
 8 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT
 9 | # Authors : Hao Fang <hfang@uw.edu> and Tsung-Yi Lin <tl483@cornell.edu>
10 | 
11 | from .bleu_scorer import BleuScorer
12 | 
13 | 
14 | class Bleu:
15 |     def __init__(self, n=4):
16 |         # default compute Blue score up to 4
17 |         self._n = n
18 |         self._hypo_for_image = {}
19 |         self.ref_for_image = {}
20 | 
21 |     def compute_score(self, gts, res):
22 | 
23 |         assert(sorted(gts.keys()) == sorted(res.keys()))
24 |         imgIds = sorted(gts.keys())
25 | 
26 |         bleu_scorer = BleuScorer(n=self._n)
27 |         for id in imgIds:
28 |             hypo = res[id]
29 |             ref = gts[id]
30 | 
31 |             # Sanity check.
32 |             assert(type(hypo) is list)
33 |             assert(len(hypo) == 1)
34 |             assert(type(ref) is list)
35 |             assert(len(ref) >= 1)
36 | 
37 |             bleu_scorer += (hypo[0], ref)
38 | 
39 |         #score, scores = bleu_scorer.compute_score(option='shortest')
40 |         score, scores = bleu_scorer.compute_score(option='closest', verbose=1)
41 |         #score, scores = bleu_scorer.compute_score(option='average', verbose=1)
42 | 
43 |         # return (bleu, bleu_info)
44 |         return score, scores
45 | 
46 |     def method(self):
47 |         return "Bleu"
48 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/bleu/bleu_scorer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # bleu_scorer.py
  4 | # David Chiang <chiang@isi.edu>
  5 | 
  6 | # Copyright (c) 2004-2006 University of Maryland. All rights
  7 | # reserved. Do not redistribute without permission from the
  8 | # author. Not for commercial use.
  9 | 
 10 | # Modified by: 
 11 | # Hao Fang <hfang@uw.edu>
 12 | # Tsung-Yi Lin <tl483@cornell.edu>
 13 | 
 14 | '''Provides:
 15 | cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
 16 | cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
 17 | '''
 18 | 
 19 | import copy
 20 | import sys, math, re
 21 | from collections import defaultdict
 22 | 
 23 | def precook(s, n=4, out=False):
 24 |     """Takes a string as input and returns an object that can be given to
 25 |     either cook_refs or cook_test. This is optional: cook_refs and cook_test
 26 |     can take string arguments as well."""
 27 |     words = s.split()
 28 |     counts = defaultdict(int)
 29 |     for k in range(1,n+1):
 30 |         for i in range(len(words)-k+1):
 31 |             ngram = tuple(words[i:i+k])
 32 |             counts[ngram] += 1
 33 |     return (len(words), counts)
 34 | 
 35 | def cook_refs(refs, eff=None, n=4): ## lhuang: oracle will call with "average"
 36 |     '''Takes a list of reference sentences for a single segment
 37 |     and returns an object that encapsulates everything that BLEU
 38 |     needs to know about them.'''
 39 | 
 40 |     reflen = []
 41 |     maxcounts = {}
 42 |     for ref in refs:
 43 |         rl, counts = precook(ref, n)
 44 |         reflen.append(rl)
 45 |         for (ngram,count) in counts.items():
 46 |             maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
 47 | 
 48 |     # Calculate effective reference sentence length.
 49 |     if eff == "shortest":
 50 |         reflen = min(reflen)
 51 |     elif eff == "average":
 52 |         reflen = float(sum(reflen))/len(reflen)
 53 | 
 54 |     ## lhuang: N.B.: leave reflen computaiton to the very end!!
 55 |     
 56 |     ## lhuang: N.B.: in case of "closest", keep a list of reflens!! (bad design)
 57 | 
 58 |     return (reflen, maxcounts)
 59 | 
 60 | def cook_test(test, reflen, refmaxcounts, eff=None, n=4):
 61 |     '''Takes a test sentence and returns an object that
 62 |     encapsulates everything that BLEU needs to know about it.'''
 63 | 
 64 |     testlen, counts = precook(test, n, True)
 65 | 
 66 |     result = {}
 67 | 
 68 |     # Calculate effective reference sentence length.
 69 |     
 70 |     if eff == "closest":
 71 |         result["reflen"] = min((abs(l-testlen), l) for l in reflen)[1]
 72 |     else: ## i.e., "average" or "shortest" or None
 73 |         result["reflen"] = reflen
 74 | 
 75 |     result["testlen"] = testlen
 76 | 
 77 |     result["guess"] = [max(0,testlen-k+1) for k in range(1,n+1)]
 78 | 
 79 |     result['correct'] = [0]*n
 80 |     for (ngram, count) in counts.items():
 81 |         result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count)
 82 | 
 83 |     return result
 84 | 
 85 | class BleuScorer(object):
 86 |     """Bleu scorer.
 87 |     """
 88 | 
 89 |     __slots__ = "n", "crefs", "ctest", "_score", "_ratio", "_testlen", "_reflen", "special_reflen"
 90 |     # special_reflen is used in oracle (proportional effective ref len for a node).
 91 | 
 92 |     def copy(self):
 93 |         ''' copy the refs.'''
 94 |         new = BleuScorer(n=self.n)
 95 |         new.ctest = copy.copy(self.ctest)
 96 |         new.crefs = copy.copy(self.crefs)
 97 |         new._score = None
 98 |         return new
 99 | 
100 |     def __init__(self, test=None, refs=None, n=4, special_reflen=None):
101 |         ''' singular instance '''
102 | 
103 |         self.n = n
104 |         self.crefs = []
105 |         self.ctest = []
106 |         self.cook_append(test, refs)
107 |         self.special_reflen = special_reflen
108 | 
109 |     def cook_append(self, test, refs):
110 |         '''called by constructor and __iadd__ to avoid creating new instances.'''
111 |         
112 |         if refs is not None:
113 |             self.crefs.append(cook_refs(refs))
114 |             if test is not None:
115 |                 cooked_test = cook_test(test, *self.crefs[-1])
116 |                 self.ctest.append(cooked_test) ## N.B.: -1
117 |             else:
118 |                 self.ctest.append(None) # lens of crefs and ctest have to match
119 | 
120 |         self._score = None ## need to recompute
121 | 
122 |     def ratio(self, option=None):
123 |         self.compute_score(option=option)
124 |         return self._ratio
125 | 
126 |     def score_ratio(self, option=None):
127 |         '''return (bleu, len_ratio) pair'''
128 |         return (self.fscore(option=option), self.ratio(option=option))
129 | 
130 |     def score_ratio_str(self, option=None):
131 |         return "%.4f (%.2f)" % self.score_ratio(option)
132 | 
133 |     def reflen(self, option=None):
134 |         self.compute_score(option=option)
135 |         return self._reflen
136 | 
137 |     def testlen(self, option=None):
138 |         self.compute_score(option=option)
139 |         return self._testlen        
140 | 
141 |     def retest(self, new_test):
142 |         if type(new_test) is str:
143 |             new_test = [new_test]
144 |         assert len(new_test) == len(self.crefs), new_test
145 |         self.ctest = []
146 |         for t, rs in zip(new_test, self.crefs):
147 |             self.ctest.append(cook_test(t, *rs))
148 |         self._score = None
149 | 
150 |         return self
151 | 
152 |     def rescore(self, new_test):
153 |         ''' replace test(s) with new test(s), and returns the new score.'''
154 |         
155 |         return self.retest(new_test).compute_score()
156 | 
157 |     def size(self):
158 |         assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
159 |         return len(self.crefs)
160 | 
161 |     def __iadd__(self, other):
162 |         '''add an instance (e.g., from another sentence).'''
163 | 
164 |         if type(other) is tuple:
165 |             ## avoid creating new BleuScorer instances
166 |             self.cook_append(other[0], other[1])
167 |         else:
168 |             assert self.compatible(other), "incompatible BLEUs."
169 |             self.ctest.extend(other.ctest)
170 |             self.crefs.extend(other.crefs)
171 |             self._score = None ## need to recompute
172 | 
173 |         return self        
174 | 
175 |     def compatible(self, other):
176 |         return isinstance(other, BleuScorer) and self.n == other.n
177 | 
178 |     def single_reflen(self, option="average"):
179 |         return self._single_reflen(self.crefs[0][0], option)
180 | 
181 |     def _single_reflen(self, reflens, option=None, testlen=None):
182 |         
183 |         if option == "shortest":
184 |             reflen = min(reflens)
185 |         elif option == "average":
186 |             reflen = float(sum(reflens))/len(reflens)
187 |         elif option == "closest":
188 |             reflen = min((abs(l-testlen), l) for l in reflens)[1]
189 |         else:
190 |             assert False, "unsupported reflen option %s" % option
191 | 
192 |         return reflen
193 | 
194 |     def recompute_score(self, option=None, verbose=0):
195 |         self._score = None
196 |         return self.compute_score(option, verbose)
197 |         
198 |     def compute_score(self, option=None, verbose=0):
199 |         n = self.n
200 |         small = 1e-9
201 |         tiny = 1e-15 ## so that if guess is 0 still return 0
202 |         bleu_list = [[] for _ in range(n)]
203 | 
204 |         if self._score is not None:
205 |             return self._score
206 | 
207 |         if option is None:
208 |             option = "average" if len(self.crefs) == 1 else "closest"
209 | 
210 |         self._testlen = 0
211 |         self._reflen = 0
212 |         totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n}
213 | 
214 |         # for each sentence
215 |         for comps in self.ctest:            
216 |             testlen = comps['testlen']
217 |             self._testlen += testlen
218 | 
219 |             if self.special_reflen is None: ## need computation
220 |                 reflen = self._single_reflen(comps['reflen'], option, testlen)
221 |             else:
222 |                 reflen = self.special_reflen
223 | 
224 |             self._reflen += reflen
225 |                 
226 |             for key in ['guess','correct']:
227 |                 for k in range(n):
228 |                     totalcomps[key][k] += comps[key][k]
229 | 
230 |             # append per image bleu score
231 |             bleu = 1.
232 |             for k in range(n):
233 |                 bleu *= (float(comps['correct'][k]) + tiny) \
234 |                         /(float(comps['guess'][k]) + small) 
235 |                 bleu_list[k].append(bleu ** (1./(k+1)))
236 |             ratio = (testlen + tiny) / (reflen + small) ## N.B.: avoid zero division
237 |             if ratio < 1:
238 |                 for k in range(n):
239 |                     bleu_list[k][-1] *= math.exp(1 - 1/ratio)
240 | 
241 |             if verbose > 1:
242 |                 print(comps, reflen)
243 | 
244 |         totalcomps['reflen'] = self._reflen
245 |         totalcomps['testlen'] = self._testlen
246 | 
247 |         bleus = []
248 |         bleu = 1.
249 |         for k in range(n):
250 |             bleu *= float(totalcomps['correct'][k] + tiny) \
251 |                     / (totalcomps['guess'][k] + small)
252 |             bleus.append(bleu ** (1./(k+1)))
253 |         ratio = (self._testlen + tiny) / (self._reflen + small) ## N.B.: avoid zero division
254 |         if ratio < 1:
255 |             for k in range(n):
256 |                 bleus[k] *= math.exp(1 - 1/ratio)
257 | 
258 |         if verbose > 0:
259 |             print(totalcomps)
260 |             print("ratio:%f"%ratio)
261 | 
262 |         self._score = bleus
263 |         return self._score, bleu_list
264 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/cider.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/cider.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/cider.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/cider.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/cider/cider.py:
--------------------------------------------------------------------------------
 1 | # Filename: cider.py
 2 | #
 3 | # Description: Describes the class to compute the CIDEr (Consensus-Based Image Description Evaluation) Metric 
 4 | #               by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
 5 | #
 6 | # Creation Date: Sun Feb  8 14:16:54 2015
 7 | #
 8 | # Authors: Ramakrishna Vedantam <vrama91@vt.edu> and Tsung-Yi Lin <tl483@cornell.edu>
 9 | 
10 | from .cider_scorer import CiderScorer
11 | import pdb
12 | 
13 | class Cider:
14 |     """
15 |     Main Class to compute the CIDEr metric 
16 | 
17 |     """
18 |     def __init__(self, test=None, refs=None, n=4, sigma=6.0):
19 |         # set cider to sum over 1 to 4-grams
20 |         self._n = n
21 |         # set the standard deviation parameter for gaussian penalty
22 |         self._sigma = sigma
23 | 
24 |     def compute_score(self, gts, res):
25 |         """
26 |         Main function to compute CIDEr score
27 |         :param  hypo_for_image (dict) : dictionary with key <image> and value <tokenized hypothesis / candidate sentence>
28 |                 ref_for_image (dict)  : dictionary with key <image> and value <tokenized reference sentence>
29 |         :return: cider (float) : computed CIDEr score for the corpus 
30 |         """
31 | 
32 |         assert(sorted(gts.keys()) == sorted(res.keys()))
33 |         imgIds = sorted(gts.keys())
34 | 
35 |         cider_scorer = CiderScorer(n=self._n, sigma=self._sigma)
36 | 
37 |         for id in imgIds:
38 |             hypo = res[id]
39 |             ref = gts[id]
40 | 
41 |             # Sanity check.
42 |             assert(type(hypo) is list)
43 |             assert(len(hypo) == 1)
44 |             assert(type(ref) is list)
45 |             assert(len(ref) >= 1)
46 | 
47 |             cider_scorer += (hypo[0], ref)
48 | 
49 |         (score, scores) = cider_scorer.compute_score()
50 | 
51 |         return score, scores
52 | 
53 |     def method(self):
54 |         return "CIDEr"
55 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/eval.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'tylin'
 2 | from .tokenizer.ptbtokenizer import PTBTokenizer
 3 | from .bleu.bleu import Bleu
 4 | from .meteor.meteor import Meteor
 5 | from .rouge.rouge import Rouge
 6 | from .cider.cider import Cider
 7 | from .spice.spice import Spice
 8 | 
 9 | class COCOEvalCap:
10 |     def __init__(self, coco, cocoRes):
11 |         self.evalImgs = []
12 |         self.eval = {}
13 |         self.imgToEval = {}
14 |         self.coco = coco
15 |         self.cocoRes = cocoRes
16 |         self.params = {'image_id': coco.getImgIds()}
17 | 
18 |     def evaluate(self):
19 |         imgIds = self.params['image_id']
20 |         # imgIds = self.coco.getImgIds()
21 |         gts = {}
22 |         res = {}
23 |         for imgId in imgIds:
24 |             gts[imgId] = self.coco.imgToAnns[imgId]
25 |             res[imgId] = self.cocoRes.imgToAnns[imgId]
26 | 
27 |         # =================================================
28 |         # Set up scorers
29 |         # =================================================
30 |         print('tokenization...')
31 |         tokenizer = PTBTokenizer()
32 |         gts  = tokenizer.tokenize(gts)
33 |         res = tokenizer.tokenize(res)
34 | 
35 |         # =================================================
36 |         # Set up scorers
37 |         # =================================================
38 |         print('setting up scorers...')
39 |         scorers = [
40 |             (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
41 |             (Meteor(),"METEOR"),
42 |             (Rouge(), "ROUGE_L"),
43 |             (Cider(), "CIDEr"),
44 |             (Spice(), "SPICE")
45 |         ]
46 | 
47 |         # =================================================
48 |         # Compute scores
49 |         # =================================================
50 |         for scorer, method in scorers:
51 |             print('computing %s score...'%(scorer.method()))
52 |             score, scores = scorer.compute_score(gts, res)
53 |             if type(method) == list:
54 |                 for sc, scs, m in zip(score, scores, method):
55 |                     self.setEval(sc, m)
56 |                     self.setImgToEvalImgs(scs, gts.keys(), m)
57 |                     print("%s: %0.3f"%(m, sc))
58 |             else:
59 |                 self.setEval(score, method)
60 |                 self.setImgToEvalImgs(scores, gts.keys(), method)
61 |                 print("%s: %0.3f"%(method, score))
62 |         self.setEvalImgs()
63 | 
64 |     def setEval(self, score, method):
65 |         self.eval[method] = score
66 | 
67 |     def setImgToEvalImgs(self, scores, imgIds, method):
68 |         for imgId, score in zip(sorted(imgIds), scores):
69 |             if not imgId in self.imgToEval:
70 |                 self.imgToEval[imgId] = {}
71 |                 self.imgToEval[imgId]["image_id"] = imgId
72 |             self.imgToEval[imgId][method] = score
73 | 
74 |     def setEvalImgs(self):
75 |         self.evalImgs = [self.imgToEval[imgId] for imgId in sorted(self.imgToEval.keys())]
76 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/meteor/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/meteor/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/__pycache__/meteor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/meteor/__pycache__/meteor.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/__pycache__/meteor.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/meteor/__pycache__/meteor.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/meteor-1.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/meteor/meteor-1.5.jar


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/meteor/meteor.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Python wrapper for METEOR implementation, by Xinlei Chen
 4 | # Acknowledge Michael Denkowski for the generous discussion and help 
 5 | 
 6 | import os
 7 | import sys
 8 | import subprocess
 9 | import threading
10 | 
11 | # Assumes meteor-1.5.jar is in the same directory as meteor.py.  Change as needed.
12 | METEOR_JAR = 'meteor-1.5.jar'
13 | # print METEOR_JAR
14 | 
15 | class Meteor:
16 | 
17 |     def __init__(self):
18 |         self.meteor_cmd = ['java', '-jar', '-Xmx2G', METEOR_JAR, \
19 |                 '-', '-', '-stdio', '-l', 'en', '-norm']
20 |         self.meteor_p = subprocess.Popen(self.meteor_cmd, \
21 |                 cwd=os.path.dirname(os.path.abspath(__file__)), \
22 |                 stdin=subprocess.PIPE, \
23 |                 stdout=subprocess.PIPE, \
24 |                 stderr=subprocess.PIPE)
25 |         # Used to guarantee thread safety
26 |         self.lock = threading.Lock()
27 | 
28 |     def compute_score(self, gts, res):
29 |         assert(sorted(gts.keys()) == sorted(res.keys()))
30 |         imgIds = sorted(gts.keys())
31 |         scores = []
32 | 
33 |         eval_line = 'EVAL'
34 |         self.lock.acquire()
35 |         for i in imgIds:
36 |             assert(len(res[i]) == 1)
37 |             stat = self._stat(res[i][0], gts[i])
38 |             eval_line += ' ||| {}'.format(stat)
39 | 
40 |         self.meteor_p.stdin.write('{}\n'.format(eval_line).encode())
41 |         self.meteor_p.stdin.flush()
42 |         for i in range(0, len(imgIds)):
43 |             scores.append(float(self.meteor_p.stdout.readline().decode().strip()))
44 |         score = float(self.meteor_p.stdout.readline().decode().strip())
45 |         self.lock.release()
46 | 
47 |         return score, scores
48 | 
49 |     def method(self):
50 |         return "METEOR"
51 | 
52 |     def _stat(self, hypothesis_str, reference_list):
53 |         # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
54 |         hypothesis_str = hypothesis_str.replace('|||','').replace('  ',' ')
55 |         score_line = ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str))
56 |         self.meteor_p.stdin.write('{}\n'.format(score_line).encode())
57 |         self.meteor_p.stdin.flush()
58 |         return self.meteor_p.stdout.readline().decode().strip()
59 | 
60 |     def _score(self, hypothesis_str, reference_list):
61 |         self.lock.acquire()
62 |         # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
63 |         hypothesis_str = hypothesis_str.replace('|||','').replace('  ',' ')
64 |         score_line = ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str))
65 |         self.meteor_p.stdin.write('{}\n'.format(score_line).encode())
66 |         self.meteor_p.stdin.flush()
67 |         stats = self.meteor_p.stdout.readline().decode().strip()
68 |         eval_line = 'EVAL ||| {}'.format(stats)
69 |         # EVAL ||| stats 
70 |         self.meteor_p.stdin.write('{}\n'.format(eval_line).encode())
71 |         self.meteor_p.stdin.flush()
72 |         score = float(self.meteor_p.stdout.readline().decode().strip())
73 |         # bug fix: there are two values returned by the jar file, one average, and one all, so do it twice
74 |         # thanks for Andrej for pointing this out
75 |         score = float(self.meteor_p.stdout.readline().strip())
76 |         self.lock.release()
77 |         return score
78 |  
79 |     def __exit__(self):
80 |         self.lock.acquire()
81 |         self.meteor_p.stdin.close()
82 |         self.meteor_p.kill()
83 |         self.meteor_p.wait()
84 |         self.lock.release()
85 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'vrama91'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/rouge/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/rouge/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/__pycache__/rouge.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/rouge/__pycache__/rouge.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/__pycache__/rouge.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/rouge/__pycache__/rouge.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/rouge/rouge.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # 
  3 | # File Name : rouge.py
  4 | #
  5 | # Description : Computes ROUGE-L metric as described by Lin and Hovey (2004)
  6 | #
  7 | # Creation Date : 2015-01-07 06:03
  8 | # Author : Ramakrishna Vedantam <vrama91@vt.edu>
  9 | 
 10 | import numpy as np
 11 | import pdb
 12 | 
 13 | def my_lcs(string, sub):
 14 |     """
 15 |     Calculates longest common subsequence for a pair of tokenized strings
 16 |     :param string : list of str : tokens from a string split using whitespace
 17 |     :param sub : list of str : shorter string, also split using whitespace
 18 |     :returns: length (list of int): length of the longest common subsequence between the two strings
 19 | 
 20 |     Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
 21 |     """
 22 |     if(len(string)< len(sub)):
 23 |         sub, string = string, sub
 24 | 
 25 |     lengths = [[0 for i in range(0,len(sub)+1)] for j in range(0,len(string)+1)]
 26 | 
 27 |     for j in range(1,len(sub)+1):
 28 |         for i in range(1,len(string)+1):
 29 |             if(string[i-1] == sub[j-1]):
 30 |                 lengths[i][j] = lengths[i-1][j-1] + 1
 31 |             else:
 32 |                 lengths[i][j] = max(lengths[i-1][j] , lengths[i][j-1])
 33 | 
 34 |     return lengths[len(string)][len(sub)]
 35 | 
 36 | class Rouge():
 37 |     '''
 38 |     Class for computing ROUGE-L score for a set of candidate sentences for the MS COCO test set
 39 | 
 40 |     '''
 41 |     def __init__(self):
 42 |         # vrama91: updated the value below based on discussion with Hovey
 43 |         self.beta = 1.2
 44 | 
 45 |     def calc_score(self, candidate, refs):
 46 |         """
 47 |         Compute ROUGE-L score given one candidate and references for an image
 48 |         :param candidate: str : candidate sentence to be evaluated
 49 |         :param refs: list of str : COCO reference sentences for the particular image to be evaluated
 50 |         :returns score: int (ROUGE-L score for the candidate evaluated against references)
 51 |         """
 52 |         assert(len(candidate)==1)	
 53 |         assert(len(refs)>0)         
 54 |         prec = []
 55 |         rec = []
 56 | 
 57 |         # split into tokens
 58 |         token_c = candidate[0].split(" ")
 59 |     	
 60 |         for reference in refs:
 61 |             # split into tokens
 62 |             token_r = reference.split(" ")
 63 |             # compute the longest common subsequence
 64 |             lcs = my_lcs(token_r, token_c)
 65 |             prec.append(lcs/float(len(token_c)))
 66 |             rec.append(lcs/float(len(token_r)))
 67 | 
 68 |         prec_max = max(prec)
 69 |         rec_max = max(rec)
 70 | 
 71 |         if(prec_max!=0 and rec_max !=0):
 72 |             score = ((1 + self.beta**2)*prec_max*rec_max)/float(rec_max + self.beta**2*prec_max)
 73 |         else:
 74 |             score = 0.0
 75 |         return score
 76 | 
 77 |     def compute_score(self, gts, res):
 78 |         """
 79 |         Computes Rouge-L score given a set of reference and candidate sentences for the dataset
 80 |         Invoked by evaluate_captions.py 
 81 |         :param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values 
 82 |         :param ref_for_image: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values
 83 |         :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images)
 84 |         """
 85 |         assert(sorted(gts.keys()) == sorted(res.keys()))
 86 |         imgIds = sorted(gts.keys())
 87 | 
 88 |         score = []
 89 |         for id in imgIds:
 90 |             hypo = res[id]
 91 |             ref  = gts[id]
 92 | 
 93 |             score.append(self.calc_score(hypo, ref))
 94 | 
 95 |             # Sanity check.
 96 |             assert(type(hypo) is list)
 97 |             assert(len(hypo) == 1)
 98 |             assert(type(ref) is list)
 99 |             assert(len(ref) >= 1)
100 | 
101 |         average_score = np.mean(np.array(score))
102 |         return average_score, np.array(score)
103 | 
104 |     def method(self):
105 |         return "Rouge"
106 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'hfang'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/tokenizer/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/tokenizer/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-35.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-36.pyc


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/ptbtokenizer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # 
 3 | # File Name : ptbtokenizer.py
 4 | #
 5 | # Description : Do the PTB Tokenization and remove punctuations.
 6 | #
 7 | # Creation Date : 29-12-2014
 8 | # Last Modified : Thu Mar 19 09:53:35 2015
 9 | # Authors : Hao Fang <hfang@uw.edu> and Tsung-Yi Lin <tl483@cornell.edu>
10 | 
11 | import os
12 | import sys
13 | import subprocess
14 | import tempfile
15 | import itertools
16 | 
17 | # path to the stanford corenlp jar
18 | STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar'
19 | 
20 | # punctuations to be removed from the sentences
21 | PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \
22 |         ".", "?", "!", ",", ":", "-", "--", "...", ";"] 
23 | 
24 | class PTBTokenizer:
25 |     """Python wrapper of Stanford PTBTokenizer"""
26 | 
27 |     def tokenize(self, captions_for_image):
28 |         cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR, \
29 |                 'edu.stanford.nlp.process.PTBTokenizer', \
30 |                 '-preserveLines', '-lowerCase']
31 | 
32 |         # ======================================================
33 |         # prepare data for PTB Tokenizer
34 |         # ======================================================
35 |         final_tokenized_captions_for_image = {}
36 |         image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
37 |         sentences = '\n'.join([c['caption'].replace('\n', ' ') for k, v in captions_for_image.items() for c in v])
38 | 
39 |         # ======================================================
40 |         # save sentences to temporary file
41 |         # ======================================================
42 |         path_to_jar_dirname=os.path.dirname(os.path.abspath(__file__))
43 |         tmp_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=path_to_jar_dirname)
44 |         tmp_file.write(sentences)
45 |         tmp_file.close()
46 | 
47 |         # ======================================================
48 |         # tokenize sentence
49 |         # ======================================================
50 |         cmd.append(os.path.basename(tmp_file.name))
51 |         p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname, \
52 |                 stdout=subprocess.PIPE)
53 |         token_lines = p_tokenizer.communicate(input=sentences.rstrip())[0]
54 |         lines = token_lines.decode().split('\n')
55 |         # remove temp file
56 |         os.remove(tmp_file.name)
57 | 
58 |         # ======================================================
59 |         # create dictionary for tokenized captions
60 |         # ======================================================
61 |         for k, line in zip(image_id, lines):
62 |             if not k in final_tokenized_captions_for_image:
63 |                 final_tokenized_captions_for_image[k] = []
64 |             tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') \
65 |                     if w not in PUNCTUATIONS])
66 |             final_tokenized_captions_for_image[k].append(tokenized_caption)
67 | 
68 |         return final_tokenized_captions_for_image
69 | 


--------------------------------------------------------------------------------
/coco-caption/pycocoevalcap/tokenizer/stanford-corenlp-3.4.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/coco-caption/pycocoevalcap/tokenizer/stanford-corenlp-3.4.1.jar


--------------------------------------------------------------------------------
/coco-caption/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/coco-caption/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | import pycocotools._mask as _mask
  4 | 
  5 | # Interface for manipulating masks stored in RLE format.
  6 | #
  7 | # RLE is a simple yet efficient format for storing binary masks. RLE
  8 | # first divides a vector (or vectorized image) into a series of piecewise
  9 | # constant regions and then for each piece simply stores the length of
 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 12 | # (note that the odd counts are always the numbers of zeros). Instead of
 13 | # storing the counts directly, additional compression is achieved with a
 14 | # variable bitrate representation based on a common scheme called LEB128.
 15 | #
 16 | # Compression is greatest given large piecewise constant regions.
 17 | # Specifically, the size of the RLE is proportional to the number of
 18 | # *boundaries* in M (or for an image the number of boundaries in the y
 19 | # direction). Assuming fairly simple shapes, the RLE representation is
 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 21 | # is substantially lower, especially for large simple objects (large n).
 22 | #
 23 | # Many common operations on masks can be computed directly using the RLE
 24 | # (without need for decoding). This includes computations such as area,
 25 | # union, intersection, etc. All of these operations are linear in the
 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 27 | # of the object. Computing these operations on the original mask is O(n).
 28 | # Thus, using the RLE can result in substantial computational savings.
 29 | #
 30 | # The following API functions are defined:
 31 | #  encode         - Encode binary masks using RLE.
 32 | #  decode         - Decode binary masks encoded via RLE.
 33 | #  merge          - Compute union or intersection of encoded masks.
 34 | #  iou            - Compute intersection over union between masks.
 35 | #  area           - Compute area of encoded masks.
 36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 38 | #
 39 | # Usage:
 40 | #  Rs     = encode( masks )
 41 | #  masks  = decode( Rs )
 42 | #  R      = merge( Rs, intersect=false )
 43 | #  o      = iou( dt, gt, iscrowd )
 44 | #  a      = area( Rs )
 45 | #  bbs    = toBbox( Rs )
 46 | #  Rs     = frPyObjects( [pyObjects], h, w )
 47 | #
 48 | # In the API the following formats are used:
 49 | #  Rs      - [dict] Run-length encoding of binary masks
 50 | #  R       - dict Run-length encoding of binary mask
 51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 55 | #  dt,gt   - May be either bounding boxes or encoded masks
 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 57 | #
 58 | # Finally, a note about the intersection over union (iou) computation.
 59 | # The standard iou of a ground truth (gt) and detected (dt) object is
 60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 61 | # For "crowd" regions, we use a modified criteria. If a gt object is
 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 66 | # For crowd gt regions we use this modified criteria above for the iou.
 67 | #
 68 | # To compile run "python setup.py build_ext --inplace"
 69 | # Please do not contact us for help with compiling.
 70 | #
 71 | # Microsoft COCO Toolbox.      version 2.0
 72 | # Data, paper, and tutorials available at:  http://mscoco.org/
 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 74 | # Licensed under the Simplified BSD License [see coco/license.txt]
 75 | 
 76 | iou         = _mask.iou
 77 | merge       = _mask.merge
 78 | frPyObjects = _mask.frPyObjects
 79 | 
 80 | def encode(bimask):
 81 |     if len(bimask.shape) == 3:
 82 |         return _mask.encode(bimask)
 83 |     elif len(bimask.shape) == 2:
 84 |         h, w = bimask.shape
 85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 86 | 
 87 | def decode(rleObjs):
 88 |     if type(rleObjs) == list:
 89 |         return _mask.decode(rleObjs)
 90 |     else:
 91 |         return _mask.decode([rleObjs])[:,:,0]
 92 | 
 93 | def area(rleObjs):
 94 |     if type(rleObjs) == list:
 95 |         return _mask.area(rleObjs)
 96 |     else:
 97 |         return _mask.area([rleObjs])[0]
 98 | 
 99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]


--------------------------------------------------------------------------------
/cocoapi-master/LuaAPI/cocoDemo.lua:
--------------------------------------------------------------------------------
 1 | -- Demo for the CocoApi (see CocoApi.lua)
 2 | coco = require 'coco'
 3 | image = require 'image'
 4 | 
 5 | -- initialize COCO api (please specify dataType/annType below)
 6 | annTypes = { 'instances', 'captions', 'person_keypoints' }
 7 | dataType, annType = 'val2014', annTypes[1]; -- specify dataType/annType
 8 | annFile = '../annotations/'..annType..'_'..dataType..'.json'
 9 | cocoApi=coco.CocoApi(annFile)
10 | 
11 | -- get all image ids, select one at random
12 | imgIds = cocoApi:getImgIds()
13 | imgId = imgIds[torch.random(imgIds:numel())]
14 | 
15 | -- load image
16 | img = cocoApi:loadImgs(imgId)[1]
17 | I = image.load('../images/'..dataType..'/'..img.file_name,3)
18 | 
19 | -- load and display instance annotations
20 | annIds = cocoApi:getAnnIds({imgId=imgId})
21 | anns = cocoApi:loadAnns(annIds)
22 | J = cocoApi:showAnns(I,anns)
23 | image.save('RES_'..img.file_name,J:double())
24 | 


--------------------------------------------------------------------------------
/cocoapi-master/LuaAPI/env.lua:
--------------------------------------------------------------------------------
 1 | --[[----------------------------------------------------------------------------
 2 | 
 3 | Common Objects in COntext (COCO) Toolbox.      version 3.0
 4 | Data, paper, and tutorials available at:  http://mscoco.org/
 5 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016.
 6 | Licensed under the Simplified BSD License [see coco/license.txt]
 7 | 
 8 | ------------------------------------------------------------------------------]]
 9 | 
10 | local coco = {}
11 | return coco
12 | 


--------------------------------------------------------------------------------
/cocoapi-master/LuaAPI/init.lua:
--------------------------------------------------------------------------------
 1 | --[[----------------------------------------------------------------------------
 2 | 
 3 | Common Objects in COntext (COCO) Toolbox.      version 3.0
 4 | Data, paper, and tutorials available at:  http://mscoco.org/
 5 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016.
 6 | Licensed under the Simplified BSD License [see coco/license.txt]
 7 | 
 8 | ------------------------------------------------------------------------------]]
 9 | 
10 | local coco = require 'coco.env'
11 | require 'coco.CocoApi'
12 | require 'coco.MaskApi'
13 | return coco
14 | 


--------------------------------------------------------------------------------
/cocoapi-master/LuaAPI/rocks/coco-scm-1.rockspec:
--------------------------------------------------------------------------------
 1 | package = "coco"
 2 | version = "scm-1"
 3 | 
 4 | source = {
 5 |   url = "git://github.com/pdollar/coco.git"
 6 | }
 7 | 
 8 | description = {
 9 |   summary = "Interface for accessing the Microsoft COCO dataset",
10 |   detailed = "See http://mscoco.org/ for more details",
11 |   homepage = "https://github.com/pdollar/coco",
12 |   license = "Simplified BSD"
13 | }
14 | 
15 | dependencies = {
16 |   "lua >= 5.1",
17 |   "torch >= 7.0",
18 |   "lua-cjson"
19 | }
20 | 
21 | build = {
22 |   type = "builtin",
23 |   modules = {
24 |     ["coco.env"] = "LuaAPI/env.lua",
25 |     ["coco.init"] = "LuaAPI/init.lua",
26 |     ["coco.MaskApi"] = "LuaAPI/MaskApi.lua",
27 |     ["coco.CocoApi"] = "LuaAPI/CocoApi.lua",
28 |     libmaskapi = {
29 |       sources = { "common/maskApi.c" },
30 |       incdirs = { "common/" }
31 |     }
32 |   }
33 | }
34 | 
35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec
36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec
37 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/MaskApi.m:
--------------------------------------------------------------------------------
  1 | classdef MaskApi
  2 |   % Interface for manipulating masks stored in RLE format.
  3 |   %
  4 |   % RLE is a simple yet efficient format for storing binary masks. RLE
  5 |   % first divides a vector (or vectorized image) into a series of piecewise
  6 |   % constant regions and then for each piece simply stores the length of
  7 |   % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
  8 |   % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
  9 |   % (note that the odd counts are always the numbers of zeros). Instead of
 10 |   % storing the counts directly, additional compression is achieved with a
 11 |   % variable bitrate representation based on a common scheme called LEB128.
 12 |   %
 13 |   % Compression is greatest given large piecewise constant regions.
 14 |   % Specifically, the size of the RLE is proportional to the number of
 15 |   % *boundaries* in M (or for an image the number of boundaries in the y
 16 |   % direction). Assuming fairly simple shapes, the RLE representation is
 17 |   % O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 18 |   % is substantially lower, especially for large simple objects (large n).
 19 |   %
 20 |   % Many common operations on masks can be computed directly using the RLE
 21 |   % (without need for decoding). This includes computations such as area,
 22 |   % union, intersection, etc. All of these operations are linear in the
 23 |   % size of the RLE, in other words they are O(sqrt(n)) where n is the area
 24 |   % of the object. Computing these operations on the original mask is O(n).
 25 |   % Thus, using the RLE can result in substantial computational savings.
 26 |   %
 27 |   % The following API functions are defined:
 28 |   %  encode - Encode binary masks using RLE.
 29 |   %  decode - Decode binary masks encoded via RLE.
 30 |   %  merge  - Compute union or intersection of encoded masks.
 31 |   %  iou    - Compute intersection over union between masks.
 32 |   %  nms    - Compute non-maximum suppression between ordered masks.
 33 |   %  area   - Compute area of encoded masks.
 34 |   %  toBbox - Get bounding boxes surrounding encoded masks.
 35 |   %  frBbox - Convert bounding boxes to encoded masks.
 36 |   %  frPoly - Convert polygon to encoded mask.
 37 |   %
 38 |   % Usage:
 39 |   %  Rs     = MaskApi.encode( masks )
 40 |   %  masks  = MaskApi.decode( Rs )
 41 |   %  R      = MaskApi.merge( Rs, [intersect=false] )
 42 |   %  o      = MaskApi.iou( dt, gt, [iscrowd=false] )
 43 |   %  keep   = MaskApi.nms( dt, thr )
 44 |   %  a      = MaskApi.area( Rs )
 45 |   %  bbs    = MaskApi.toBbox( Rs )
 46 |   %  Rs     = MaskApi.frBbox( bbs, h, w )
 47 |   %  R      = MaskApi.frPoly( poly, h, w )
 48 |   %
 49 |   % In the API the following formats are used:
 50 |   %  R,Rs   - [struct] Run-length encoding of binary mask(s)
 51 |   %  masks  - [hxwxn] Binary mask(s) (must have type uint8)
 52 |   %  bbs    - [nx4] Bounding box(es) stored as [x y w h]
 53 |   %  poly   - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...}
 54 |   %  dt,gt  - May be either bounding boxes or encoded masks
 55 |   % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 56 |   %
 57 |   % Finally, a note about the intersection over union (iou) computation.
 58 |   % The standard iou of a ground truth (gt) and detected (dt) object is
 59 |   %  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 60 |   % For "crowd" regions, we use a modified criteria. If a gt object is
 61 |   % marked as "iscrowd", we allow a dt to match any subregion of the gt.
 62 |   % Choosing gt' in the crowd gt that best matches the dt can be done using
 63 |   % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 64 |   %  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 65 |   % For crowd gt regions we use this modified criteria above for the iou.
 66 |   %
 67 |   % To compile use the following (some precompiled binaries are included):
 68 |   %   mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',...
 69 |   %     'private/maskApiMex.c','../common/maskApi.c',...
 70 |   %     '-I../common/','-outdir','private');
 71 |   % Please do not contact us for help with compiling.
 72 |   %
 73 |   % Microsoft COCO Toolbox.      version 2.0
 74 |   % Data, paper, and tutorials available at:  http://mscoco.org/
 75 |   % Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 76 |   % Licensed under the Simplified BSD License [see coco/license.txt]
 77 |   
 78 |   methods( Static )
 79 |     function Rs = encode( masks )
 80 |       Rs = maskApiMex( 'encode', masks );
 81 |     end
 82 |     
 83 |     function masks = decode( Rs )
 84 |       masks = maskApiMex( 'decode', Rs );
 85 |     end
 86 |     
 87 |     function R = merge( Rs, varargin )
 88 |       R = maskApiMex( 'merge', Rs, varargin{:} );
 89 |     end
 90 |     
 91 |     function o = iou( dt, gt, varargin )
 92 |       o = maskApiMex( 'iou', dt', gt', varargin{:} );
 93 |     end
 94 |     
 95 |     function keep = nms( dt, thr )
 96 |       keep = maskApiMex('nms',dt',thr);
 97 |     end
 98 |     
 99 |     function a = area( Rs )
100 |       a = maskApiMex( 'area', Rs );
101 |     end
102 |     
103 |     function bbs = toBbox( Rs )
104 |       bbs = maskApiMex( 'toBbox', Rs )';
105 |     end
106 |     
107 |     function Rs = frBbox( bbs, h, w )
108 |       Rs = maskApiMex( 'frBbox', bbs', h, w );
109 |     end
110 |     
111 |     function R = frPoly( poly, h, w )
112 |       R = maskApiMex( 'frPoly', poly, h , w );
113 |     end
114 |   end
115 |   
116 | end
117 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/cocoDemo.m:
--------------------------------------------------------------------------------
 1 | %% Demo for the CocoApi (see CocoApi.m)
 2 | 
 3 | %% initialize COCO api (please specify dataType/annType below)
 4 | annTypes = { 'instances', 'captions', 'person_keypoints' };
 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType
 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType);
 7 | coco=CocoApi(annFile);
 8 | 
 9 | %% display COCO categories and supercategories
10 | if( ~strcmp(annType,'captions') )
11 |   cats = coco.loadCats(coco.getCatIds());
12 |   nms={cats.name}; fprintf('COCO categories: ');
13 |   fprintf('%s, ',nms{:}); fprintf('\n');
14 |   nms=unique({cats.supercategory}); fprintf('COCO supercategories: ');
15 |   fprintf('%s, ',nms{:}); fprintf('\n');
16 | end
17 | 
18 | %% get all images containing given categories, select one at random
19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'});
20 | imgIds = coco.getImgIds('catIds',catIds);
21 | imgId = imgIds(randi(length(imgIds)));
22 | 
23 | %% load and display image
24 | img = coco.loadImgs(imgId);
25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name));
26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[])
27 | 
28 | %% load and display annotations
29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]);
30 | anns = coco.loadAnns(annIds); coco.showAnns(anns);
31 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/evalDemo.m:
--------------------------------------------------------------------------------
 1 | %% Demo demonstrating the algorithm result formats for COCO
 2 | 
 3 | %% select results type for demo (either bbox or segm)
 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here
 5 | fprintf('Running demo for *%s* results.\n\n',type);
 6 | 
 7 | %% initialize COCO ground truth api
 8 | dataDir='../'; prefix='instances'; dataType='val2014';
 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end
10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType);
11 | cocoGt=CocoApi(annFile);
12 | 
13 | %% initialize COCO detections api
14 | resFile='%s/results/%s_%s_fake%s100_results.json';
15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type);
16 | cocoDt=cocoGt.loadRes(resFile);
17 | 
18 | %% visialuze gt and dt side by side
19 | imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100);
20 | imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId);
21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name));
22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off;
23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth')
24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns);
25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off;
26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results')
27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns);
28 | 
29 | %% load raw JSON and show exact format for results
30 | fprintf('results structure have the following format:\n');
31 | res = gason(fileread(resFile)); disp(res)
32 | 
33 | %% the following command can be used to save the results back to disk
34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end
35 | 
36 | %% run COCO evaluation code (see CocoEval.m)
37 | cocoEval=CocoEval(cocoGt,cocoDt,type);
38 | cocoEval.params.imgIds=imgIds;
39 | cocoEval.evaluate();
40 | cocoEval.accumulate();
41 | cocoEval.summarize();
42 | 
43 | %% generate Derek Hoiem style analyis of false positives (slow)
44 | if(0), cocoEval.analyze(); end
45 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/gason.m:
--------------------------------------------------------------------------------
 1 | function out = gason( in )
 2 | % Convert between JSON strings and corresponding JSON objects.
 3 | %
 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev:
 5 | %                 https://github.com/vivkin/gason
 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the
 7 | % above link for license information and additional details about Gason.
 8 | %
 9 | % Given a JSON string, gason calls the C++ parser and converts the output
10 | % into an appropriate Matlab structure. As the parsing is performed in mex
11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take
12 | % only a few seconds to parse (compared to hours for pure Matlab parsers).
13 | %
14 | % Given a JSON object, gason calls the C++ encoder to convert the object
15 | % back into a JSON string representation. Nearly any Matlab struct, cell
16 | % array, or numeric array represent a valid JSON object. Note that gason()
17 | % can be used to go both from JSON string to JSON object and back.
18 | %
19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or
20 | % later). The following command compiles the parser (may require tweaking):
21 | %   mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',...
22 | %     'private/gasonMex.cpp','../common/gason.cpp',...
23 | %     '-I../common/','-outdir','private');
24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries
25 | % are included, please do not contact us for help with compiling. If needed
26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'.
27 | %
28 | % Note that by default JSON arrays that contain only numbers are stored as
29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of
30 | % the same type are stored as Matlab struct arrays. This is much faster and
31 | % can use considerably less memory than always using Matlab cell arrays.
32 | %
33 | % USAGE
34 | %  object = gason( string )
35 | %  string = gason( object )
36 | %
37 | % INPUTS/OUTPUTS
38 | %  string     - JSON string
39 | %  object     - JSON object
40 | %
41 | % EXAMPLE
42 | %  o = struct('first',{'piotr','ty'},'last',{'dollar','lin'})
43 | %  s = gason( o ) % convert JSON object -> JSON string
44 | %  p = gason( s ) % convert JSON string -> JSON object
45 | %
46 | % See also
47 | %
48 | % Microsoft COCO Toolbox.      version 2.0
49 | % Data, paper, and tutorials available at:  http://mscoco.org/
50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
51 | % Licensed under the Simplified BSD License [see coco/license.txt]
52 | 
53 | out = gasonMex( 'convert', in );
54 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/private/gasonMex.cpp:
--------------------------------------------------------------------------------
  1 | /**************************************************************************
  2 | * Microsoft COCO Toolbox.      version 2.0
  3 | * Data, paper, and tutorials available at:  http://mscoco.org/
  4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  5 | * Licensed under the Simplified BSD License [see coco/license.txt]
  6 | **************************************************************************/
  7 | #include "gason.h"
  8 | #include "mex.h"
  9 | #include "string.h"
 10 | #include "math.h"
 11 | #include <cstdint>
 12 | #include <iomanip>
 13 | #include <sstream>
 14 | typedef std::ostringstream ostrm;
 15 | typedef unsigned long siz;
 16 | typedef unsigned short ushort;
 17 | 
 18 | siz length( const JsonValue &a ) {
 19 |   // get number of elements in JSON_ARRAY or JSON_OBJECT
 20 |   siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k;
 21 | }
 22 | 
 23 | bool isRegularObjArray( const JsonValue &a ) {
 24 |   // check if all JSON_OBJECTs in JSON_ARRAY have the same fields
 25 |   JsonValue o=a.toNode()->value; siz k, n; const char **keys;
 26 |   n=length(o); keys=new const char*[n];
 27 |   k=0; for(auto j:o) keys[k++]=j->key;
 28 |   for( auto i:a ) {
 29 |     if(length(i->value)!=n) return false; k=0;
 30 |     for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false;
 31 |   }
 32 |   delete [] keys; return true;
 33 | }
 34 | 
 35 | mxArray* json( const JsonValue &o ) {
 36 |   // convert JsonValue to Matlab mxArray
 37 |   siz k, m, n; mxArray *M; const char **keys;
 38 |   switch( o.getTag() ) {
 39 |     case JSON_NUMBER:
 40 |       return mxCreateDoubleScalar(o.toNumber());
 41 |     case JSON_STRING:
 42 |       return mxCreateString(o.toString());
 43 |     case JSON_ARRAY: {
 44 |       if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL);
 45 |       JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag();
 46 |       n=length(o); bool isRegular=true;
 47 |       for(auto i:o) isRegular=isRegular && i->value.getTag()==tag;
 48 |       if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) {
 49 |         m=length(o0); keys=new const char*[m];
 50 |         k=0; for(auto j:o0) keys[k++]=j->key;
 51 |         M = mxCreateStructMatrix(1,n,m,keys);
 52 |         k=0; for(auto i:o) { m=0; for(auto j:i->value)
 53 |           mxSetFieldByNumber(M,k,m++,json(j->value)); k++; }
 54 |         delete [] keys; return M;
 55 |       } else if( isRegular && tag==JSON_NUMBER ) {
 56 |         M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M);
 57 |         k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M;
 58 |       } else {
 59 |         M = mxCreateCellMatrix(1,n);
 60 |         k=0; for(auto i:o) mxSetCell(M,k++,json(i->value));
 61 |         return M;
 62 |       }
 63 |     }
 64 |     case JSON_OBJECT:
 65 |       if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL);
 66 |       n=length(o); keys=new const char*[n];
 67 |       k=0; for(auto i:o) keys[k++]=i->key;
 68 |       M = mxCreateStructMatrix(1,1,n,keys); k=0;
 69 |       for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value));
 70 |       delete [] keys; return M;
 71 |     case JSON_TRUE:
 72 |       return mxCreateDoubleScalar(1);
 73 |     case JSON_FALSE:
 74 |       return mxCreateDoubleScalar(0);
 75 |     case JSON_NULL:
 76 |       return mxCreateDoubleMatrix(0,0,mxREAL);
 77 |     default: return NULL;
 78 |   }
 79 | }
 80 | 
 81 | template<class T, class C> ostrm& json( ostrm &S, T *A, siz n ) {
 82 |   // convert numeric array to JSON string with casting
 83 |   if(n==0) { S<<"[]"; return S; } if(n==1) { S<<C(A[0]); return S; }
 84 |   S<<"["; for(siz i=0; i<n-1; i++) S<<C(A[i])<<",";
 85 |   S<<C(A[n-1]); S<<"]"; return S;
 86 | }
 87 | 
 88 | template<class T> ostrm& json( ostrm &S, T *A, siz n ) {
 89 |   // convert numeric array to JSON string without casting
 90 |   return json<T,T>(S,A,n);
 91 | }
 92 | 
 93 | ostrm& json( ostrm &S, const char *A ) {
 94 |   // convert char array to JSON string (handle escape characters)
 95 |   #define RPL(a,b) case a: { S << b; A++; break; }
 96 |   S << "\""; while( *A>0 ) switch( *A ) {
 97 |     RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b");
 98 |     RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t");
 99 |     default: S << *A; A++;
100 |   }
101 |   S << "\""; return S;
102 | }
103 | 
104 | ostrm& json( ostrm& S, const JsonValue *o ) {
105 |   // convert JsonValue to JSON string
106 |   switch( o->getTag() ) {
107 |     case JSON_NUMBER: S << o->toNumber(); return S;
108 |     case JSON_TRUE:   S << "true"; return S;
109 |     case JSON_FALSE:  S << "false"; return S;
110 |     case JSON_NULL:   S << "null"; return S;
111 |     case JSON_STRING: return json(S,o->toString());
112 |     case JSON_ARRAY:
113 |       S << "["; for(auto i:*o) {
114 |         json(S,&i->value) << (i->next ? "," : ""); }
115 |       S << "]"; return S;
116 |     case JSON_OBJECT:
117 |       S << "{"; for(auto i:*o) {
118 |         json(S,i->key) << ":";
119 |         json(S,&i->value) << (i->next ? "," : ""); }
120 |       S << "}"; return S;
121 |     default: return S;
122 |   }
123 | }
124 | 
125 | ostrm& json( ostrm& S, const mxArray *M ) {
126 |   // convert Matlab mxArray to JSON string
127 |   siz i, j, m, n=mxGetNumberOfElements(M);
128 |   void *A=mxGetData(M); ostrm *nms;
129 |   switch( mxGetClassID(M) ) {
130 |     case mxDOUBLE_CLASS:  return json(S,(double*)   A,n);
131 |     case mxSINGLE_CLASS:  return json(S,(float*)    A,n);
132 |     case mxINT64_CLASS:   return json(S,(int64_t*)  A,n);
133 |     case mxUINT64_CLASS:  return json(S,(uint64_t*) A,n);
134 |     case mxINT32_CLASS:   return json(S,(int32_t*)  A,n);
135 |     case mxUINT32_CLASS:  return json(S,(uint32_t*) A,n);
136 |     case mxINT16_CLASS:   return json(S,(int16_t*)  A,n);
137 |     case mxUINT16_CLASS:  return json(S,(uint16_t*) A,n);
138 |     case mxINT8_CLASS:    return json<int8_t,int32_t>(S,(int8_t*) A,n);
139 |     case mxUINT8_CLASS:   return json<uint8_t,uint32_t>(S,(uint8_t*) A,n);
140 |     case mxLOGICAL_CLASS: return json<uint8_t,uint32_t>(S,(uint8_t*) A,n);
141 |     case mxCHAR_CLASS:    return json(S,mxArrayToString(M));
142 |     case mxCELL_CLASS:
143 |       S << "["; for(i=0; i<n-1; i++) json(S,mxGetCell(M,i)) << ",";
144 |       if(n>0) json(S,mxGetCell(M,n-1)); S << "]"; return S;
145 |     case mxSTRUCT_CLASS:
146 |       if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M);
147 |       if(m==0) { S<<"["; for(i=0; i<n; i++) S<<"{},"; S<<"]"; return S; }
148 |       if(n>1) S<<"["; nms=new ostrm[m];
149 |       for(j=0; j<m; j++) json(nms[j],mxGetFieldNameByNumber(M,j));
150 |       for(i=0; i<n; i++) for(j=0; j<m; j++) {
151 |         if(j==0) S << "{"; S << nms[j].str() << ":";
152 |         json(S,mxGetFieldByNumber(M,i,j)) << ((j<m-1) ? "," : "}");
153 |         if(j==m-1 && i<n-1) S<<",";
154 |       }
155 |       if(n>1) S<<"]"; delete [] nms; return S;
156 |     default:
157 |       mexErrMsgTxt( "Unknown type." ); return S;
158 |   }
159 | }
160 | 
161 | mxArray* mxCreateStringRobust( const char* str ) {
162 |   // convert char* to Matlab string (robust version of mxCreateString)
163 |   mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)};
164 |   M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M);
165 |   for( siz i=0; i<n[1]; i++ ) c[i]=str[i]; return M;
166 | }
167 | 
168 | char* mxArrayToStringRobust( const mxArray *M ) {
169 |   // convert Matlab string to char* (robust version of mxArrayToString)
170 |   if(!mxIsChar(M)) mexErrMsgTxt("String expected.");
171 |   ushort *c=(ushort*) mxGetData(M); char* str; siz n;
172 |   n=mxGetNumberOfElements(M); str=(char*) mxMalloc(n+1);
173 |   for( siz i=0; i<n; i++ ) str[i]=c[i]; str[n]=0; return str;
174 | }
175 | 
176 | void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
177 | {
178 |   char action[1024]; if(!nr) mexErrMsgTxt("Inputs expected.");
179 |   mxGetString(pr[0],action,1024); nr--; pr++;
180 |   char *endptr; JsonValue val; JsonAllocator allocator;
181 |   if( nl>1 ) mexErrMsgTxt("One output expected.");
182 |   
183 |   if(!strcmp(action,"convert")) {
184 |     if( nr!=1 ) mexErrMsgTxt("One input expected.");
185 |     if( mxGetClassID(pr[0])==mxCHAR_CLASS ) {
186 |       // object = mexFunction( string )
187 |       char *str = mxArrayToStringRobust(pr[0]);
188 |       int status = jsonParse(str, &endptr, &val, allocator);
189 |       if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
190 |       pl[0] = json(val); mxFree(str);
191 |     } else {
192 |       // string = mexFunction( object )
193 |       ostrm S; S << std::setprecision(12); json(S,pr[0]);
194 |       pl[0]=mxCreateStringRobust(S.str().c_str());
195 |     }
196 |     
197 |   } else if(!strcmp(action,"split")) {
198 |     // strings = mexFunction( string, k )
199 |     if( nr!=2 ) mexErrMsgTxt("Two input expected.");
200 |     char *str = mxArrayToStringRobust(pr[0]);
201 |     int status = jsonParse(str, &endptr, &val, allocator);
202 |     if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
203 |     if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected");
204 |     siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]);
205 |     k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k));
206 |     pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<<std::setprecision(12);
207 |     for(auto o:val) {
208 |       if(!t) { S.str(std::string()); S << "["; t=ceil(double(n)/k); }
209 |       json(S,&o->value); t--; if(!o->next) t=0; S << (t ? "," : "]");
210 |       if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str()));
211 |     }
212 |     
213 |   } else if(!strcmp(action,"merge")) {
214 |     // string = mexFunction( strings )
215 |     if( nr!=1 ) mexErrMsgTxt("One input expected.");
216 |     if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected.");
217 |     siz n = mxGetNumberOfElements(pr[0]);
218 |     ostrm S; S << std::setprecision(12); S << "[";
219 |     for( siz i=0; i<n; i++ ) {
220 |       char *str = mxArrayToStringRobust(mxGetCell(pr[0],i));
221 |       int status = jsonParse(str, &endptr, &val, allocator);
222 |       if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
223 |       if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected");
224 |       for(auto j:val) json(S,&j->value) << (j->next ? "," : "");
225 |       mxFree(str); if(i<n-1) S<<",";
226 |     }
227 |     S << "]"; pl[0]=mxCreateStringRobust(S.str().c_str());
228 |     
229 |   } else mexErrMsgTxt("Invalid action.");
230 | }
231 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/private/gasonMex.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/MatlabAPI/private/gasonMex.mexa64


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/private/gasonMex.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/MatlabAPI/private/gasonMex.mexmaci64


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/private/getPrmDflt.m:
--------------------------------------------------------------------------------
 1 | function varargout = getPrmDflt( prm, dfs, checkExtra )
 2 | % Helper to set default values (if not already set) of parameter struct.
 3 | %
 4 | % Takes input parameters and a list of 'name'/default pairs, and for each
 5 | % 'name' for which prm has no value (prm.(name) is not a field or 'name'
 6 | % does not appear in prm list), getPrmDflt assigns the given default
 7 | % value. If default value for variable 'name' is 'REQ', and value for
 8 | % 'name' is not given, an error is thrown. See below for usage details.
 9 | %
10 | % USAGE (nargout==1)
11 | %  prm = getPrmDflt( prm, dfs, [checkExtra] )
12 | %
13 | % USAGE (nargout>1)
14 | %  [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] )
15 | %
16 | % INPUTS
17 | %  prm          - param struct or cell of form {'name1' v1 'name2' v2 ...}
18 | %  dfs          - cell of form {'name1' def1 'name2' def2 ...}
19 | %  checkExtra   - [0] if 1 throw error if prm contains params not in dfs
20 | %                 if -1 if prm contains params not in dfs adds them
21 | %
22 | % OUTPUTS (nargout==1)
23 | %  prm    - parameter struct with fields 'name1' through 'nameN' assigned
24 | %
25 | % OUTPUTS (nargout>1)
26 | %  param1 - value assigned to parameter with 'name1'
27 | %   ...
28 | %  paramN - value assigned to parameter with 'nameN'
29 | %
30 | % EXAMPLE
31 | %  dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 };
32 | %  prm = getPrmDflt( struct('x',1,'y',1), dfs )
33 | %  [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs )
34 | %
35 | % See also INPUTPARSER
36 | %
37 | % Piotr's Computer Vision Matlab Toolbox      Version 2.60
38 | % Copyright 2014 Piotr Dollar.  [pdollar-at-gmail.com]
39 | % Licensed under the Simplified BSD License [see external/bsd.txt]
40 | 
41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end
42 | if nargin<=2, checkExtra = 0; end
43 | 
44 | % get the input parameters as two cell arrays: prmVal and prmField
45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end
46 | if iscell(prm)
47 |   if(mod(length(prm),2)), error('odd number of parameters in prm'); end
48 |   prmField = prm(1:2:end); prmVal = prm(2:2:end);
49 | else
50 |   if(~isstruct(prm)), error('prm must be a struct or a cell'); end
51 |   prmVal = struct2cell(prm); prmField = fieldnames(prm);
52 | end
53 | 
54 | % get and update default values using quick for loop
55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end);
56 | if checkExtra>0
57 |   for i=1:length(prmField)
58 |     j = find(strcmp(prmField{i},dfsField));
59 |     if isempty(j), error('parameter %s is not valid', prmField{i}); end
60 |     dfsVal(j) = prmVal(i);
61 |   end
62 | elseif checkExtra<0
63 |   for i=1:length(prmField)
64 |     j = find(strcmp(prmField{i},dfsField));
65 |     if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end
66 |     dfsVal(j) = prmVal(i);
67 |   end
68 | else
69 |   for i=1:length(prmField)
70 |     dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i);
71 |   end
72 | end
73 | 
74 | % check for missing values
75 | if any(strcmp('REQ',dfsVal))
76 |   cmpArray = find(strcmp('REQ',dfsVal));
77 |   error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] );
78 | end
79 | 
80 | % set output
81 | if nargout==1
82 |   varargout{1} = cell2struct( dfsVal, dfsField, 2 );
83 | else
84 |   varargout = dfsVal;
85 | end
86 | 


--------------------------------------------------------------------------------
/cocoapi-master/MatlabAPI/private/maskApiMex.c:
--------------------------------------------------------------------------------
  1 | /**************************************************************************
  2 | * Microsoft COCO Toolbox.      version 2.0
  3 | * Data, paper, and tutorials available at:  http://mscoco.org/
  4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  5 | * Licensed under the Simplified BSD License [see coco/license.txt]
  6 | **************************************************************************/
  7 | #include "mex.h"
  8 | #include "maskApi.h"
  9 | #include <string.h>
 10 | 
 11 | void checkType( const mxArray *M, mxClassID id ) {
 12 |   if(mxGetClassID(M)!=id) mexErrMsgTxt("Invalid type.");
 13 | }
 14 | 
 15 | mxArray* toMxArray( const RLE *R, siz n ) {
 16 |   const char *fs[] = {"size", "counts"};
 17 |   mxArray *M=mxCreateStructMatrix(1,n,2,fs);
 18 |   for( siz i=0; i<n; i++ ) {
 19 |     mxArray *S=mxCreateNumericMatrix(1,2,mxDOUBLE_CLASS,mxREAL);
 20 |     mxSetFieldByNumber(M,i,0,S); double *s=mxGetPr(S);
 21 |     s[0]=R[i].h; s[1]=R[i].w; char *c=rleToString(R+i);
 22 |     mxSetFieldByNumber(M,i,1,mxCreateString(c)); free(c);
 23 |   }
 24 |   return M;
 25 | }
 26 | 
 27 | RLE* frMxArray( const mxArray *M, siz *n, bool same ) {
 28 |   const char *fs[] = {"size", "counts"}; siz i, j, m, h, w, O[2];
 29 |   const char *err="Invalid RLE struct array.";
 30 |   *n=mxGetNumberOfElements(M); RLE *R; rlesInit(&R,*n); if(!(*n)) return R;
 31 |   if(!mxIsStruct(M) || mxGetNumberOfFields(M)!=2) mexErrMsgTxt(err);
 32 |   for( i=0; i<2; i++ ) { O[i]=2; for( j=0; j<2; j++ ) {
 33 |     if(!strcmp(mxGetFieldNameByNumber(M,j),fs[i])) O[i]=j; }}
 34 |   for( i=0; i<2; i++ ) if(O[i]>1) mexErrMsgTxt(err);
 35 |   for( i=0; i<*n; i++ ) {
 36 |     mxArray *S, *C; double *s; void *c;
 37 |     S=mxGetFieldByNumber(M,i,O[0]); checkType(S,mxDOUBLE_CLASS);
 38 |     C=mxGetFieldByNumber(M,i,O[1]); s=mxGetPr(S); c=mxGetData(C);
 39 |     h=(siz)s[0]; w=(siz)s[1]; m=mxGetNumberOfElements(C);
 40 |     if(same && i>0 && (h!=R[0].h || w!=R[0].w)) mexErrMsgTxt(err);
 41 |     if( mxGetClassID(C)==mxDOUBLE_CLASS ) {
 42 |       rleInit(R+i,h,w,m,0);
 43 |       for(j=0; j<m; j++) R[i].cnts[j]=(uint)((double*)c)[j];
 44 |     } else if( mxGetClassID(C)==mxUINT32_CLASS ) {
 45 |       rleInit(R+i,h,w,m,(uint*)c);
 46 |     } else if( mxGetClassID(C)==mxCHAR_CLASS ) {
 47 |       char *c=mxMalloc(sizeof(char)*(m+1)); mxGetString(C,c,m+1);
 48 |       rleFrString(R+i,c,h,w); mxFree(c);
 49 |     }
 50 |     else mexErrMsgTxt(err);
 51 |   }
 52 |   return R;
 53 | }
 54 | 
 55 | void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
 56 | {
 57 |   char action[1024]; RLE *R=0; siz h=0, w=0, n=0;
 58 |   mxGetString(pr[0],action,1024); nr--; pr++;
 59 |   
 60 |   if(!strcmp(action,"encode")) {
 61 |     checkType(pr[0],mxUINT8_CLASS); byte *M=(byte*) mxGetData(pr[0]);
 62 |     const mwSize *ds=mxGetDimensions(pr[0]); n=mxGetN(pr[0])/ds[1];
 63 |     rlesInit(&R,n); rleEncode(R,M,ds[0],ds[1],n); pl[0]=toMxArray(R,n);
 64 |     
 65 |   } else if(!strcmp(action,"decode")) {
 66 |     R=frMxArray(pr[0],&n,1); mwSize ds[3];
 67 |     ds[0]=n?R[0].h:0; ds[1]=n?R[0].w:0; ds[2]=n;
 68 |     pl[0]=mxCreateNumericArray(3,ds,mxUINT8_CLASS,mxREAL);
 69 |     byte *M=(byte*) mxGetPr(pl[0]); rleDecode(R,M,n);
 70 |     
 71 |   } else if(!strcmp(action,"merge")) {
 72 |     R=frMxArray(pr[0],&n,1); RLE M;
 73 |     bool intersect = (nr>=2) ? (mxGetScalar(pr[1])>0) : false;
 74 |     rleMerge(R,&M,n,intersect); pl[0]=toMxArray(&M,1); rleFree(&M);
 75 |     
 76 |   } else if(!strcmp(action,"area")) {
 77 |     R=frMxArray(pr[0],&n,0);
 78 |     pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL);
 79 |     uint *a=(uint*) mxGetPr(pl[0]); rleArea(R,n,a);
 80 |     
 81 |   } else if(!strcmp(action,"iou")) {
 82 |     if(nr>2) checkType(pr[2],mxUINT8_CLASS); siz nDt, nGt;
 83 |     byte *iscrowd = nr>2 ? (byte*) mxGetPr(pr[2]) : NULL;
 84 |     if(mxIsStruct(pr[0]) || mxIsStruct(pr[1])) {
 85 |       RLE *dt=frMxArray(pr[0],&nDt,1), *gt=frMxArray(pr[1],&nGt,1);
 86 |       pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL);
 87 |       double *o=mxGetPr(pl[0]); rleIou(dt,gt,nDt,nGt,iscrowd,o);
 88 |       rlesFree(&dt,nDt); rlesFree(&gt,nGt);
 89 |     } else {
 90 |       checkType(pr[0],mxDOUBLE_CLASS); checkType(pr[1],mxDOUBLE_CLASS);
 91 |       double *dt=mxGetPr(pr[0]); nDt=mxGetN(pr[0]);
 92 |       double *gt=mxGetPr(pr[1]); nGt=mxGetN(pr[1]);
 93 |       pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL);
 94 |       double *o=mxGetPr(pl[0]); bbIou(dt,gt,nDt,nGt,iscrowd,o);
 95 |     }
 96 |     
 97 |   } else if(!strcmp(action,"nms")) {
 98 |     siz n; uint *keep; double thr=(double) mxGetScalar(pr[1]);
 99 |     if(mxIsStruct(pr[0])) {
100 |       RLE *dt=frMxArray(pr[0],&n,1);
101 |       pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL);
102 |       keep=(uint*) mxGetPr(pl[0]); rleNms(dt,n,keep,thr);
103 |       rlesFree(&dt,n);
104 |     } else {
105 |       checkType(pr[0],mxDOUBLE_CLASS);
106 |       double *dt=mxGetPr(pr[0]); n=mxGetN(pr[0]);
107 |       pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL);
108 |       keep=(uint*) mxGetPr(pl[0]); bbNms(dt,n,keep,thr);
109 |     }
110 |     
111 |   } else if(!strcmp(action,"toBbox")) {
112 |     R=frMxArray(pr[0],&n,0);
113 |     pl[0]=mxCreateNumericMatrix(4,n,mxDOUBLE_CLASS,mxREAL);
114 |     BB bb=mxGetPr(pl[0]); rleToBbox(R,bb,n);
115 |     
116 |   } else if(!strcmp(action,"frBbox")) {
117 |     checkType(pr[0],mxDOUBLE_CLASS);
118 |     double *bb=mxGetPr(pr[0]); n=mxGetN(pr[0]);
119 |     h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]);
120 |     rlesInit(&R,n); rleFrBbox(R,bb,h,w,n); pl[0]=toMxArray(R,n);
121 |     
122 |   } else if(!strcmp(action,"frPoly")) {
123 |     checkType(pr[0],mxCELL_CLASS); n=mxGetNumberOfElements(pr[0]);
124 |     h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); rlesInit(&R,n);
125 |     for(siz i=0; i<n; i++) {
126 |       mxArray *XY=mxGetCell(pr[0],i); checkType(XY,mxDOUBLE_CLASS);
127 |       siz k=mxGetNumberOfElements(XY)/2; double *xy=mxGetPr(XY);
128 |       rleFrPoly(R+i,xy,k,h,w);
129 |     }
130 |     RLE M; rleMerge(R,&M,n,0); pl[0]=toMxArray(&M,1); rleFree(&M);
131 |     
132 |   } else mexErrMsgTxt("Invalid action.");
133 |   if( R!=0 ) { rlesFree(&R,n); R=0; }
134 | }
135 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 |     # install pycocotools locally
3 | 	python setup.py build_ext --inplace
4 | 	rm -rf build
5 | 
6 | install:
7 | 	# install pycocotools to the Python site-packages
8 | 	python setup.py build_ext install
9 | 	rm -rf build


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/build/common/maskApi.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/PythonAPI/build/common/maskApi.o


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/build/lib.linux-x86_64-2.7/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/build/lib.linux-x86_64-2.7/pycocotools/_mask.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/PythonAPI/build/lib.linux-x86_64-2.7/pycocotools/_mask.so


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/build/lib.linux-x86_64-2.7/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | import pycocotools._mask as _mask
  4 | 
  5 | # Interface for manipulating masks stored in RLE format.
  6 | #
  7 | # RLE is a simple yet efficient format for storing binary masks. RLE
  8 | # first divides a vector (or vectorized image) into a series of piecewise
  9 | # constant regions and then for each piece simply stores the length of
 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 12 | # (note that the odd counts are always the numbers of zeros). Instead of
 13 | # storing the counts directly, additional compression is achieved with a
 14 | # variable bitrate representation based on a common scheme called LEB128.
 15 | #
 16 | # Compression is greatest given large piecewise constant regions.
 17 | # Specifically, the size of the RLE is proportional to the number of
 18 | # *boundaries* in M (or for an image the number of boundaries in the y
 19 | # direction). Assuming fairly simple shapes, the RLE representation is
 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 21 | # is substantially lower, especially for large simple objects (large n).
 22 | #
 23 | # Many common operations on masks can be computed directly using the RLE
 24 | # (without need for decoding). This includes computations such as area,
 25 | # union, intersection, etc. All of these operations are linear in the
 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 27 | # of the object. Computing these operations on the original mask is O(n).
 28 | # Thus, using the RLE can result in substantial computational savings.
 29 | #
 30 | # The following API functions are defined:
 31 | #  encode         - Encode binary masks using RLE.
 32 | #  decode         - Decode binary masks encoded via RLE.
 33 | #  merge          - Compute union or intersection of encoded masks.
 34 | #  iou            - Compute intersection over union between masks.
 35 | #  area           - Compute area of encoded masks.
 36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 38 | #
 39 | # Usage:
 40 | #  Rs     = encode( masks )
 41 | #  masks  = decode( Rs )
 42 | #  R      = merge( Rs, intersect=false )
 43 | #  o      = iou( dt, gt, iscrowd )
 44 | #  a      = area( Rs )
 45 | #  bbs    = toBbox( Rs )
 46 | #  Rs     = frPyObjects( [pyObjects], h, w )
 47 | #
 48 | # In the API the following formats are used:
 49 | #  Rs      - [dict] Run-length encoding of binary masks
 50 | #  R       - dict Run-length encoding of binary mask
 51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 55 | #  dt,gt   - May be either bounding boxes or encoded masks
 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 57 | #
 58 | # Finally, a note about the intersection over union (iou) computation.
 59 | # The standard iou of a ground truth (gt) and detected (dt) object is
 60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 61 | # For "crowd" regions, we use a modified criteria. If a gt object is
 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 66 | # For crowd gt regions we use this modified criteria above for the iou.
 67 | #
 68 | # To compile run "python setup.py build_ext --inplace"
 69 | # Please do not contact us for help with compiling.
 70 | #
 71 | # Microsoft COCO Toolbox.      version 2.0
 72 | # Data, paper, and tutorials available at:  http://mscoco.org/
 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 74 | # Licensed under the Simplified BSD License [see coco/license.txt]
 75 | 
 76 | iou         = _mask.iou
 77 | merge       = _mask.merge
 78 | frPyObjects = _mask.frPyObjects
 79 | 
 80 | def encode(bimask):
 81 |     if len(bimask.shape) == 3:
 82 |         return _mask.encode(bimask)
 83 |     elif len(bimask.shape) == 2:
 84 |         h, w = bimask.shape
 85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 86 | 
 87 | def decode(rleObjs):
 88 |     if type(rleObjs) == list:
 89 |         return _mask.decode(rleObjs)
 90 |     else:
 91 |         return _mask.decode([rleObjs])[:,:,0]
 92 | 
 93 | def area(rleObjs):
 94 |     if type(rleObjs) == list:
 95 |         return _mask.area(rleObjs)
 96 |     else:
 97 |         return _mask.area([rleObjs])[0]
 98 | 
 99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/build/temp.linux-x86_64-2.7/pycocotools/_mask.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/PythonAPI/build/temp.linux-x86_64-2.7/pycocotools/_mask.o


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/dist/pycocotools-2.0-py2.7-linux-x86_64.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/PythonAPI/dist/pycocotools-2.0-py2.7-linux-x86_64.egg


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: pycocotools
 3 | Version: 2.0
 4 | Summary: UNKNOWN
 5 | Home-page: UNKNOWN
 6 | Author: UNKNOWN
 7 | Author-email: UNKNOWN
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | setup.py
 2 | ../common/maskApi.c
 3 | pycocotools/__init__.py
 4 | pycocotools/_mask.c
 5 | pycocotools/coco.py
 6 | pycocotools/cocoeval.py
 7 | pycocotools/mask.py
 8 | pycocotools.egg-info/PKG-INFO
 9 | pycocotools.egg-info/SOURCES.txt
10 | pycocotools.egg-info/dependency_links.txt
11 | pycocotools.egg-info/requires.txt
12 | pycocotools.egg-info/top_level.txt


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | setuptools>=18.0
2 | cython>=0.27.3
3 | matplotlib>=2.1.0
4 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pycocotools
2 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools/_mask.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/cocoapi-master/PythonAPI/pycocotools/_mask.so


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | import pycocotools._mask as _mask
  4 | 
  5 | # Interface for manipulating masks stored in RLE format.
  6 | #
  7 | # RLE is a simple yet efficient format for storing binary masks. RLE
  8 | # first divides a vector (or vectorized image) into a series of piecewise
  9 | # constant regions and then for each piece simply stores the length of
 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 12 | # (note that the odd counts are always the numbers of zeros). Instead of
 13 | # storing the counts directly, additional compression is achieved with a
 14 | # variable bitrate representation based on a common scheme called LEB128.
 15 | #
 16 | # Compression is greatest given large piecewise constant regions.
 17 | # Specifically, the size of the RLE is proportional to the number of
 18 | # *boundaries* in M (or for an image the number of boundaries in the y
 19 | # direction). Assuming fairly simple shapes, the RLE representation is
 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 21 | # is substantially lower, especially for large simple objects (large n).
 22 | #
 23 | # Many common operations on masks can be computed directly using the RLE
 24 | # (without need for decoding). This includes computations such as area,
 25 | # union, intersection, etc. All of these operations are linear in the
 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 27 | # of the object. Computing these operations on the original mask is O(n).
 28 | # Thus, using the RLE can result in substantial computational savings.
 29 | #
 30 | # The following API functions are defined:
 31 | #  encode         - Encode binary masks using RLE.
 32 | #  decode         - Decode binary masks encoded via RLE.
 33 | #  merge          - Compute union or intersection of encoded masks.
 34 | #  iou            - Compute intersection over union between masks.
 35 | #  area           - Compute area of encoded masks.
 36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 38 | #
 39 | # Usage:
 40 | #  Rs     = encode( masks )
 41 | #  masks  = decode( Rs )
 42 | #  R      = merge( Rs, intersect=false )
 43 | #  o      = iou( dt, gt, iscrowd )
 44 | #  a      = area( Rs )
 45 | #  bbs    = toBbox( Rs )
 46 | #  Rs     = frPyObjects( [pyObjects], h, w )
 47 | #
 48 | # In the API the following formats are used:
 49 | #  Rs      - [dict] Run-length encoding of binary masks
 50 | #  R       - dict Run-length encoding of binary mask
 51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 55 | #  dt,gt   - May be either bounding boxes or encoded masks
 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 57 | #
 58 | # Finally, a note about the intersection over union (iou) computation.
 59 | # The standard iou of a ground truth (gt) and detected (dt) object is
 60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 61 | # For "crowd" regions, we use a modified criteria. If a gt object is
 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 66 | # For crowd gt regions we use this modified criteria above for the iou.
 67 | #
 68 | # To compile run "python setup.py build_ext --inplace"
 69 | # Please do not contact us for help with compiling.
 70 | #
 71 | # Microsoft COCO Toolbox.      version 2.0
 72 | # Data, paper, and tutorials available at:  http://mscoco.org/
 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 74 | # Licensed under the Simplified BSD License [see coco/license.txt]
 75 | 
 76 | iou         = _mask.iou
 77 | merge       = _mask.merge
 78 | frPyObjects = _mask.frPyObjects
 79 | 
 80 | def encode(bimask):
 81 |     if len(bimask.shape) == 3:
 82 |         return _mask.encode(bimask)
 83 |     elif len(bimask.shape) == 2:
 84 |         h, w = bimask.shape
 85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 86 | 
 87 | def decode(rleObjs):
 88 |     if type(rleObjs) == list:
 89 |         return _mask.decode(rleObjs)
 90 |     else:
 91 |         return _mask.decode([rleObjs])[:,:,0]
 92 | 
 93 | def area(rleObjs):
 94 |     if type(rleObjs) == list:
 95 |         return _mask.area(rleObjs)
 96 |     else:
 97 |         return _mask.area([rleObjs])[0]
 98 | 
 99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]


--------------------------------------------------------------------------------
/cocoapi-master/PythonAPI/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Extension
 2 | import numpy as np
 3 | 
 4 | # To compile and install locally run "python setup.py build_ext --inplace"
 5 | # To install library to Python site-packages run "python setup.py build_ext install"
 6 | 
 7 | ext_modules = [
 8 |     Extension(
 9 |         'pycocotools._mask',
10 |         sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'],
11 |         include_dirs = [np.get_include(), '../common'],
12 |         extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
13 |     )
14 | ]
15 | 
16 | setup(
17 |     name='pycocotools',
18 |     packages=['pycocotools'],
19 |     package_dir = {'pycocotools': 'pycocotools'},
20 |     install_requires=[
21 |         'setuptools>=18.0',
22 |         'cython>=0.27.3',
23 |         'matplotlib>=2.1.0'
24 |     ],
25 |     version='2.0',
26 |     ext_modules= ext_modules
27 | )
28 | 


--------------------------------------------------------------------------------
/cocoapi-master/README.txt:
--------------------------------------------------------------------------------
 1 | COCO API - http://cocodataset.org/
 2 | 
 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including for the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete, the Lua API provides only basic functionality.
 4 | 
 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website.
 6 | -Please download, unzip, and place the images in: coco/images/
 7 | -Please download and place the annotations in: coco/annotations/
 8 | For substantially more details on the API please see http://cocodataset.org/#download.
 9 | 
10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage.
11 | 
12 | To install:
13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided)
14 | -For Python, run "make" under coco/PythonAPI
15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/
16 | 


--------------------------------------------------------------------------------
/cocoapi-master/common/gason.cpp:
--------------------------------------------------------------------------------
  1 | // https://github.com/vivkin/gason - pulled January 10, 2016
  2 | #include "gason.h"
  3 | #include <stdlib.h>
  4 | 
  5 | #define JSON_ZONE_SIZE 4096
  6 | #define JSON_STACK_SIZE 32
  7 | 
  8 | const char *jsonStrError(int err) {
  9 |     switch (err) {
 10 | #define XX(no, str) \
 11 |     case JSON_##no: \
 12 |         return str;
 13 |         JSON_ERRNO_MAP(XX)
 14 | #undef XX
 15 |     default:
 16 |         return "unknown";
 17 |     }
 18 | }
 19 | 
 20 | void *JsonAllocator::allocate(size_t size) {
 21 |     size = (size + 7) & ~7;
 22 | 
 23 |     if (head && head->used + size <= JSON_ZONE_SIZE) {
 24 |         char *p = (char *)head + head->used;
 25 |         head->used += size;
 26 |         return p;
 27 |     }
 28 | 
 29 |     size_t allocSize = sizeof(Zone) + size;
 30 |     Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize);
 31 |     if (zone == nullptr)
 32 |         return nullptr;
 33 |     zone->used = allocSize;
 34 |     if (allocSize <= JSON_ZONE_SIZE || head == nullptr) {
 35 |         zone->next = head;
 36 |         head = zone;
 37 |     } else {
 38 |         zone->next = head->next;
 39 |         head->next = zone;
 40 |     }
 41 |     return (char *)zone + sizeof(Zone);
 42 | }
 43 | 
 44 | void JsonAllocator::deallocate() {
 45 |     while (head) {
 46 |         Zone *next = head->next;
 47 |         free(head);
 48 |         head = next;
 49 |     }
 50 | }
 51 | 
 52 | static inline bool isspace(char c) {
 53 |     return c == ' ' || (c >= '\t' && c <= '\r');
 54 | }
 55 | 
 56 | static inline bool isdelim(char c) {
 57 |     return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c;
 58 | }
 59 | 
 60 | static inline bool isdigit(char c) {
 61 |     return c >= '0' && c <= '9';
 62 | }
 63 | 
 64 | static inline bool isxdigit(char c) {
 65 |     return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F');
 66 | }
 67 | 
 68 | static inline int char2int(char c) {
 69 |     if (c <= '9')
 70 |         return c - '0';
 71 |     return (c & ~' ') - 'A' + 10;
 72 | }
 73 | 
 74 | static double string2double(char *s, char **endptr) {
 75 |     char ch = *s;
 76 |     if (ch == '-')
 77 |         ++s;
 78 | 
 79 |     double result = 0;
 80 |     while (isdigit(*s))
 81 |         result = (result * 10) + (*s++ - '0');
 82 | 
 83 |     if (*s == '.') {
 84 |         ++s;
 85 | 
 86 |         double fraction = 1;
 87 |         while (isdigit(*s)) {
 88 |             fraction *= 0.1;
 89 |             result += (*s++ - '0') * fraction;
 90 |         }
 91 |     }
 92 | 
 93 |     if (*s == 'e' || *s == 'E') {
 94 |         ++s;
 95 | 
 96 |         double base = 10;
 97 |         if (*s == '+')
 98 |             ++s;
 99 |         else if (*s == '-') {
100 |             ++s;
101 |             base = 0.1;
102 |         }
103 | 
104 |         unsigned int exponent = 0;
105 |         while (isdigit(*s))
106 |             exponent = (exponent * 10) + (*s++ - '0');
107 | 
108 |         double power = 1;
109 |         for (; exponent; exponent >>= 1, base *= base)
110 |             if (exponent & 1)
111 |                 power *= base;
112 | 
113 |         result *= power;
114 |     }
115 | 
116 |     *endptr = s;
117 |     return ch == '-' ? -result : result;
118 | }
119 | 
120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) {
121 |     if (!tail)
122 |         return node->next = node;
123 |     node->next = tail->next;
124 |     tail->next = node;
125 |     return node;
126 | }
127 | 
128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) {
129 |     if (tail) {
130 |         auto head = tail->next;
131 |         tail->next = nullptr;
132 |         return JsonValue(tag, head);
133 |     }
134 |     return JsonValue(tag, nullptr);
135 | }
136 | 
137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) {
138 |     JsonNode *tails[JSON_STACK_SIZE];
139 |     JsonTag tags[JSON_STACK_SIZE];
140 |     char *keys[JSON_STACK_SIZE];
141 |     JsonValue o;
142 |     int pos = -1;
143 |     bool separator = true;
144 |     JsonNode *node;
145 |     *endptr = s;
146 | 
147 |     while (*s) {
148 |         while (isspace(*s)) {
149 |             ++s;
150 |             if (!*s) break;
151 |         }
152 |         *endptr = s++;
153 |         switch (**endptr) {
154 |         case '-':
155 |             if (!isdigit(*s) && *s != '.') {
156 |                 *endptr = s;
157 |                 return JSON_BAD_NUMBER;
158 |             }
159 |         case '0':
160 |         case '1':
161 |         case '2':
162 |         case '3':
163 |         case '4':
164 |         case '5':
165 |         case '6':
166 |         case '7':
167 |         case '8':
168 |         case '9':
169 |             o = JsonValue(string2double(*endptr, &s));
170 |             if (!isdelim(*s)) {
171 |                 *endptr = s;
172 |                 return JSON_BAD_NUMBER;
173 |             }
174 |             break;
175 |         case '"':
176 |             o = JsonValue(JSON_STRING, s);
177 |             for (char *it = s; *s; ++it, ++s) {
178 |                 int c = *it = *s;
179 |                 if (c == '\\') {
180 |                     c = *++s;
181 |                     switch (c) {
182 |                     case '\\':
183 |                     case '"':
184 |                     case '/':
185 |                         *it = c;
186 |                         break;
187 |                     case 'b':
188 |                         *it = '\b';
189 |                         break;
190 |                     case 'f':
191 |                         *it = '\f';
192 |                         break;
193 |                     case 'n':
194 |                         *it = '\n';
195 |                         break;
196 |                     case 'r':
197 |                         *it = '\r';
198 |                         break;
199 |                     case 't':
200 |                         *it = '\t';
201 |                         break;
202 |                     case 'u':
203 |                         c = 0;
204 |                         for (int i = 0; i < 4; ++i) {
205 |                             if (isxdigit(*++s)) {
206 |                                 c = c * 16 + char2int(*s);
207 |                             } else {
208 |                                 *endptr = s;
209 |                                 return JSON_BAD_STRING;
210 |                             }
211 |                         }
212 |                         if (c < 0x80) {
213 |                             *it = c;
214 |                         } else if (c < 0x800) {
215 |                             *it++ = 0xC0 | (c >> 6);
216 |                             *it = 0x80 | (c & 0x3F);
217 |                         } else {
218 |                             *it++ = 0xE0 | (c >> 12);
219 |                             *it++ = 0x80 | ((c >> 6) & 0x3F);
220 |                             *it = 0x80 | (c & 0x3F);
221 |                         }
222 |                         break;
223 |                     default:
224 |                         *endptr = s;
225 |                         return JSON_BAD_STRING;
226 |                     }
227 |                 } else if ((unsigned int)c < ' ' || c == '\x7F') {
228 |                     *endptr = s;
229 |                     return JSON_BAD_STRING;
230 |                 } else if (c == '"') {
231 |                     *it = 0;
232 |                     ++s;
233 |                     break;
234 |                 }
235 |             }
236 |             if (!isdelim(*s)) {
237 |                 *endptr = s;
238 |                 return JSON_BAD_STRING;
239 |             }
240 |             break;
241 |         case 't':
242 |             if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3])))
243 |                 return JSON_BAD_IDENTIFIER;
244 |             o = JsonValue(JSON_TRUE);
245 |             s += 3;
246 |             break;
247 |         case 'f':
248 |             if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4])))
249 |                 return JSON_BAD_IDENTIFIER;
250 |             o = JsonValue(JSON_FALSE);
251 |             s += 4;
252 |             break;
253 |         case 'n':
254 |             if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3])))
255 |                 return JSON_BAD_IDENTIFIER;
256 |             o = JsonValue(JSON_NULL);
257 |             s += 3;
258 |             break;
259 |         case ']':
260 |             if (pos == -1)
261 |                 return JSON_STACK_UNDERFLOW;
262 |             if (tags[pos] != JSON_ARRAY)
263 |                 return JSON_MISMATCH_BRACKET;
264 |             o = listToValue(JSON_ARRAY, tails[pos--]);
265 |             break;
266 |         case '}':
267 |             if (pos == -1)
268 |                 return JSON_STACK_UNDERFLOW;
269 |             if (tags[pos] != JSON_OBJECT)
270 |                 return JSON_MISMATCH_BRACKET;
271 |             if (keys[pos] != nullptr)
272 |                 return JSON_UNEXPECTED_CHARACTER;
273 |             o = listToValue(JSON_OBJECT, tails[pos--]);
274 |             break;
275 |         case '[':
276 |             if (++pos == JSON_STACK_SIZE)
277 |                 return JSON_STACK_OVERFLOW;
278 |             tails[pos] = nullptr;
279 |             tags[pos] = JSON_ARRAY;
280 |             keys[pos] = nullptr;
281 |             separator = true;
282 |             continue;
283 |         case '{':
284 |             if (++pos == JSON_STACK_SIZE)
285 |                 return JSON_STACK_OVERFLOW;
286 |             tails[pos] = nullptr;
287 |             tags[pos] = JSON_OBJECT;
288 |             keys[pos] = nullptr;
289 |             separator = true;
290 |             continue;
291 |         case ':':
292 |             if (separator || keys[pos] == nullptr)
293 |                 return JSON_UNEXPECTED_CHARACTER;
294 |             separator = true;
295 |             continue;
296 |         case ',':
297 |             if (separator || keys[pos] != nullptr)
298 |                 return JSON_UNEXPECTED_CHARACTER;
299 |             separator = true;
300 |             continue;
301 |         case '\0':
302 |             continue;
303 |         default:
304 |             return JSON_UNEXPECTED_CHARACTER;
305 |         }
306 | 
307 |         separator = false;
308 | 
309 |         if (pos == -1) {
310 |             *endptr = s;
311 |             *value = o;
312 |             return JSON_OK;
313 |         }
314 | 
315 |         if (tags[pos] == JSON_OBJECT) {
316 |             if (!keys[pos]) {
317 |                 if (o.getTag() != JSON_STRING)
318 |                     return JSON_UNQUOTED_KEY;
319 |                 keys[pos] = o.toString();
320 |                 continue;
321 |             }
322 |             if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr)
323 |                 return JSON_ALLOCATION_FAILURE;
324 |             tails[pos] = insertAfter(tails[pos], node);
325 |             tails[pos]->key = keys[pos];
326 |             keys[pos] = nullptr;
327 |         } else {
328 |             if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr)
329 |                 return JSON_ALLOCATION_FAILURE;
330 |             tails[pos] = insertAfter(tails[pos], node);
331 |         }
332 |         tails[pos]->value = o;
333 |     }
334 |     return JSON_BREAKING_BAD;
335 | }
336 | 


--------------------------------------------------------------------------------
/cocoapi-master/common/gason.h:
--------------------------------------------------------------------------------
  1 | // https://github.com/vivkin/gason - pulled January 10, 2016
  2 | #pragma once
  3 | 
  4 | #include <stdint.h>
  5 | #include <stddef.h>
  6 | #include <assert.h>
  7 | 
  8 | enum JsonTag {
  9 |     JSON_NUMBER = 0,
 10 |     JSON_STRING,
 11 |     JSON_ARRAY,
 12 |     JSON_OBJECT,
 13 |     JSON_TRUE,
 14 |     JSON_FALSE,
 15 |     JSON_NULL = 0xF
 16 | };
 17 | 
 18 | struct JsonNode;
 19 | 
 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
 22 | #define JSON_VALUE_TAG_MASK 0xF
 23 | #define JSON_VALUE_TAG_SHIFT 47
 24 | 
 25 | union JsonValue {
 26 |     uint64_t ival;
 27 |     double fval;
 28 | 
 29 |     JsonValue(double x)
 30 |         : fval(x) {
 31 |     }
 32 |     JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
 33 |         assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
 34 |         ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
 35 |     }
 36 |     bool isDouble() const {
 37 |         return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
 38 |     }
 39 |     JsonTag getTag() const {
 40 |         return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
 41 |     }
 42 |     uint64_t getPayload() const {
 43 |         assert(!isDouble());
 44 |         return ival & JSON_VALUE_PAYLOAD_MASK;
 45 |     }
 46 |     double toNumber() const {
 47 |         assert(getTag() == JSON_NUMBER);
 48 |         return fval;
 49 |     }
 50 |     char *toString() const {
 51 |         assert(getTag() == JSON_STRING);
 52 |         return (char *)getPayload();
 53 |     }
 54 |     JsonNode *toNode() const {
 55 |         assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
 56 |         return (JsonNode *)getPayload();
 57 |     }
 58 | };
 59 | 
 60 | struct JsonNode {
 61 |     JsonValue value;
 62 |     JsonNode *next;
 63 |     char *key;
 64 | };
 65 | 
 66 | struct JsonIterator {
 67 |     JsonNode *p;
 68 | 
 69 |     void operator++() {
 70 |         p = p->next;
 71 |     }
 72 |     bool operator!=(const JsonIterator &x) const {
 73 |         return p != x.p;
 74 |     }
 75 |     JsonNode *operator*() const {
 76 |         return p;
 77 |     }
 78 |     JsonNode *operator->() const {
 79 |         return p;
 80 |     }
 81 | };
 82 | 
 83 | inline JsonIterator begin(JsonValue o) {
 84 |     return JsonIterator{o.toNode()};
 85 | }
 86 | inline JsonIterator end(JsonValue) {
 87 |     return JsonIterator{nullptr};
 88 | }
 89 | 
 90 | #define JSON_ERRNO_MAP(XX)                           \
 91 |     XX(OK, "ok")                                     \
 92 |     XX(BAD_NUMBER, "bad number")                     \
 93 |     XX(BAD_STRING, "bad string")                     \
 94 |     XX(BAD_IDENTIFIER, "bad identifier")             \
 95 |     XX(STACK_OVERFLOW, "stack overflow")             \
 96 |     XX(STACK_UNDERFLOW, "stack underflow")           \
 97 |     XX(MISMATCH_BRACKET, "mismatch bracket")         \
 98 |     XX(UNEXPECTED_CHARACTER, "unexpected character") \
 99 |     XX(UNQUOTED_KEY, "unquoted key")                 \
100 |     XX(BREAKING_BAD, "breaking bad")                 \
101 |     XX(ALLOCATION_FAILURE, "allocation failure")
102 | 
103 | enum JsonErrno {
104 | #define XX(no, str) JSON_##no,
105 |     JSON_ERRNO_MAP(XX)
106 | #undef XX
107 | };
108 | 
109 | const char *jsonStrError(int err);
110 | 
111 | class JsonAllocator {
112 |     struct Zone {
113 |         Zone *next;
114 |         size_t used;
115 |     } *head = nullptr;
116 | 
117 | public:
118 |     JsonAllocator() = default;
119 |     JsonAllocator(const JsonAllocator &) = delete;
120 |     JsonAllocator &operator=(const JsonAllocator &) = delete;
121 |     JsonAllocator(JsonAllocator &&x) : head(x.head) {
122 |         x.head = nullptr;
123 |     }
124 |     JsonAllocator &operator=(JsonAllocator &&x) {
125 |         head = x.head;
126 |         x.head = nullptr;
127 |         return *this;
128 |     }
129 |     ~JsonAllocator() {
130 |         deallocate();
131 |     }
132 |     void *allocate(size_t size);
133 |     void deallocate();
134 | };
135 | 
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 | 


--------------------------------------------------------------------------------
/cocoapi-master/common/maskApi.c:
--------------------------------------------------------------------------------
  1 | /**************************************************************************
  2 | * Microsoft COCO Toolbox.      version 2.0
  3 | * Data, paper, and tutorials available at:  http://mscoco.org/
  4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  5 | * Licensed under the Simplified BSD License [see coco/license.txt]
  6 | **************************************************************************/
  7 | #include "maskApi.h"
  8 | #include <math.h>
  9 | #include <stdlib.h>
 10 | 
 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
 12 | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
 13 | 
 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
 15 |   R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
 16 |   siz j; if(cnts) for(j=0; j<m; j++) R->cnts[j]=cnts[j];
 17 | }
 18 | 
 19 | void rleFree( RLE *R ) {
 20 |   free(R->cnts); R->cnts=0;
 21 | }
 22 | 
 23 | void rlesInit( RLE **R, siz n ) {
 24 |   siz i; *R = (RLE*) malloc(sizeof(RLE)*n);
 25 |   for(i=0; i<n; i++) rleInit((*R)+i,0,0,0,0);
 26 | }
 27 | 
 28 | void rlesFree( RLE **R, siz n ) {
 29 |   siz i; for(i=0; i<n; i++) rleFree((*R)+i); free(*R); *R=0;
 30 | }
 31 | 
 32 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
 33 |   siz i, j, k, a=w*h; uint c, *cnts; byte p;
 34 |   cnts = malloc(sizeof(uint)*(a+1));
 35 |   for(i=0; i<n; i++) {
 36 |     const byte *T=M+a*i; k=0; p=0; c=0;
 37 |     for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
 38 |     cnts[k++]=c; rleInit(R+i,h,w,k,cnts);
 39 |   }
 40 |   free(cnts);
 41 | }
 42 | 
 43 | void rleDecode( const RLE *R, byte *M, siz n ) {
 44 |   siz i, j, k; for( i=0; i<n; i++ ) {
 45 |     byte v=0; for( j=0; j<R[i].m; j++ ) {
 46 |       for( k=0; k<R[i].cnts[j]; k++ ) *(M++)=v; v=!v; }}
 47 | }
 48 | 
 49 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) {
 50 |   uint *cnts, c, ca, cb, cc, ct; int v, va, vb, vp;
 51 |   siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
 52 |   if(n==0) { rleInit(M,0,0,0,0); return; }
 53 |   if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
 54 |   cnts = malloc(sizeof(uint)*(h*w+1));
 55 |   for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
 56 |   for( i=1; i<n; i++ ) {
 57 |     B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
 58 |     rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
 59 |     v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
 60 |     while( ct>0 ) {
 61 |       c=umin(ca,cb); cc+=c; ct=0;
 62 |       ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
 63 |       cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
 64 |       vp=v; if(intersect) v=va&&vb; else v=va||vb;
 65 |       if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
 66 |     }
 67 |     rleFree(&A);
 68 |   }
 69 |   rleInit(M,h,w,m,cnts); free(cnts);
 70 | }
 71 | 
 72 | void rleArea( const RLE *R, siz n, uint *a ) {
 73 |   siz i, j; for( i=0; i<n; i++ ) {
 74 |     a[i]=0; for( j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j]; }
 75 | }
 76 | 
 77 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
 78 |   siz g, d; BB db, gb; int crowd;
 79 |   db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
 80 |   gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
 81 |   bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
 82 |   for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
 83 |     crowd=iscrowd!=NULL && iscrowd[g];
 84 |     if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
 85 |     siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb;
 86 |     ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
 87 |     cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
 88 |     while( ct>0 ) {
 89 |       c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
 90 |       ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
 91 |       cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
 92 |     }
 93 |     if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
 94 |     o[g*m+d] = (double)i/(double)u;
 95 |   }
 96 | }
 97 | 
 98 | void rleNms( RLE *dt, siz n, uint *keep, double thr ) {
 99 |   siz i, j; double u;
100 |   for( i=0; i<n; i++ ) keep[i]=1;
101 |   for( i=0; i<n; i++ ) if(keep[i]) {
102 |     for( j=i+1; j<n; j++ ) if(keep[j]) {
103 |       rleIou(dt+i,dt+j,1,1,0,&u);
104 |       if(u>thr) keep[j]=0;
105 |     }
106 |   }
107 | }
108 | 
109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
110 |   double h, w, i, u, ga, da; siz g, d; int crowd;
111 |   for( g=0; g<n; g++ ) {
112 |     BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
113 |     for( d=0; d<m; d++ ) {
114 |       BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
115 |       w=fmin(D[2]+D[0],G[2]+G[0])-fmax(D[0],G[0]); if(w<=0) continue;
116 |       h=fmin(D[3]+D[1],G[3]+G[1])-fmax(D[1],G[1]); if(h<=0) continue;
117 |       i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
118 |     }
119 |   }
120 | }
121 | 
122 | void bbNms( BB dt, siz n, uint *keep, double thr ) {
123 |   siz i, j; double u;
124 |   for( i=0; i<n; i++ ) keep[i]=1;
125 |   for( i=0; i<n; i++ ) if(keep[i]) {
126 |     for( j=i+1; j<n; j++ ) if(keep[j]) {
127 |       bbIou(dt+i*4,dt+j*4,1,1,0,&u);
128 |       if(u>thr) keep[j]=0;
129 |     }
130 |   }
131 | }
132 | 
133 | void rleToBbox( const RLE *R, BB bb, siz n ) {
134 |   siz i; for( i=0; i<n; i++ ) {
135 |     uint h, w, x, y, xs, ys, xe, ye, xp, cc, t; siz j, m;
136 |     h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
137 |     m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
138 |     if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
139 |     for( j=0; j<m; j++ ) {
140 |       cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
141 |       if(j%2==0) xp=x; else if(xp<x) { ys=0; ye=h-1; }
142 |       xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
143 |     }
144 |     bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
145 |     bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
146 |   }
147 | }
148 | 
149 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
150 |   siz i; for( i=0; i<n; i++ ) {
151 |     double xs=bb[4*i+0], xe=xs+bb[4*i+2];
152 |     double ys=bb[4*i+1], ye=ys+bb[4*i+3];
153 |     double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
154 |     rleFrPoly( R+i, xy, 4, h, w );
155 |   }
156 | }
157 | 
158 | int uintCompare(const void *a, const void *b) {
159 |   uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
160 | }
161 | 
162 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
163 |   /* upsample and get discrete points densely along entire boundary */
164 |   siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
165 |   x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
166 |   for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
167 |   for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
168 |   for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
169 |   u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
170 |   for( j=0; j<k; j++ ) {
171 |     int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t, d;
172 |     int flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
173 |     flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
174 |     if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
175 |     s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
176 |     if(dx>=dy) for( d=0; d<=dx; d++ ) {
177 |       t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
178 |     } else for( d=0; d<=dy; d++ ) {
179 |       t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
180 |     }
181 |   }
182 |   /* get points along y-boundary and downsample */
183 |   free(x); free(y); k=m; m=0; double xd, yd;
184 |   x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
185 |   for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
186 |     xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
187 |     if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
188 |     yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
189 |     if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
190 |     x[m]=(int) xd; y[m]=(int) yd; m++;
191 |   }
192 |   /* compute rle encoding given y-boundary points */
193 |   k=m; a=malloc(sizeof(uint)*(k+1));
194 |   for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
195 |   a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
196 |   qsort(a,k,sizeof(uint),uintCompare); uint p=0;
197 |   for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
198 |   b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
199 |   while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
200 |     j++; if(j<k) b[m-1]+=a[j++]; }
201 |   rleInit(R,h,w,m,b); free(a); free(b);
202 | }
203 | 
204 | char* rleToString( const RLE *R ) {
205 |   /* Similar to LEB128 but using 6 bits/char and ascii chars 48-111. */
206 |   siz i, m=R->m, p=0; long x; int more;
207 |   char *s=malloc(sizeof(char)*m*6);
208 |   for( i=0; i<m; i++ ) {
209 |     x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
210 |     while( more ) {
211 |       char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
212 |       if(more) c |= 0x20; c+=48; s[p++]=c;
213 |     }
214 |   }
215 |   s[p]=0; return s;
216 | }
217 | 
218 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
219 |   siz m=0, p=0, k; long x; int more; uint *cnts;
220 |   while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
221 |   while( s[p] ) {
222 |     x=0; k=0; more=1;
223 |     while( more ) {
224 |       char c=s[p]-48; x |= (c & 0x1f) << 5*k;
225 |       more = c & 0x20; p++; k++;
226 |       if(!more && (c & 0x10)) x |= -1 << 5*k;
227 |     }
228 |     if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
229 |   }
230 |   rleInit(R,h,w,m,cnts); free(cnts);
231 | }
232 | 


--------------------------------------------------------------------------------
/cocoapi-master/common/maskApi.h:
--------------------------------------------------------------------------------
 1 | /**************************************************************************
 2 | * Microsoft COCO Toolbox.      version 2.0
 3 | * Data, paper, and tutorials available at:  http://mscoco.org/
 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 5 | * Licensed under the Simplified BSD License [see coco/license.txt]
 6 | **************************************************************************/
 7 | #pragma once
 8 | 
 9 | typedef unsigned int uint;
10 | typedef unsigned long siz;
11 | typedef unsigned char byte;
12 | typedef double* BB;
13 | typedef struct { siz h, w, m; uint *cnts; } RLE;
14 | 
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 | 
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 | 
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 | 
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 | 
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 | 
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 | 
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 | 
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 | 
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 | 
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 | 
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 | 
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 | 
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 | 
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 | 
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 | 


--------------------------------------------------------------------------------
/cocoapi-master/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met: 
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright notice, this
 8 |    list of conditions and the following disclaimer. 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution. 
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 | 
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies, 
26 | either expressed or implied, of the FreeBSD Project.
27 | 


--------------------------------------------------------------------------------
/cocoapi-master/results/val2014_fake_eval_res.txt:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------------
 2 | type=segm
 3 | Running per image evaluation...      DONE (t=0.45s).
 4 | Accumulating evaluation results...   DONE (t=0.08s).
 5 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.320
 6 |  Average Precision (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.562
 7 |  Average Precision (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.299
 8 |  Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.387
 9 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.310
10 |  Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.327
11 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.268
12 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.415
13 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.417
14 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.469
15 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.377
16 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.381
17 | 
18 | ------------------------------------------------------------------------------
19 | type=bbox
20 | Running per image evaluation...      DONE (t=0.34s).
21 | Accumulating evaluation results...   DONE (t=0.08s).
22 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.505
23 |  Average Precision (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.697
24 |  Average Precision (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.573
25 |  Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586
26 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519
27 |  Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501
28 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.387
29 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.594
30 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.595
31 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640
32 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566
33 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564
34 | 
35 | ------------------------------------------------------------------------------
36 | type=keypoints
37 | Running per image evaluation...      DONE (t=0.06s).
38 | Accumulating evaluation results...   DONE (t=0.00s).
39 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.372
40 |  Average Precision (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.636
41 |  Average Precision (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.348
42 |  Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.384
43 |  Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.386
44 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.514
45 |  Average Recall    (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.734
46 |  Average Recall    (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.504
47 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.508
48 |  Average Recall    (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.522
49 | 


--------------------------------------------------------------------------------
/evaluate_models.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from vocab import Vocabulary
 3 | import evaluation_models
 4 | 
 5 | # for coco
 6 | print('Evaluation on COCO:')
 7 | evaluation_models.evalrank("pretrain_model/coco/model_coco_1.pth.tar", "pretrain_model/coco/model_coco_2.pth.tar", data_path='data/', split="testall", fold5=True)
 8 | 
 9 | # for flickr
10 | print('Evaluation on Flickr30K:')
11 | evaluation_models.evalrank("pretrain_model/flickr/model_fliker_1.pth.tar", "pretrain_model/flickr/model_fliker_2.pth.tar", data_path='data/', split="test", fold5=False)
12 | 


--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | import pickle
  4 | 
  5 | import torch
  6 | import numpy
  7 | from data import get_test_loader
  8 | import time
  9 | import numpy as np
 10 | from vocab import Vocabulary  # NOQA
 11 | from model import VSRN, order_sim
 12 | from collections import OrderedDict
 13 | 
 14 | 
 15 | class AverageMeter(object):
 16 |     """Computes and stores the average and current value"""
 17 | 
 18 |     def __init__(self):
 19 |         self.reset()
 20 | 
 21 |     def reset(self):
 22 |         self.val = 0
 23 |         self.avg = 0
 24 |         self.sum = 0
 25 |         self.count = 0
 26 | 
 27 |     def update(self, val, n=0):
 28 |         self.val = val
 29 |         self.sum += val * n
 30 |         self.count += n
 31 |         self.avg = self.sum / (.0001 + self.count)
 32 | 
 33 |     def __str__(self):
 34 |         """String representation for logging
 35 |         """
 36 |         # for values that should be recorded exactly e.g. iteration number
 37 |         if self.count == 0:
 38 |             return str(self.val)
 39 |         # for stats
 40 |         return '%.4f (%.4f)' % (self.val, self.avg)
 41 | 
 42 | 
 43 | class LogCollector(object):
 44 |     """A collection of logging objects that can change from train to val"""
 45 | 
 46 |     def __init__(self):
 47 |         # to keep the order of logged variables deterministic
 48 |         self.meters = OrderedDict()
 49 | 
 50 |     def update(self, k, v, n=0):
 51 |         # create a new meter if previously not recorded
 52 |         if k not in self.meters:
 53 |             self.meters[k] = AverageMeter()
 54 |         self.meters[k].update(v, n)
 55 | 
 56 |     def __str__(self):
 57 |         """Concatenate the meters in one log line
 58 |         """
 59 |         s = ''
 60 |         for i, (k, v) in enumerate(self.meters.iteritems()):
 61 |             if i > 0:
 62 |                 s += '  '
 63 |             s += k + ' ' + str(v)
 64 |         return s
 65 | 
 66 |     def tb_log(self, tb_logger, prefix='', step=None):
 67 |         """Log using tensorboard
 68 |         """
 69 |         for k, v in self.meters.iteritems():
 70 |             tb_logger.log_value(prefix + k, v.val, step=step)
 71 | 
 72 | 
 73 | def encode_data(model, data_loader, log_step=10, logging=print):
 74 |     """Encode all images and captions loadable by `data_loader`
 75 |     """
 76 |     batch_time = AverageMeter()
 77 |     val_logger = LogCollector()
 78 | 
 79 |     # switch to evaluate mode
 80 |     model.val_start()
 81 | 
 82 |     end = time.time()
 83 | 
 84 |     # numpy array to keep all the embeddings
 85 |     img_embs = None
 86 |     cap_embs = None
 87 |     for i, (images, captions, lengths, ids, caption_labels, caption_masks) in enumerate(data_loader):
 88 |         # make sure val logger is used
 89 |         model.logger = val_logger
 90 | 
 91 |         # compute the embeddings
 92 |         img_emb, cap_emb, GCN_img_emd = model.forward_emb(images, captions, lengths,
 93 |                                              volatile=True)
 94 | 
 95 |         # initialize the numpy arrays given the size of the embeddings
 96 |         if img_embs is None:
 97 |             img_embs = np.zeros((len(data_loader.dataset), img_emb.size(1)))
 98 |             cap_embs = np.zeros((len(data_loader.dataset), cap_emb.size(1)))
 99 | 
100 |         # preserve the embeddings by copying from gpu and converting to numpy
101 |         img_embs[ids] = img_emb.data.cpu().numpy().copy()
102 |         cap_embs[ids] = cap_emb.data.cpu().numpy().copy()
103 | 
104 | 
105 |         del images, captions
106 | 
107 |     return img_embs, cap_embs
108 | 
109 | 
110 | 
111 | 
112 | def evalrank(model_path, data_path=None, split='dev', fold5=False):
113 |     """
114 |     Evaluate a trained model on either dev or test. If `fold5=True`, 5 fold
115 |     cross-validation is done (only for MSCOCO). Otherwise, the full data is
116 |     used for evaluation.
117 |     """
118 |     # load model and options
119 |     checkpoint = torch.load(model_path)
120 |     opt = checkpoint['opt']
121 |     if data_path is not None:
122 |         opt.data_path = data_path
123 | 
124 |     # load vocabulary used by the model
125 |     with open(os.path.join(opt.vocab_path,
126 |                            '%s_vocab.pkl' % opt.data_name), 'rb') as f:
127 |         vocab = pickle.load(f)
128 |     opt.vocab_size = len(vocab)
129 | 
130 |     # construct model
131 |     model = VSRN(opt)
132 | 
133 |     # load model state
134 |     model.load_state_dict(checkpoint['model'])
135 | 
136 |     print('Loading dataset')
137 |     data_loader = get_test_loader(split, opt.data_name, vocab, opt.crop_size,
138 |                                   opt.batch_size, opt.workers, opt)
139 | 
140 |     print('Computing results...')
141 |     img_embs, cap_embs = encode_data(model, data_loader)
142 |     print('Images: %d, Captions: %d' %
143 |           (img_embs.shape[0] / 5, cap_embs.shape[0]))
144 | 
145 |     if not fold5:
146 |         # no cross-validation, full evaluation
147 |         r, rt = i2t(img_embs, cap_embs, measure=opt.measure, return_ranks=True)
148 |         ri, rti = t2i(img_embs, cap_embs,
149 |                       measure=opt.measure, return_ranks=True)
150 |         ar = (r[0] + r[1] + r[2]) / 3
151 |         ari = (ri[0] + ri[1] + ri[2]) / 3
152 |         rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
153 |         print("rsum: %.1f" % rsum)
154 |         print("Average i2t Recall: %.1f" % ar)
155 |         print("Image to text: %.1f %.1f %.1f %.1f %.1f" % r)
156 |         print("Average t2i Recall: %.1f" % ari)
157 |         print("Text to image: %.1f %.1f %.1f %.1f %.1f" % ri)
158 |     else:
159 |         # 5fold cross-validation, only for MSCOCO
160 |         results = []
161 |         for i in range(5):
162 |             r, rt0 = i2t(img_embs[i * 5000:(i + 1) * 5000],
163 |                          cap_embs[i * 5000:(i + 1) *
164 |                                   5000], measure=opt.measure,
165 |                          return_ranks=True)
166 |             print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % r)
167 |             ri, rti0 = t2i(img_embs[i * 5000:(i + 1) * 5000],
168 |                            cap_embs[i * 5000:(i + 1) *
169 |                                     5000], measure=opt.measure,
170 |                            return_ranks=True)
171 |             if i == 0:
172 |                 rt, rti = rt0, rti0
173 |             print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)
174 |             ar = (r[0] + r[1] + r[2]) / 3
175 |             ari = (ri[0] + ri[1] + ri[2]) / 3
176 |             rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
177 |             print("rsum: %.1f ar: %.1f ari: %.1f" % (rsum, ar, ari))
178 |             results += [list(r) + list(ri) + [ar, ari, rsum]]
179 | 
180 |         print("-----------------------------------")
181 |         print("Mean metrics: ")
182 |         mean_metrics = tuple(np.array(results).mean(axis=0).flatten())
183 |         print("rsum: %.1f" % (mean_metrics[10] * 6))
184 |         print("Average i2t Recall: %.1f" % mean_metrics[11])
185 |         print("Image to text: %.1f %.1f %.1f %.1f %.1f" %
186 |               mean_metrics[:5])
187 |         print("Average t2i Recall: %.1f" % mean_metrics[12])
188 |         print("Text to image: %.1f %.1f %.1f %.1f %.1f" %
189 |               mean_metrics[5:10])
190 | 
191 |     torch.save({'rt': rt, 'rti': rti}, 'ranks.pth.tar')
192 | 
193 | 
194 | def i2t(images, captions, npts=None, measure='cosine', return_ranks=False):
195 |     """
196 |     Images->Text (Image Annotation)
197 |     Images: (5N, K) matrix of images
198 |     Captions: (5N, K) matrix of captions
199 |     """
200 |     if npts is None:
201 |         npts = images.shape[0] / 5
202 |     index_list = []
203 | 
204 |     ranks = numpy.zeros(npts)
205 |     top1 = numpy.zeros(npts)
206 |     for index in range(npts):
207 | 
208 |         # Get query image
209 |         im = images[5 * index].reshape(1, images.shape[1])
210 | 
211 |         # Compute scores
212 |         if measure == 'order':
213 |             bs = 100
214 |             if index % bs == 0:
215 |                 mx = min(images.shape[0], 5 * (index + bs))
216 |                 im2 = images[5 * index:mx:5]
217 |                 d2 = order_sim(torch.Tensor(im2).cuda(),
218 |                                torch.Tensor(captions).cuda())
219 |                 d2 = d2.cpu().numpy()
220 |             d = d2[index % bs]
221 |         else:
222 |             d = numpy.dot(im, captions.T).flatten()
223 |         inds = numpy.argsort(d)[::-1]
224 |         index_list.append(inds[0])
225 | 
226 |         # Score
227 |         rank = 1e20
228 |         for i in range(5 * index, 5 * index + 5, 1):
229 |             tmp = numpy.where(inds == i)[0][0]
230 |             if tmp < rank:
231 |                 rank = tmp
232 |         ranks[index] = rank
233 |         top1[index] = inds[0]
234 | 
235 |     # Compute metrics
236 |     r1 = 100.0 * len(numpy.where(ranks < 1)[0]) / len(ranks)
237 |     r5 = 100.0 * len(numpy.where(ranks < 5)[0]) / len(ranks)
238 |     r10 = 100.0 * len(numpy.where(ranks < 10)[0]) / len(ranks)
239 |     medr = numpy.floor(numpy.median(ranks)) + 1
240 |     meanr = ranks.mean() + 1
241 |     if return_ranks:
242 |         return (r1, r5, r10, medr, meanr), (ranks, top1)
243 |     else:
244 |         return (r1, r5, r10, medr, meanr)
245 | 
246 | 
247 | def t2i(images, captions, npts=None, measure='cosine', return_ranks=False):
248 |     """
249 |     Text->Images (Image Search)
250 |     Images: (5N, K) matrix of images
251 |     Captions: (5N, K) matrix of captions
252 |     """
253 |     if npts is None:
254 |         npts = images.shape[0] / 5
255 |     ims = numpy.array([images[i] for i in range(0, len(images), 5)])
256 | 
257 |     ranks = numpy.zeros(5 * npts)
258 |     top1 = numpy.zeros(5 * npts)
259 |     for index in range(npts):
260 | 
261 |         # Get query captions
262 |         queries = captions[5 * index:5 * index + 5]
263 | 
264 |         # Compute scores
265 |         if measure == 'order':
266 |             bs = 100
267 |             if 5 * index % bs == 0:
268 |                 mx = min(captions.shape[0], 5 * index + bs)
269 |                 q2 = captions[5 * index:mx]
270 |                 d2 = order_sim(torch.Tensor(ims).cuda(),
271 |                                torch.Tensor(q2).cuda())
272 |                 d2 = d2.cpu().numpy()
273 | 
274 |             d = d2[:, (5 * index) % bs:(5 * index) % bs + 5].T
275 |         else:
276 |             d = numpy.dot(queries, ims.T)
277 |         inds = numpy.zeros(d.shape)
278 |         for i in range(len(inds)):
279 |             inds[i] = numpy.argsort(d[i])[::-1]
280 |             ranks[5 * index + i] = numpy.where(inds[i] == index)[0][0]
281 |             top1[5 * index + i] = inds[i][0]
282 | 
283 |     # Compute metrics
284 |     r1 = 100.0 * len(numpy.where(ranks < 1)[0]) / len(ranks)
285 |     r5 = 100.0 * len(numpy.where(ranks < 5)[0]) / len(ranks)
286 |     r10 = 100.0 * len(numpy.where(ranks < 10)[0]) / len(ranks)
287 |     medr = numpy.floor(numpy.median(ranks)) + 1
288 |     meanr = ranks.mean() + 1
289 |     if return_ranks:
290 |         return (r1, r5, r10, medr, meanr), (ranks, top1)
291 |     else:
292 |         return (r1, r5, r10, medr, meanr)
293 | 


--------------------------------------------------------------------------------
/fig/Q_i2t.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/fig/Q_i2t.png


--------------------------------------------------------------------------------
/fig/Q_t2i_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/fig/Q_t2i_2.png


--------------------------------------------------------------------------------
/fig/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/fig/model.png


--------------------------------------------------------------------------------
/fig/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/fig/teaser.png


--------------------------------------------------------------------------------
/misc/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/misc/cocoeval.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Wrapper for evaluation on CIDEr, ROUGE_L, METEOR and Bleu_N
  3 | using coco-caption repo https://github.com/tylin/coco-caption
  4 | 
  5 | class COCOScorer is taken from https://github.com/yaoli/arctic-capgen-vid
  6 | '''
  7 | 
  8 | import json
  9 | import os
 10 | import sys
 11 | sys.path.append('coco-caption')
 12 | 
 13 | from pycocoevalcap.bleu.bleu import Bleu
 14 | from pycocoevalcap.rouge.rouge import Rouge
 15 | from pycocoevalcap.cider.cider import Cider
 16 | from pycocoevalcap.meteor.meteor import Meteor
 17 | from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
 18 | # Define a context manager to suppress stdout and stderr.
 19 | 
 20 | 
 21 | class suppress_stdout_stderr:
 22 |     '''
 23 |     A context manager for doing a "deep suppression" of stdout and stderr in
 24 |     Python, i.e. will suppress all print, even if the print originates in a
 25 |     compiled C/Fortran sub-function.
 26 |        This will not suppress raised exceptions, since exceptions are printed
 27 |     to stderr just before a script exits, and after the context manager has
 28 |     exited (at least, I think that is why it lets exceptions through).
 29 | 
 30 |     '''
 31 | 
 32 |     def __init__(self):
 33 |         # Open a pair of null files
 34 |         self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
 35 |         # Save the actual stdout (1) and stderr (2) file descriptors.
 36 |         self.save_fds = (os.dup(1), os.dup(2))
 37 | 
 38 |     def __enter__(self):
 39 |         # Assign the null pointers to stdout and stderr.
 40 |         os.dup2(self.null_fds[0], 1)
 41 |         os.dup2(self.null_fds[1], 2)
 42 | 
 43 |     def __exit__(self, *_):
 44 |         # Re-assign the real stdout/stderr back to (1) and (2)
 45 |         os.dup2(self.save_fds[0], 1)
 46 |         os.dup2(self.save_fds[1], 2)
 47 |         # Close the null files
 48 |         os.close(self.null_fds[0])
 49 |         os.close(self.null_fds[1])
 50 | 
 51 | 
 52 | class COCOScorer(object):
 53 |     def __init__(self):
 54 |         print('init COCO-EVAL scorer')
 55 | 
 56 |     def score(self, GT, RES, IDs):
 57 |         self.eval = {}
 58 |         self.imgToEval = {}
 59 |         gts = {}
 60 |         res = {}
 61 |         for ID in IDs:
 62 |             #            print ID
 63 |             gts[ID] = GT[ID]
 64 |             res[ID] = RES[ID]
 65 |         print('tokenization...')
 66 |         tokenizer = PTBTokenizer()
 67 |         gts = tokenizer.tokenize(gts)
 68 |         res = tokenizer.tokenize(res)
 69 | 
 70 |         # =================================================
 71 |         # Set up scorers
 72 |         # =================================================
 73 |         print('setting up scorers...')
 74 |         scorers = [
 75 |             (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
 76 |             (Meteor(),"METEOR"),
 77 |             (Rouge(), "ROUGE_L"),
 78 |             (Cider(), "CIDEr"),
 79 |             #(Spice(), "SPICE")
 80 |         ]
 81 | 
 82 |         # =================================================
 83 |         # Compute scores
 84 |         # =================================================
 85 |         eval = {}
 86 |         for scorer, method in scorers:
 87 |             print('computing %s score...' % (scorer.method()))
 88 |             score, scores = scorer.compute_score(gts, res)
 89 |             if type(method) == list:
 90 |                 for sc, scs, m in zip(score, scores, method):
 91 |                     self.setEval(sc, m)
 92 |                     self.setImgToEvalImgs(scs, IDs, m)
 93 |                     print("%s: %0.3f" % (m, sc))
 94 |             else:
 95 |                 self.setEval(score, method)
 96 |                 self.setImgToEvalImgs(scores, IDs, method)
 97 |                 print("%s: %0.3f" % (method, score))
 98 | 
 99 |         # for metric, score in self.eval.items():
100 |         #    print '%s: %.3f'%(metric, score)
101 |         return self.eval
102 | 
103 |     def setEval(self, score, method):
104 |         self.eval[method] = score
105 | 
106 |     def setImgToEvalImgs(self, scores, imgIds, method):
107 |         for imgId, score in zip(imgIds, scores):
108 |             if imgId not in self.imgToEval:
109 |                 self.imgToEval[imgId] = {}
110 |                 self.imgToEval[imgId]["image_id"] = imgId
111 |             self.imgToEval[imgId][method] = score
112 | 
113 | 
114 | def score(ref, sample):
115 |     # ref and sample are both dict
116 |     scorers = [
117 |         (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
118 |         (Rouge(), "ROUGE_L"),
119 |         (Cider(), "CIDEr")
120 |     ]
121 |     final_scores = {}
122 |     for scorer, method in scorers:
123 |         print('computing %s score with COCO-EVAL...' % (scorer.method()))
124 |         score, scores = scorer.compute_score(ref, sample)
125 |         if type(score) == list:
126 |             for m, s in zip(method, score):
127 |                 final_scores[m] = s
128 |         else:
129 |             final_scores[method] = score
130 |     return final_scores
131 | 
132 | 
133 | def test_cocoscorer():
134 |     '''gts = {
135 |         184321:[
136 |         {u'image_id': 184321, u'id': 352188, u'caption': u'A train traveling down-tracks next to lights.'},
137 |         {u'image_id': 184321, u'id': 356043, u'caption': u"A blue and silver train next to train's station and trees."},
138 |         {u'image_id': 184321, u'id': 356382, u'caption': u'A blue train is next to a sidewalk on the rails.'},
139 |         {u'image_id': 184321, u'id': 361110, u'caption': u'A passenger train pulls into a train station.'},
140 |         {u'image_id': 184321, u'id': 362544, u'caption': u'A train coming down the tracks arriving at a station.'}],
141 |         81922: [
142 |         {u'image_id': 81922, u'id': 86779, u'caption': u'A large jetliner flying over a traffic filled street.'},
143 |         {u'image_id': 81922, u'id': 90172, u'caption': u'An airplane flies low in the sky over a city street. '},
144 |         {u'image_id': 81922, u'id': 91615, u'caption': u'An airplane flies over a street with many cars.'},
145 |         {u'image_id': 81922, u'id': 92689, u'caption': u'An airplane comes in to land over a road full of cars'},
146 |         {u'image_id': 81922, u'id': 823814, u'caption': u'The plane is flying over top of the cars'}]
147 |         }
148 | 
149 |     samples = {
150 |         184321: [{u'image_id': 184321, 'id': 111, u'caption': u'train traveling down a track in front of a road'}],
151 |         81922: [{u'image_id': 81922, 'id': 219, u'caption': u'plane is flying through the sky'}],
152 |         }
153 |     '''
154 |     gts = {
155 |         '184321': [
156 |             {u'image_id': '184321', u'cap_id': 0, u'caption': u'A train traveling down tracks next to lights.',
157 |              'tokenized': 'a train traveling down tracks next to lights'},
158 |             {u'image_id': '184321', u'cap_id': 1, u'caption': u'A train coming down the tracks arriving at a station.',
159 |              'tokenized': 'a train coming down the tracks arriving at a station'}],
160 |         '81922': [
161 |             {u'image_id': '81922', u'cap_id': 0, u'caption': u'A large jetliner flying over a traffic filled street.',
162 |              'tokenized': 'a large jetliner flying over a traffic filled street'},
163 |             {u'image_id': '81922', u'cap_id': 1, u'caption': u'The plane is flying over top of the cars',
164 |              'tokenized': 'the plan is flying over top of the cars'}, ]
165 |     }
166 | 
167 |     samples = {
168 |         '184321': [{u'image_id': '184321', u'caption': u'train traveling down a track in front of a road'}],
169 |         '81922': [{u'image_id': '81922', u'caption': u'plane is flying through the sky'}],
170 |     }
171 |     IDs = ['184321', '81922']
172 |     scorer = COCOScorer()
173 |     scorer.score(gts, samples, IDs)
174 | 
175 | 
176 | if __name__ == '__main__':
177 |     test_cocoscorer()
178 | 


--------------------------------------------------------------------------------
/misc/rewards.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import OrderedDict
 3 | import torch
 4 | import sys
 5 | sys.path.append("coco-caption")
 6 | from pyciderevalcap.ciderD.ciderD import CiderD
 7 | 
 8 | CiderD_scorer = None
 9 | # CiderD_scorer = CiderD(df='corpus')
10 | 
11 | 
12 | def init_cider_scorer(cached_tokens):
13 |     global CiderD_scorer
14 |     CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
15 | 
16 | 
17 | def array_to_str(arr):
18 |     out = ''
19 |     for i in range(len(arr)):
20 |         out += str(arr[i]) + ' '
21 |         if arr[i] == 0:
22 |             break
23 |     return out.strip()
24 | 
25 | 
26 | def get_self_critical_reward(model, fc_feats, data, gen_result):
27 |     batch_size = gen_result.size(0)
28 | 
29 |     # get greedy decoding baseline
30 |     _, greedy_res = model(fc_feats, mode='inference')
31 | 
32 |     res = OrderedDict()
33 | 
34 |     gen_result = gen_result.cpu().data.numpy()
35 |     greedy_res = greedy_res.cpu().data.numpy()
36 |     for i in range(batch_size):
37 |         res[i] = [array_to_str(gen_result[i])]
38 |     for i in range(batch_size):
39 |         res[batch_size + i] = [array_to_str(greedy_res[i])]
40 | 
41 |     gts = OrderedDict()
42 |     for i in range(data['gts'].size(0)):
43 |         gts[i] = [array_to_str(data['gts'][i][j])
44 |                   for j in range(data['gts'].size(1))]
45 | 
46 |     res = [{'image_id': i, 'caption': res[i]} for i in range(2 * batch_size)]
47 |     gts = {i: gts[i % batch_size] for i in range(2 * batch_size)}
48 |     _, scores = CiderD_scorer.compute_score(gts, res)
49 |     print('Cider scores:', _)
50 | 
51 |     scores = scores[:batch_size] - scores[batch_size:]
52 | 
53 |     rewards = np.repeat(scores[:, np.newaxis], gen_result.shape[1], 1)
54 | 
55 |     return rewards
56 | 


--------------------------------------------------------------------------------
/misc/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Variable
 4 | 
 5 | 
 6 | # Input: seq, N*D numpy array, with element 0 .. vocab_size. 0 is END token.
 7 | def decode_sequence(ix_to_word, seq):
 8 |     seq = seq.cpu()
 9 |     N, D = seq.size()
10 |     out = []
11 |     for i in range(N):
12 |         txt = ''
13 |         for j in range(D):
14 |             ix = seq[i, j].item()
15 |             if ix > 0:
16 |                 if j >= 1:
17 |                     txt = txt + ' '
18 |                 txt = txt + ix_to_word[str(ix)]
19 |             else:
20 |                 break
21 |         out.append(txt)
22 |     return out
23 | 
24 | 
25 | class RewardCriterion(nn.Module):
26 | 
27 |     def __init__(self):
28 |         super(RewardCriterion, self).__init__()
29 | 
30 |     def forward(self, input, seq, reward):
31 |         input = input.contiguous().view(-1)
32 |         reward = reward.contiguous().view(-1)
33 |         mask = (seq > 0).float()
34 |         mask = torch.cat([mask.new(mask.size(0), 1).fill_(1).cuda(),
35 |                          mask[:, :-1]], 1).contiguous().view(-1)
36 |         output = - input * reward * mask
37 |         output = torch.sum(output) / torch.sum(mask)
38 | 
39 |         return output
40 | 
41 | 
42 | class LanguageModelCriterion(nn.Module):
43 | 
44 |     def __init__(self):
45 |         super(LanguageModelCriterion, self).__init__()
46 |         self.loss_fn = nn.NLLLoss(reduce=False)
47 | 
48 |     def forward(self, logits, target, mask):
49 |         """
50 |         logits: shape of (N, seq_len, vocab_size)
51 |         target: shape of (N, seq_len)
52 |         mask: shape of (N, seq_len)
53 |         """
54 |         # truncate to the same size
55 |         batch_size = logits.shape[0]
56 |         target = target[:, :logits.shape[1]]
57 |         mask = mask[:, :logits.shape[1]]
58 |         logits = logits.contiguous().view(-1, logits.shape[2])
59 |         target = target.contiguous().view(-1)
60 |         mask = mask.contiguous().view(-1)
61 |         loss = self.loss_fn(logits, target)
62 |         output = torch.sum(loss * mask) / batch_size
63 |         return output


--------------------------------------------------------------------------------
/models/Attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | class Attention(nn.Module):
 7 |     """
 8 |     Applies an attention mechanism on the output features from the decoder.
 9 |     """
10 | 
11 |     def __init__(self, dim):
12 |         super(Attention, self).__init__()
13 |         self.dim = dim
14 |         self.linear1 = nn.Linear(dim * 2, dim)
15 |         self.linear2 = nn.Linear(dim, 1, bias=False)
16 |         #self._init_hidden()
17 | 
18 |     def _init_hidden(self):
19 |         nn.init.xavier_normal_(self.linear1.weight)
20 |         nn.init.xavier_normal_(self.linear2.weight)
21 | 
22 |     def forward(self, hidden_state, encoder_outputs):
23 |         """
24 |         Arguments:
25 |             hidden_state {Variable} -- batch_size x dim
26 |             encoder_outputs {Variable} -- batch_size x seq_len x dim
27 | 
28 |         Returns:
29 |             Variable -- context vector of size batch_size x dim
30 |         """
31 |         batch_size, seq_len, _ = encoder_outputs.size()
32 |         hidden_state = hidden_state.unsqueeze(1).repeat(1, seq_len, 1)
33 |         inputs = torch.cat((encoder_outputs, hidden_state),
34 |                            2).view(-1, self.dim * 2)
35 |         o = self.linear2(F.tanh(self.linear1(inputs)))
36 |         e = o.view(batch_size, seq_len)
37 |         alpha = F.softmax(e, dim=1)
38 |         context = torch.bmm(alpha.unsqueeze(1), encoder_outputs).squeeze(1)
39 |         return context
40 | 


--------------------------------------------------------------------------------
/models/DecoderRNN.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | 
  7 | from .Attention import Attention
  8 | 
  9 | 
 10 | class DecoderRNN(nn.Module):
 11 |     """
 12 |     Provides functionality for decoding in a seq2seq framework, with an option for attention.
 13 |     Args:
 14 |         vocab_size (int): size of the vocabulary
 15 |         max_len (int): a maximum allowed length for the sequence to be processed
 16 |         dim_hidden (int): the number of features in the hidden state `h`
 17 |         n_layers (int, optional): number of recurrent layers (default: 1)
 18 |         rnn_cell (str, optional): type of RNN cell (default: gru)
 19 |         bidirectional (bool, optional): if the encoder is bidirectional (default False)
 20 |         input_dropout_p (float, optional): dropout probability for the input sequence (default: 0)
 21 |         rnn_dropout_p (float, optional): dropout probability for the output sequence (default: 0)
 22 | 
 23 |     """
 24 | 
 25 |     def __init__(self,
 26 |                  vocab_size,
 27 |                  max_len,
 28 |                  dim_hidden,
 29 |                  dim_word,
 30 |                  n_layers=1,
 31 |                  rnn_cell='gru',
 32 |                  bidirectional=False,
 33 |                  input_dropout_p=0.1,
 34 |                  rnn_dropout_p=0.1):
 35 |         super(DecoderRNN, self).__init__()
 36 | 
 37 |         self.bidirectional_encoder = bidirectional
 38 | 
 39 |         self.dim_output = vocab_size
 40 |         self.dim_hidden = dim_hidden * 2 if bidirectional else dim_hidden
 41 |         self.dim_word = dim_word
 42 |         self.max_length = max_len
 43 |         self.sos_id = 1
 44 |         self.eos_id = 0
 45 |         self.input_dropout = nn.Dropout(input_dropout_p)
 46 |         self.embedding = nn.Embedding(self.dim_output, dim_word)
 47 |         self.attention = Attention(self.dim_hidden)
 48 |         if rnn_cell.lower() == 'lstm':
 49 |             self.rnn_cell = nn.LSTM
 50 |         elif rnn_cell.lower() == 'gru':
 51 |             self.rnn_cell = nn.GRU
 52 |         self.rnn = self.rnn_cell(
 53 |             self.dim_hidden + dim_word,
 54 |             self.dim_hidden,
 55 |             n_layers,
 56 |             batch_first=True,
 57 |             dropout=rnn_dropout_p)
 58 | 
 59 |         self.out = nn.Linear(self.dim_hidden, self.dim_output)
 60 | 
 61 |         self._init_weights()
 62 | 
 63 |     def forward(self,
 64 |                 encoder_outputs,
 65 |                 encoder_hidden,
 66 |                 targets=None,
 67 |                 mode='train',
 68 |                 opt={}):
 69 |         """
 70 | 
 71 |         Inputs: inputs, encoder_hidden, encoder_outputs, function, teacher_forcing_ratio
 72 |         - **encoder_hidden** (num_layers * num_directions, batch_size, dim_hidden): tensor containing the features in the
 73 |           hidden state `h` of encoder. Used as the initial hidden state of the decoder. (default `None`)
 74 |         - **encoder_outputs** (batch, seq_len, dim_hidden * num_directions): (default is `None`).
 75 |         - **targets** (batch, max_length): targets labels of the ground truth sentences
 76 | 
 77 |         Outputs: seq_probs,
 78 |         - **seq_logprobs** (batch_size, max_length, vocab_size): tensors containing the outputs of the decoding function.
 79 |         - **seq_preds** (batch_size, max_length): predicted symbols
 80 |         """
 81 |         sample_max = opt.get('sample_max', 1)
 82 |         beam_size = opt.get('beam_size', 1)
 83 |         temperature = opt.get('temperature', 1.0)
 84 | 
 85 |         batch_size, _, _ = encoder_outputs.size()
 86 |         decoder_hidden = self._init_rnn_state(encoder_hidden)
 87 | 
 88 |         seq_logprobs = []
 89 |         seq_preds = []
 90 |         self.rnn.flatten_parameters()
 91 |         if mode == 'train':
 92 |             # use targets as rnn inputs
 93 |             # print(targets)
 94 |             targets_emb = self.embedding(targets)
 95 |             for i in range(self.max_length - 1):
 96 |                 current_words = targets_emb[:, i, :]
 97 |                 context = self.attention(decoder_hidden.squeeze(0), encoder_outputs)
 98 |                 decoder_input = torch.cat([current_words, context], dim=1)
 99 |                 decoder_input = self.input_dropout(decoder_input).unsqueeze(1)
100 |                 decoder_output, decoder_hidden = self.rnn(
101 |                     decoder_input, decoder_hidden)
102 |                 logprobs = F.log_softmax(
103 |                     self.out(decoder_output.squeeze(1)), dim=1)
104 |                 seq_logprobs.append(logprobs.unsqueeze(1))
105 | 
106 |             seq_logprobs = torch.cat(seq_logprobs, 1)
107 | 
108 |         elif mode == 'inference':
109 |             if beam_size > 1:
110 |                 return self.sample_beam(encoder_outputs, decoder_hidden, opt)
111 | 
112 |             for t in range(self.max_length - 1):
113 |                 context = self.attention(
114 |                     decoder_hidden.squeeze(0), encoder_outputs)
115 | 
116 |                 if t == 0:  # input <bos>
117 |                     it = torch.LongTensor([self.sos_id] * batch_size).cuda()
118 |                 elif sample_max:
119 |                     sampleLogprobs, it = torch.max(logprobs, 1)
120 |                     seq_logprobs.append(sampleLogprobs.view(-1, 1))
121 |                     it = it.view(-1).long()
122 | 
123 |                 else:
124 |                     # sample according to distribuition
125 |                     if temperature == 1.0:
126 |                         prob_prev = torch.exp(logprobs)
127 |                     else:
128 |                         # scale logprobs by temperature
129 |                         prob_prev = torch.exp(torch.div(logprobs, temperature))
130 |                     it = torch.multinomial(prob_prev, 1).cuda()
131 |                     sampleLogprobs = logprobs.gather(1, it)
132 |                     seq_logprobs.append(sampleLogprobs.view(-1, 1))
133 |                     it = it.view(-1).long()
134 | 
135 |                 seq_preds.append(it.view(-1, 1))
136 | 
137 |                 xt = self.embedding(it)
138 |                 decoder_input = torch.cat([xt, context], dim=1)
139 |                 decoder_input = self.input_dropout(decoder_input).unsqueeze(1)
140 |                 decoder_output, decoder_hidden = self.rnn(
141 |                     decoder_input, decoder_hidden)
142 |                 logprobs = F.log_softmax(
143 |                     self.out(decoder_output.squeeze(1)), dim=1)
144 | 
145 |             seq_logprobs = torch.cat(seq_logprobs, 1)
146 |             seq_preds = torch.cat(seq_preds[1:], 1)
147 | 
148 |         return seq_logprobs, seq_preds
149 | 
150 |     def _init_weights(self):
151 |         """ init the weight of some layers
152 |         """
153 |         nn.init.xavier_normal_(self.out.weight)
154 | 
155 |     def _init_rnn_state(self, encoder_hidden):
156 |         """ Initialize the encoder hidden state. """
157 |         if encoder_hidden is None:
158 |             return None
159 |         if isinstance(encoder_hidden, tuple):
160 |             encoder_hidden = tuple(
161 |                 [self._cat_directions(h) for h in encoder_hidden])
162 |         else:
163 |             encoder_hidden = self._cat_directions(encoder_hidden)
164 |         return encoder_hidden
165 | 
166 |     def _cat_directions(self, h):
167 |         """ If the encoder is bidirectional, do the following transformation.
168 |             (#directions * #layers, #batch, dim_hidden) -> (#layers, #batch, #directions * dim_hidden)
169 |         """
170 |         if self.bidirectional_encoder:
171 |             h = torch.cat([h[0:h.size(0):2], h[1:h.size(0):2]], 2)
172 |         return h
173 | 


--------------------------------------------------------------------------------
/models/EncoderRNN.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | class EncoderRNN(nn.Module):
 5 |     def __init__(self, dim_vid, dim_hidden, input_dropout_p=0.2, rnn_dropout_p=0.5,
 6 |                  n_layers=1, bidirectional=False, rnn_cell='gru'):
 7 |         """
 8 | 
 9 |         Args:
10 |             hidden_dim (int): dim of hidden state of rnn
11 |             input_dropout_p (int): dropout probability for the input sequence
12 |             dropout_p (float): dropout probability for the output sequence
13 |             n_layers (int): number of rnn layers
14 |             rnn_cell (str): type of RNN cell ('LSTM'/'GRU')
15 |         """
16 |         super(EncoderRNN, self).__init__()
17 |         self.dim_vid = dim_vid
18 |         self.dim_hidden = dim_hidden
19 |         self.input_dropout_p = input_dropout_p
20 |         self.rnn_dropout_p = rnn_dropout_p
21 |         self.n_layers = n_layers
22 |         self.bidirectional = bidirectional
23 |         self.rnn_cell = rnn_cell
24 | 
25 |         self.vid2hid = nn.Linear(dim_vid, dim_hidden)
26 |         self.input_dropout = nn.Dropout(input_dropout_p)
27 | 
28 |         if rnn_cell.lower() == 'lstm':
29 |             self.rnn_cell = nn.LSTM
30 |         elif rnn_cell.lower() == 'gru':
31 |             self.rnn_cell = nn.GRU
32 | 
33 |         self.rnn = self.rnn_cell(dim_hidden, dim_hidden, n_layers, batch_first=True,
34 |                                 bidirectional=bidirectional, dropout=self.rnn_dropout_p)
35 | 
36 |         self._init_hidden()
37 | 
38 |     def _init_hidden(self):
39 |         nn.init.xavier_normal_(self.vid2hid.weight)
40 | 
41 |     def forward(self, vid_feats):
42 |         """
43 |         Applies a multi-layer RNN to an input sequence.
44 |         Args:
45 |             input_var (batch, seq_len): tensor containing the features of the input sequence.
46 |             input_lengths (list of int, optional): A list that contains the lengths of sequences
47 |               in the mini-batch
48 |         Returns: output, hidden
49 |             - **output** (batch, seq_len, hidden_size): variable containing the encoded features of the input sequence
50 |             - **hidden** (num_layers * num_directions, batch, hidden_size): variable containing the features in the hidden state h
51 |         """
52 |         batch_size, seq_len, dim_vid = vid_feats.size()
53 |         vid_feats = self.vid2hid(vid_feats.view(-1, dim_vid))
54 |         vid_feats = self.input_dropout(vid_feats)
55 |         vid_feats = vid_feats.view(batch_size, seq_len, self.dim_hidden)
56 |         self.rnn.flatten_parameters()
57 |         output, hidden = self.rnn(vid_feats)
58 |         return output, hidden
59 | 


--------------------------------------------------------------------------------
/models/S2VTAttModel.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | class S2VTAttModel(nn.Module):
 5 |     def __init__(self, encoder, decoder):
 6 |         """
 7 | 
 8 |         Args:
 9 |             encoder (nn.Module): Encoder rnn
10 |             decoder (nn.Module): Decoder rnn
11 |         """
12 |         super(S2VTAttModel, self).__init__()
13 |         self.encoder = encoder
14 |         self.decoder = decoder
15 | 
16 |     def forward(self, vid_feats, target_variable=None,
17 |                 mode='train', opt={}):
18 |         """
19 | 
20 |         Args:
21 |             vid_feats (Variable): video feats of shape [batch_size, seq_len, dim_vid]
22 |             target_variable (None, optional): groung truth labels
23 | 
24 |         Returns:
25 |             seq_prob: Variable of shape [batch_size, max_len-1, vocab_size]
26 |             seq_preds: [] or Variable of shape [batch_size, max_len-1]
27 |         """
28 |         encoder_outputs, encoder_hidden = self.encoder(vid_feats)
29 |         seq_prob, seq_preds = self.decoder(encoder_outputs, encoder_hidden, target_variable, mode, opt)
30 |         return seq_prob, seq_preds
31 | 


--------------------------------------------------------------------------------
/models/S2VTModel.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | import torch.nn.functional as F
 4 | import random
 5 | from torch.autograd import Variable
 6 | 
 7 | 
 8 | class S2VTModel(nn.Module):
 9 |     def __init__(self, vocab_size, max_len, dim_hidden, dim_word, dim_vid=2048, sos_id=1, eos_id=0,
10 |                  n_layers=1, rnn_cell='gru', rnn_dropout_p=0.2):
11 |         super(S2VTModel, self).__init__()
12 |         if rnn_cell.lower() == 'lstm':
13 |             self.rnn_cell = nn.LSTM
14 |         elif rnn_cell.lower() == 'gru':
15 |             self.rnn_cell = nn.GRU
16 |         self.rnn1 = self.rnn_cell(dim_vid, dim_hidden, n_layers,
17 |                                   batch_first=True, dropout=rnn_dropout_p)
18 |         self.rnn2 = self.rnn_cell(dim_hidden + dim_word, dim_hidden, n_layers,
19 |                                   batch_first=True, dropout=rnn_dropout_p)
20 | 
21 |         self.dim_vid = dim_vid
22 |         self.dim_output = vocab_size
23 |         self.dim_hidden = dim_hidden
24 |         self.dim_word = dim_word
25 |         self.max_length = max_len
26 |         self.sos_id = sos_id
27 |         self.eos_id = eos_id
28 |         self.embedding = nn.Embedding(self.dim_output, self.dim_word)
29 | 
30 |         self.out = nn.Linear(self.dim_hidden, self.dim_output)
31 | 
32 |     def forward(self, vid_feats, target_variable=None,
33 |                 mode='train', opt={}):
34 |         batch_size, n_frames, _ = vid_feats.shape
35 |         padding_words = Variable(vid_feats.data.new(batch_size, n_frames, self.dim_word)).zero_()
36 |         padding_frames = Variable(vid_feats.data.new(batch_size, 1, self.dim_vid)).zero_()
37 |         state1 = None
38 |         state2 = None
39 |         #self.rnn1.flatten_parameters()
40 |         #self.rnn2.flatten_parameters()
41 |         output1, state1 = self.rnn1(vid_feats, state1)
42 |         input2 = torch.cat((output1, padding_words), dim=2)
43 |         output2, state2 = self.rnn2(input2, state2)
44 | 
45 |         seq_probs = []
46 |         seq_preds = []
47 |         if mode == 'train':
48 |             for i in range(self.max_length - 1):
49 |                 # <eos> doesn't input to the network
50 |                 current_words = self.embedding(target_variable[:, i])
51 |                 self.rnn1.flatten_parameters()
52 |                 self.rnn2.flatten_parameters()
53 |                 output1, state1 = self.rnn1(padding_frames, state1)
54 |                 input2 = torch.cat(
55 |                     (output1, current_words.unsqueeze(1)), dim=2)
56 |                 output2, state2 = self.rnn2(input2, state2)
57 |                 logits = self.out(output2.squeeze(1))
58 |                 logits = F.log_softmax(logits, dim=1)
59 |                 seq_probs.append(logits.unsqueeze(1))
60 |             seq_probs = torch.cat(seq_probs, 1)
61 | 
62 |         else:
63 |             current_words = self.embedding(
64 |                 Variable(torch.LongTensor([self.sos_id] * batch_size)).cuda())
65 |             for i in range(self.max_length - 1):
66 |                 self.rnn1.flatten_parameters()
67 |                 self.rnn2.flatten_parameters()
68 |                 output1, state1 = self.rnn1(padding_frames, state1)
69 |                 input2 = torch.cat(
70 |                     (output1, current_words.unsqueeze(1)), dim=2)
71 |                 output2, state2 = self.rnn2(input2, state2)
72 |                 logits = self.out(output2.squeeze(1))
73 |                 logits = F.log_softmax(logits, dim=1)
74 |                 seq_probs.append(logits.unsqueeze(1))
75 |                 _, preds = torch.max(logits, 1)
76 |                 current_words = self.embedding(preds)
77 |                 seq_preds.append(preds.unsqueeze(1))
78 |             seq_probs = torch.cat(seq_probs, 1)
79 |             seq_preds = torch.cat(seq_preds, 1)
80 |         return seq_probs, seq_preds
81 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .EncoderRNN import EncoderRNN
2 | from .DecoderRNN import DecoderRNN
3 | from .S2VTAttModel import S2VTAttModel
4 | from .S2VTModel import S2VTModel
5 | 


--------------------------------------------------------------------------------
/opts.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | 
  4 | def parse_opt():
  5 |     parser = argparse.ArgumentParser()
  6 |     # Data input settings
  7 |     parser.add_argument(
  8 |         '--input_json',
  9 |         type=str,
 10 |         default='data/videodatainfo_2017.json',
 11 |         help='path to the json file containing video info')
 12 |     parser.add_argument(
 13 |         '--info_json',
 14 |         type=str,
 15 |         default='data/info.json',
 16 |         help='path to the json file containing additional info and vocab')
 17 |     parser.add_argument(
 18 |         '--caption_json',
 19 |         type=str,
 20 |         default='data/caption.json',
 21 |         help='path to the processed video caption json')
 22 | 
 23 |     parser.add_argument(
 24 |         '--feats_dir',
 25 |         nargs='*',
 26 |         type=str,
 27 |         default=['data/feats/resnet152/'],
 28 |         help='path to the directory containing the preprocessed fc feats')
 29 | 
 30 |     parser.add_argument('--c3d_feats_dir', type=str, default='data/c3d_feats')
 31 |     parser.add_argument(
 32 |         '--with_c3d', type=int, default=0, help='whether to use c3d features')
 33 | 
 34 |     parser.add_argument(
 35 |         '--cached_tokens',
 36 |         type=str,
 37 |         default='msr-all-idxs',
 38 |         help='Cached token file for calculating cider score \
 39 |                         during self critical training.')
 40 | 
 41 |     # Model settings
 42 |     parser.add_argument(
 43 |         "--model", type=str, default='S2VTModel', help="with model to use")
 44 | 
 45 |     parser.add_argument(
 46 |         "--max_len",
 47 |         type=int,
 48 |         default=28,
 49 |         help='max length of captions(containing <sos>,<eos>)')
 50 |     parser.add_argument(
 51 |         "--bidirectional",
 52 |         type=int,
 53 |         default=0,
 54 |         help="0 for disable, 1 for enable. encoder/decoder bidirectional.")
 55 | 
 56 |     parser.add_argument(
 57 |         '--dim_hidden',
 58 |         type=int,
 59 |         default=512,
 60 |         help='size of the rnn hidden layer')
 61 |     parser.add_argument(
 62 |         '--num_layers', type=int, default=1, help='number of layers in the RNN')
 63 |     parser.add_argument(
 64 |         '--input_dropout_p',
 65 |         type=float,
 66 |         default=0.2,
 67 |         help='strength of dropout in the Language Model RNN')
 68 |     parser.add_argument(
 69 |         '--rnn_type', type=str, default='gru', help='lstm or gru')
 70 |     parser.add_argument(
 71 |         '--rnn_dropout_p',
 72 |         type=float,
 73 |         default=0.5,
 74 |         help='strength of dropout in the Language Model RNN')
 75 |     parser.add_argument(
 76 |         '--dim_word',
 77 |         type=int,
 78 |         default=512,
 79 |         help='the encoding size of each token in the vocabulary, and the video.'
 80 |     )
 81 | 
 82 |     parser.add_argument(
 83 |         '--dim_vid',
 84 |         type=int,
 85 |         default=2048,
 86 |         help='dim of features of video frames')
 87 | 
 88 |     # Optimization: General
 89 | 
 90 |     parser.add_argument(
 91 |         '--epochs', type=int, default=6001, help='number of epochs')
 92 |     parser.add_argument(
 93 |         '--batch_size', type=int, default=128, help='minibatch size')
 94 |     parser.add_argument(
 95 |         '--grad_clip',
 96 |         type=float,
 97 |         default=5,  # 5.,
 98 |         help='clip gradients at this value')
 99 | 
100 |     parser.add_argument(
101 |         '--self_crit_after',
102 |         type=int,
103 |         default=-1,
104 |         help='After what epoch do we start finetuning the CNN? \
105 |                         (-1 = disable; never finetune, 0 = finetune from start)'
106 |     )
107 | 
108 |     parser.add_argument(
109 |         '--learning_rate', type=float, default=4e-4, help='learning rate')
110 | 
111 |     parser.add_argument(
112 |         '--learning_rate_decay_every',
113 |         type=int,
114 |         default=200,
115 |         help='every how many iterations thereafter to drop LR?(in epoch)')
116 |     parser.add_argument('--learning_rate_decay_rate', type=float, default=0.8)
117 |     parser.add_argument(
118 |         '--optim_alpha', type=float, default=0.9, help='alpha for adam')
119 |     parser.add_argument(
120 |         '--optim_beta', type=float, default=0.999, help='beta used for adam')
121 |     parser.add_argument(
122 |         '--optim_epsilon',
123 |         type=float,
124 |         default=1e-8,
125 |         help='epsilon that goes into denominator for smoothing')
126 |     parser.add_argument(
127 |         '--weight_decay',
128 |         type=float,
129 |         default=5e-4,
130 |         help='weight_decay. strength of weight regularization')
131 | 
132 |     parser.add_argument(
133 |         '--save_checkpoint_every',
134 |         type=int,
135 |         default=50,
136 |         help='how often to save a model checkpoint (in epoch)?')
137 |     parser.add_argument(
138 |         '--checkpoint_path',
139 |         type=str,
140 |         default='save',
141 |         help='directory to store checkpointed models')
142 | 
143 |     parser.add_argument(
144 |         '--gpu', type=str, default='0', help='gpu device number')
145 | 
146 |     args = parser.parse_args()
147 | 
148 |     return args
149 | 


--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
 1 | Cython                    0.29.2                    <pip>
 2 | dask                      1.1.1                     <pip> 
 3 | decorator                 4.3.2                     <pip>
 4 | enum34                    1.1.6                     <pip>  
 5 | grpcio                    1.18.0                    <pip> 
 6 | h5py                      2.9.0                     <pip>   
 7 | lmdb                      0.94                      <pip>
 8 | matplotlib                2.2.3            py27hb69df0a_0  
 9 | mkl                       2019.0                      118   
10 | networkx                  2.2                       <pip> 
11 | nltk                      3.4                       <pip>
12 | numpy                     1.15.2           py27h1d66e8a_1    
13 | opencv                    3.4.2            py27h6fd60c2_1    
14 | pillow                    5.3.0            py27h34e0f95_0   
15 | protobuf                  3.6.1                     <pip>
16 | py-opencv                 3.4.2            py27hb342d67_1    
17 | python                    2.7.15               h1571d57_0  
18 | pytorch                   0.4.1           py27__9.0.176_7.1.2_2  
19 | scikit-image              0.14.2                    <pip>
20 | scipy                     1.2.0                     <pip>
21 | setuptools                40.4.3                   py27_0   
22 | tensorboard               1.12.2                    <pip>
23 | tensorboard-logger        0.1.0                     <pip>
24 | torchvision               0.2.1                    py27_1  
25 | 


--------------------------------------------------------------------------------
/vocab.py:
--------------------------------------------------------------------------------
  1 | # Create a vocabulary wrapper
  2 | import nltk
  3 | import pickle
  4 | from collections import Counter
  5 | from pycocotools.coco import COCO
  6 | import json
  7 | import argparse
  8 | import os
  9 | 
 10 | annotations = {
 11 |     'coco_precomp': ['train_caps.txt', 'dev_caps.txt'],
 12 |     'coco': ['annotations/captions_train2014.json',
 13 |              'annotations/captions_val2014.json'],
 14 |     'f8k_precomp': ['train_caps.txt', 'dev_caps.txt'],
 15 |     '10crop_precomp': ['train_caps.txt', 'dev_caps.txt'],
 16 |     'f30k_precomp': ['train_caps.txt', 'dev_caps.txt'],
 17 |     'f8k': ['dataset_flickr8k.json'],
 18 |     'f30k': ['dataset_flickr30k.json'],
 19 | }
 20 | 
 21 | 
 22 | class Vocabulary(object):
 23 |     """Simple vocabulary wrapper."""
 24 | 
 25 |     def __init__(self):
 26 |         self.word2idx = {}
 27 |         self.idx2word = {}
 28 |         self.idx = 0
 29 | 
 30 |     def add_word(self, word):
 31 |         if word not in self.word2idx:
 32 |             self.word2idx[word] = self.idx
 33 |             self.idx2word[self.idx] = word
 34 |             self.idx += 1
 35 | 
 36 |     def __call__(self, word):
 37 |         if word not in self.word2idx:
 38 |             return self.word2idx['<unk>']
 39 |         return self.word2idx[word]
 40 | 
 41 |     def __len__(self):
 42 |         return len(self.word2idx)
 43 | 
 44 | 
 45 | def from_coco_json(path):
 46 |     coco = COCO(path)
 47 |     ids = coco.anns.keys()
 48 |     captions = []
 49 |     for i, idx in enumerate(ids):
 50 |         captions.append(str(coco.anns[idx]['caption']))
 51 | 
 52 |     return captions
 53 | 
 54 | 
 55 | def from_flickr_json(path):
 56 |     dataset = json.load(open(path, 'r'))['images']
 57 |     captions = []
 58 |     for i, d in enumerate(dataset):
 59 |         captions += [str(x['raw']) for x in d['sentences']]
 60 | 
 61 |     return captions
 62 | 
 63 | 
 64 | def from_txt(txt):
 65 |     captions = []
 66 |     with open(txt, 'rb') as f:
 67 |         for line in f:
 68 |             captions.append(line.strip())
 69 |     return captions
 70 | 
 71 | 
 72 | def build_vocab(data_path, data_name, jsons, threshold):
 73 |     """Build a simple vocabulary wrapper."""
 74 |     counter = Counter()
 75 |     for path in jsons[data_name]:
 76 |         full_path = os.path.join(os.path.join(data_path, data_name), path)
 77 |         if data_name == 'coco':
 78 |             captions = from_coco_json(full_path)
 79 |         elif data_name == 'f8k' or data_name == 'f30k':
 80 |             captions = from_flickr_json(full_path)
 81 |         else:
 82 |             captions = from_txt(full_path)
 83 |         for i, caption in enumerate(captions):
 84 |             tokens = nltk.tokenize.word_tokenize(
 85 |                 caption.lower().decode('utf-8'))
 86 |             counter.update(tokens)
 87 | 
 88 |             if i % 1000 == 0:
 89 |                 print("[%d/%d] tokenized the captions." % (i, len(captions)))
 90 | 
 91 |     # Discard if the occurrence of the word is less than min_word_cnt.
 92 |     words = [word for word, cnt in counter.items() if cnt >= threshold]
 93 | 
 94 |     # Create a vocab wrapper and add some special tokens.
 95 |     vocab = Vocabulary()
 96 |     vocab.add_word('<pad>')
 97 |     vocab.add_word('<start>')
 98 |     vocab.add_word('<end>')
 99 |     vocab.add_word('<unk>')
100 | 
101 |     # Add words to the vocabulary.
102 |     for i, word in enumerate(words):
103 |         vocab.add_word(word)
104 |     return vocab
105 | 
106 | 
107 | def main(data_path, data_name):
108 |     vocab = build_vocab(data_path, data_name, jsons=annotations, threshold=4)
109 |     with open('./vocab/%s_vocab.pkl' % data_name, 'wb') as f:
110 |         pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL)
111 |     print("Saved vocabulary file to ", './vocab/%s_vocab.pkl' % data_name)
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     parser = argparse.ArgumentParser()
116 |     parser.add_argument('--data_path', default='/w/31/faghri/vsepp_data/')
117 |     parser.add_argument('--data_name', default='coco',
118 |                         help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
119 |     opt = parser.parse_args()
120 |     main(opt.data_path, opt.data_name)
121 | 


--------------------------------------------------------------------------------
/vocab/10crop_precomp_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/10crop_precomp_vocab.pkl


--------------------------------------------------------------------------------
/vocab/coco_precomp_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/coco_precomp_vocab.pkl


--------------------------------------------------------------------------------
/vocab/coco_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/coco_vocab.pkl


--------------------------------------------------------------------------------
/vocab/f30k_precomp_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/f30k_precomp_vocab.pkl


--------------------------------------------------------------------------------
/vocab/f30k_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/f30k_vocab.pkl


--------------------------------------------------------------------------------
/vocab/f8k_precomp_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/f8k_precomp_vocab.pkl


--------------------------------------------------------------------------------
/vocab/f8k_vocab.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunpengLi1994/VSRN/777ae74326fdb6abe69dbd3911d0e545322520d1/vocab/f8k_vocab.pkl


--------------------------------------------------------------------------------