├── README.md ├── cocoapi-master ├── LuaAPI │ ├── CocoApi.lua │ ├── MaskApi.lua │ ├── cocoDemo.lua │ ├── env.lua │ ├── init.lua │ └── rocks │ │ └── coco-scm-1.rockspec ├── MatlabAPI │ ├── CocoApi.m │ ├── CocoEval.m │ ├── CocoUtils.m │ ├── MaskApi.m │ ├── cocoDemo.m │ ├── evalDemo.m │ ├── gason.m │ └── private │ │ ├── gasonMex.cpp │ │ ├── gasonMex.mexa64 │ │ ├── gasonMex.mexmaci64 │ │ ├── getPrmDflt.m │ │ └── maskApiMex.c ├── PythonAPI │ ├── Makefile │ ├── build │ │ ├── common │ │ │ └── maskApi.o │ │ ├── lib.linux-x86_64-2.7 │ │ │ └── pycocotools │ │ │ │ ├── __init__.py │ │ │ │ ├── _mask.so │ │ │ │ ├── coco.py │ │ │ │ ├── cocoeval.py │ │ │ │ └── mask.py │ │ └── temp.linux-x86_64-2.7 │ │ │ └── pycocotools │ │ │ └── _mask.o │ ├── dist │ │ └── pycocotools-2.0-py2.7-linux-x86_64.egg │ ├── pycocotools.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── requires.txt │ │ └── top_level.txt │ ├── pycocotools │ │ ├── __init__.py │ │ ├── _mask.c │ │ ├── _mask.pyx │ │ ├── _mask.so │ │ ├── coco.py │ │ ├── cocoeval.py │ │ └── mask.py │ └── setup.py ├── README.txt ├── common │ ├── gason.cpp │ ├── gason.h │ ├── maskApi.c │ └── maskApi.h ├── license.txt └── results │ ├── captions_val2014_fakecap_results.json │ ├── instances_val2014_fakebbox100_results.json │ ├── instances_val2014_fakesegm100_results.json │ ├── person_keypoints_val2014_fakekeypoints100_results.json │ └── val2014_fake_eval_res.txt ├── cocoeval ├── LICENSE ├── pyciderevalcap │ ├── __init__.py │ ├── __init__.pyc │ ├── cider │ │ ├── __init__.py │ │ ├── cider.py │ │ └── cider_scorer.py │ ├── ciderD │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── ciderD.py │ │ ├── ciderD.pyc │ │ ├── ciderD_scorer.py │ │ └── ciderD_scorer.pyc │ ├── eval.py │ └── tokenizer │ │ ├── __init__.py │ │ ├── ptbtokenizer.py │ │ └── stanford-corenlp-3.4.1.jar ├── pycocoevalcap │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── __init__.cpython-36.pyc │ ├── bleu │ │ ├── LICENSE │ │ ├── 
__init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── bleu.cpython-35.pyc │ │ │ ├── bleu.cpython-36.pyc │ │ │ ├── bleu_scorer.cpython-35.pyc │ │ │ └── bleu_scorer.cpython-36.pyc │ │ ├── bleu.py │ │ └── bleu_scorer.py │ ├── cider │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── cider.cpython-35.pyc │ │ │ ├── cider.cpython-36.pyc │ │ │ ├── cider_scorer.cpython-35.pyc │ │ │ └── cider_scorer.cpython-36.pyc │ │ └── cider.py │ ├── eval.py │ ├── meteor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── meteor.cpython-35.pyc │ │ │ └── meteor.cpython-36.pyc │ │ ├── meteor-1.5.jar │ │ └── meteor.py │ ├── rouge │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── rouge.cpython-35.pyc │ │ │ └── rouge.cpython-36.pyc │ │ └── rouge.py │ └── tokenizer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── ptbtokenizer.cpython-35.pyc │ │ └── ptbtokenizer.cpython-36.pyc │ │ ├── ptbtokenizer.py │ │ └── stanford-corenlp-3.4.1.jar └── pycocotools │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ └── mask.py ├── data.py ├── env.yml ├── evaluate.py ├── evaluation.py ├── figures └── framework_CVSE.jpg ├── metrics ├── README.md ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── metric.cpython-37.pyc ├── custom_cider.py ├── eval.py ├── eval.sh ├── metric.py ├── metric.pyc └── preprocess.py ├── model_CVSE.py ├── paper_images ├── ITM_fig1.png └── ITM_fig2.png ├── run_all.py ├── train_coco.py ├── train_f30k.py ├── util ├── C_GCN.py ├── __pycache__ │ ├── C_GCN.cpython-36.pyc │ ├── C_GCN.cpython-37.pyc │ ├── util_C_GCN.cpython-36.pyc │ ├── util_C_GCN.cpython-37.pyc │ ├── utils.cpython-36.pyc │ └── utils.cpython-37.pyc ├── util_C_GCN.py └── 
utils.py ├── vocab.py └── vocab ├── coco_precomp_vocab.json ├── coco_precomp_vocab.pkl ├── coco_vocab.pkl ├── f30k_precomp_vocab.json ├── f30k_precomp_vocab.pkl ├── f30k_vocab.json ├── f30k_vocab.pkl ├── f8k_precomp_vocab.pkl └── f8k_vocab.pkl /README.md: -------------------------------------------------------------------------------- 1 | # Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching 2 | 3 | Code for our WACV 2022 accepted paper: https://arxiv.org/pdf/2110.02623.pdf 4 | 5 | The project is built on top of [CVSE](https://github.com/BruceW91/CVSE) in PyTorch. However, it is easy to adapt to different Image-Text Matching models (SCAN, VSRN, SGRAF). For the proposed metric code and evaluation, please visit: https://github.com/furkanbiten/ncs_metric. 6 | 7 | 8 | ## Introduction 9 | The task of image-text matching aims to map representations from different modalities into a common joint visual-textual embedding. However, the most widely used datasets for this task, MSCOCO and Flickr30K, are actually image captioning datasets that offer a very limited set of relationships between images and sentences in their ground-truth annotations. 10 | This limited ground-truth information forces us to use evaluation metrics based on binary relevance: given a sentence query, we consider only one image as relevant. However, many other relevant images or captions may be present in the dataset. 11 | In this work, we propose two metrics that evaluate the degree of semantic relevance of retrieved items, independently of their annotated binary relevance. 12 | Additionally, we incorporate a novel strategy that uses an image captioning metric, CIDEr, to define a Semantic Adaptive Margin (SAM) to be optimized in a standard triplet loss. By incorporating our formulation into existing models, a large improvement is obtained in scenarios where available training data is limited. 
We also demonstrate that the performance on the annotated image-caption pairs is maintained while improving on other non-annotated relevant items when employing the full training set. The code for our new metric can be found at https://github.com/furkanbiten/ncs_metric, and the code for the model at https://github.com/andrespmd/semantic_adaptive_margin 13 | 14 | 15 |

16 | 17 | 18 |

19 | 20 | 21 | ## Install Environment 22 | 23 | Git clone the project. 24 | 25 | Create the Conda environment: 26 | 27 | $ conda env create -f env.yml 28 | 29 | Activate the environment: 30 | 31 | $ conda activate pytorch12 32 | 33 | 34 | ## Download Additional Project Data 35 | 36 | Please download the following compressed file from: 37 | 38 | https://drive.google.com/file/d/1DiRZODZQENoxbCpQFsiSEs1-9VatD176/view?usp=sharing 39 | 40 | Uncompress the downloaded files under the main project folder. 41 | 42 | ## Download Features 43 | 44 | Download the dataset files (MS-COCO and Flickr30K). We use region-level visual features, similar to those used by SCAN, downloaded [here](https://github.com/kuanghuei/SCAN). All the data needed for reproducing the experiments in the paper, including image features and vocabularies, can be downloaded from: 45 | ```bash 46 | wget https://iudata.blob.core.windows.net/scan/data.zip 47 | wget https://iudata.blob.core.windows.net/scan/vocab.zip 48 | ``` 49 | In this implementation, we refer to the path of the files extracted from `data.zip` as `$data_path`, and to the directory holding the files extracted from `vocab.zip` as `./vocab_path`. 50 | 51 | ## Training 52 | 53 | * Train MSCOCO models: 54 | Run `train_coco.py` using the SAM, e.g. using random sampling, metric margin division 6, a CIDEr loss weight of 10, and 10% of the dataset: 55 | ```bash 56 | python train_coco.py --data_path "$DATA_PATH" --model_name "$PATH_TO_SAVE" --resume None --val_step 1000 --use_metric --cider_weight 10 --metric_samples random --metric_div 6 --data_percentage 10 57 | ``` 58 | 59 | * Train Flickr30K models: 60 | Run `train_f30k.py` using the SAM, e.g. 
using soft sampling, metric margin division 10, a CIDEr loss weight of 5, and the full dataset: 61 | ```bash 62 | python train_f30k.py --data_path "$DATA_PATH" --model_name "$PATH_TO_SAVE" --resume None --val_step 1000 --use_metric --metric_samples soft --metric_div 10 --cider_weight 5 --data_percentage 100 63 | ``` 64 | 65 | ## Evaluate 66 | Run `run_all.py` to obtain the evaluation from our proposed metrics and standard Recall. 67 | E.g., evaluate the model on Flickr30K: 68 | ```bash 69 | python run_all.py --model "$PATH_TO_SAVED_MODEL"model_best.pth.tar --split test --dataset f30k 70 | ``` 71 | E.g., evaluate the model on the full 5K test set of MS-COCO: 72 | ```bash 73 | python run_all.py --model "$PATH_TO_SAVED_MODEL"model_best.pth.tar --split testall --dataset coco 74 | ``` 75 | 76 | ## Reference 77 | 78 | If you find this research, the proposed metric, or the code useful, please cite the following paper: 79 | 80 | ``` 81 | @article{biten2021image, 82 | title={Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching}, 83 | author={Biten, Ali Furkan and Mafla, Andres and Gomez, Lluis and Karatzas, Dimosthenis}, 84 | journal={arXiv preprint arXiv:2110.02623}, 85 | year={2021} 86 | } 87 | 88 | ``` 89 | 90 | ## License 91 | 92 | [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0) 93 | -------------------------------------------------------------------------------- /cocoapi-master/LuaAPI/MaskApi.lua: -------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 
6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. 
Pinheiro and Piotr Dollar, 2016. 49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | 
libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms = function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = 
p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi=0 and yi>=0 and xi= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). 
Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is number of pixels in the object. Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 
37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = 
coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo demonstrating the algorithm result formats for COCO 2 | 3 | %% select results type for demo (either bbox or segm) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visialuze gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100); 20 | imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = 
cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structure have the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analyis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represent a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 
18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays. 32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. 
version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | 
mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case 
JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? 
"," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 
64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /cocoapi-master/MatlabAPI/private/maskApiMex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "mex.h" 8 | #include "maskApi.h" 9 | #include 10 | 11 | void checkType( const mxArray *M, mxClassID id ) { 12 | if(mxGetClassID(M)!=id) mexErrMsgTxt("Invalid type."); 13 | } 14 | 15 | mxArray* toMxArray( const RLE *R, siz n ) { 16 | const char *fs[] = {"size", "counts"}; 17 | mxArray *M=mxCreateStructMatrix(1,n,2,fs); 18 | for( siz i=0; i1) mexErrMsgTxt(err); 35 | for( i=0; i<*n; i++ ) { 36 | mxArray *S, *C; double *s; void *c; 37 | S=mxGetFieldByNumber(M,i,O[0]); checkType(S,mxDOUBLE_CLASS); 38 | C=mxGetFieldByNumber(M,i,O[1]); s=mxGetPr(S); c=mxGetData(C); 39 | h=(siz)s[0]; w=(siz)s[1]; m=mxGetNumberOfElements(C); 40 | if(same && i>0 && (h!=R[0].h || w!=R[0].w)) mexErrMsgTxt(err); 41 | if( mxGetClassID(C)==mxDOUBLE_CLASS ) { 42 | rleInit(R+i,h,w,m,0); 43 | for(j=0; j=2) ? (mxGetScalar(pr[1])>0) : false; 74 | rleMerge(R,&M,n,intersect); pl[0]=toMxArray(&M,1); rleFree(&M); 75 | 76 | } else if(!strcmp(action,"area")) { 77 | R=frMxArray(pr[0],&n,0); 78 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 79 | uint *a=(uint*) mxGetPr(pl[0]); rleArea(R,n,a); 80 | 81 | } else if(!strcmp(action,"iou")) { 82 | if(nr>2) checkType(pr[2],mxUINT8_CLASS); siz nDt, nGt; 83 | byte *iscrowd = nr>2 ? 
(byte*) mxGetPr(pr[2]) : NULL; 84 | if(mxIsStruct(pr[0]) || mxIsStruct(pr[1])) { 85 | RLE *dt=frMxArray(pr[0],&nDt,1), *gt=frMxArray(pr[1],&nGt,1); 86 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 87 | double *o=mxGetPr(pl[0]); rleIou(dt,gt,nDt,nGt,iscrowd,o); 88 | rlesFree(&dt,nDt); rlesFree(>,nGt); 89 | } else { 90 | checkType(pr[0],mxDOUBLE_CLASS); checkType(pr[1],mxDOUBLE_CLASS); 91 | double *dt=mxGetPr(pr[0]); nDt=mxGetN(pr[0]); 92 | double *gt=mxGetPr(pr[1]); nGt=mxGetN(pr[1]); 93 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 94 | double *o=mxGetPr(pl[0]); bbIou(dt,gt,nDt,nGt,iscrowd,o); 95 | } 96 | 97 | } else if(!strcmp(action,"nms")) { 98 | siz n; uint *keep; double thr=(double) mxGetScalar(pr[1]); 99 | if(mxIsStruct(pr[0])) { 100 | RLE *dt=frMxArray(pr[0],&n,1); 101 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 102 | keep=(uint*) mxGetPr(pl[0]); rleNms(dt,n,keep,thr); 103 | rlesFree(&dt,n); 104 | } else { 105 | checkType(pr[0],mxDOUBLE_CLASS); 106 | double *dt=mxGetPr(pr[0]); n=mxGetN(pr[0]); 107 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 108 | keep=(uint*) mxGetPr(pl[0]); bbNms(dt,n,keep,thr); 109 | } 110 | 111 | } else if(!strcmp(action,"toBbox")) { 112 | R=frMxArray(pr[0],&n,0); 113 | pl[0]=mxCreateNumericMatrix(4,n,mxDOUBLE_CLASS,mxREAL); 114 | BB bb=mxGetPr(pl[0]); rleToBbox(R,bb,n); 115 | 116 | } else if(!strcmp(action,"frBbox")) { 117 | checkType(pr[0],mxDOUBLE_CLASS); 118 | double *bb=mxGetPr(pr[0]); n=mxGetN(pr[0]); 119 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); 120 | rlesInit(&R,n); rleFrBbox(R,bb,h,w,n); pl[0]=toMxArray(R,n); 121 | 122 | } else if(!strcmp(action,"frPoly")) { 123 | checkType(pr[0],mxCELL_CLASS); n=mxGetNumberOfElements(pr[0]); 124 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); rlesInit(&R,n); 125 | for(siz i=0; i=18.0 2 | cython>=0.27.3 3 | matplotlib>=2.1.0 4 | -------------------------------------------------------------------------------- 
/cocoapi-master/PythonAPI/pycocotools.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | -------------------------------------------------------------------------------- /cocoapi-master/PythonAPI/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoapi-master/PythonAPI/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoapi-master/PythonAPI/pycocotools/_mask.so -------------------------------------------------------------------------------- /cocoapi-master/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). 
Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] 
(2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use the modified criterion above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /cocoapi-master/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools._mask', 10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools', 18 | packages=['pycocotools'], 19 | package_dir = {'pycocotools': 'pycocotools'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /cocoapi-master/README.txt: 
-------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assist in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete; the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage.
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /cocoapi-master/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? 
JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | 
*endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | 
return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /cocoapi-master/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 
22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, 
"mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /cocoapi-master/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 174 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 175 | s = dx>=dy ? 
(double)(ye-ys)/dx : (double)(xe-xs)/dy; 176 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 177 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 178 | } else for( d=0; d<=dy; d++ ) { 179 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 180 | } 181 | } 182 | /* get points along y-boundary and downsample */ 183 | free(x); free(y); k=m; m=0; double xd, yd; 184 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 185 | for( j=1; jw-1 ) continue; 188 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 190 | x[m]=(int) xd; y[m]=(int) yd; m++; 191 | } 192 | /* compute rle encoding given y-boundary points */ 193 | k=m; a=malloc(sizeof(uint)*(k+1)); 194 | for( j=0; j0) b[m++]=a[j++]; else { 200 | j++; if(jm, p=0; long x; int more; 207 | char *s=malloc(sizeof(char)*m*6); 208 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 210 | while( more ) { 211 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 212 | if(more) c |= 0x20; c+=48; s[p++]=c; 213 | } 214 | } 215 | s[p]=0; return s; 216 | } 217 | 218 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 219 | siz m=0, p=0, k; long x; int more; uint *cnts; 220 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 221 | while( s[p] ) { 222 | x=0; k=0; more=1; 223 | while( more ) { 224 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 225 | more = c & 0x20; p++; k++; 226 | if(!more && (c & 0x10)) x |= -1 << 5*k; 227 | } 228 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 229 | } 230 | rleInit(R,h,w,m,cnts); free(cnts); 231 | } 232 | -------------------------------------------------------------------------------- /cocoapi-master/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. 
*/ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /cocoapi-master/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /cocoapi-master/results/val2014_fake_eval_res.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | type=segm 3 | Running per image evaluation... DONE (t=0.45s). 4 | Accumulating evaluation results... DONE (t=0.08s). 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.320 6 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562 7 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.299 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.387 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.310 10 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.327 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.268 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.415 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.417 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.469 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.377 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.381 17 | 18 | ------------------------------------------------------------------------------ 19 | type=bbox 20 | Running per image evaluation... DONE (t=0.34s). 21 | Accumulating evaluation results... DONE (t=0.08s). 
22 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 23 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697 24 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573 25 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586 26 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519 27 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501 28 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387 29 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594 30 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595 31 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640 32 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566 33 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564 34 | 35 | ------------------------------------------------------------------------------ 36 | type=keypoints 37 | Running per image evaluation... DONE (t=0.06s). 38 | Accumulating evaluation results... DONE (t=0.00s). 
39 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.372 40 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.636 41 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.348 42 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.384 43 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.386 44 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.514 45 | Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.734 46 | Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.504 47 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.508 48 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.522 49 | -------------------------------------------------------------------------------- /cocoeval/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 DingXia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /cocoeval/pyciderevalcap/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoeval/pyciderevalcap/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pyciderevalcap/__init__.pyc -------------------------------------------------------------------------------- /cocoeval/pyciderevalcap/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoeval/pyciderevalcap/cider/cider.py: -------------------------------------------------------------------------------- 1 | # Filename: cider.py 2 | # 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | 13 | from cider_scorer import CiderScorer 14 | 15 | 16 | class Cider: 17 | """ 18 | Main Class to compute the CIDEr metric 19 | 20 | """ 21 | def __init__(self, n=4, df="corpus"): 22 | """ 23 | Initialize the CIDEr scoring function 24 | : param n (int): n-gram size 25 | : param df (string): specifies where to get the IDF values from 26 | takes 
def precook(s, n=4, out=False):
    """
    Count every n-gram of order 1..n in a whitespace-tokenised sentence.

    :param s: string : sentence to be converted into ngrams
    :param n: int    : maximum n-gram order to count
    :param out: unused; kept so existing callers (cook_test passes True)
                keep working
    :return: defaultdict(int) mapping each n-gram (a tuple of words) to the
             number of times it occurs in ``s``
    """
    words = s.split()
    counts = defaultdict(int)
    # range() instead of the Python-2-only xrange(): same result, and the
    # function now also runs under Python 3.
    for k in range(1, n + 1):
        for i in range(len(words) - k + 1):
            counts[tuple(words[i:i + k])] += 1
    return counts
50 | """ 51 | 52 | def copy(self): 53 | ''' copy the refs.''' 54 | new = CiderScorer(n=self.n) 55 | new.ctest = copy.copy(self.ctest) 56 | new.crefs = copy.copy(self.crefs) 57 | return new 58 | 59 | def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0): 60 | ''' singular instance ''' 61 | self.n = n 62 | self.sigma = sigma 63 | self.crefs = [] 64 | self.ctest = [] 65 | self.df_mode = df_mode 66 | if self.df_mode != "corpus": 67 | self.document_frequency = pickle.load(open(os.path.join('data', df_mode + '.p'),'r')) 68 | self.cook_append(test, refs) 69 | self.ref_len = None 70 | 71 | def clear(self): 72 | self.crefs = [] 73 | self.ctest = [] 74 | 75 | def cook_append(self, test, refs): 76 | '''called by constructor and __iadd__ to avoid creating new instances.''' 77 | 78 | if refs is not None: 79 | self.crefs.append(cook_refs(refs)) 80 | if test is not None: 81 | self.ctest.append(cook_test(test)) ## N.B.: -1 82 | else: 83 | self.ctest.append(None) # lens of crefs and ctest have to match 84 | 85 | def size(self): 86 | assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest)) 87 | return len(self.crefs) 88 | 89 | def __iadd__(self, other): 90 | '''add an instance (e.g., from another sentence).''' 91 | 92 | if type(other) is tuple: 93 | ## avoid creating new CiderScorer instances 94 | self.cook_append(other[0], other[1]) 95 | else: 96 | self.ctest.extend(other.ctest) 97 | self.crefs.extend(other.crefs) 98 | 99 | return self 100 | def compute_doc_freq(self): 101 | ''' 102 | Compute term frequency for reference data. 
103 | This will be used to compute idf (inverse document frequency later) 104 | The term frequency is stored in the object 105 | :return: None 106 | ''' 107 | for refs in self.crefs: 108 | # refs, k ref captions of one image 109 | for ngram in set([ngram for ref in refs for (ngram,count) in ref.iteritems()]): 110 | self.document_frequency[ngram] += 1 111 | # maxcounts[ngram] = max(maxcounts.get(ngram,0), count) 112 | 113 | def compute_cider(self): 114 | def counts2vec(cnts): 115 | """ 116 | Function maps counts of ngram to vector of tfidf weights. 117 | The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights. 118 | The n-th entry of array denotes length of n-grams. 119 | :param cnts: 120 | :return: vec (array of dict), norm (array of float), length (int) 121 | """ 122 | vec = [defaultdict(float) for _ in range(self.n)] 123 | length = 0 124 | norm = [0.0 for _ in range(self.n)] 125 | for (ngram,term_freq) in cnts.iteritems(): 126 | # give word count 1 if it doesn't appear in reference corpus 127 | df = np.log(max(1.0, self.document_frequency[ngram])) 128 | # ngram index 129 | n = len(ngram)-1 130 | # tf (term_freq) * idf (precomputed idf) for n-grams 131 | vec[n][ngram] = float(term_freq)*(self.ref_len - df) 132 | # compute norm for the vector. the norm will be used for 133 | # computing similarity 134 | norm[n] += pow(vec[n][ngram], 2) 135 | 136 | if n == 1: 137 | length += term_freq 138 | norm = [np.sqrt(n) for n in norm] 139 | return vec, norm, length 140 | 141 | def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref): 142 | ''' 143 | Compute the cosine similarity of two vectors. 
144 | :param vec_hyp: array of dictionary for vector corresponding to hypothesis 145 | :param vec_ref: array of dictionary for vector corresponding to reference 146 | :param norm_hyp: array of float for vector corresponding to hypothesis 147 | :param norm_ref: array of float for vector corresponding to reference 148 | :param length_hyp: int containing length of hypothesis 149 | :param length_ref: int containing length of reference 150 | :return: array of score for each n-grams cosine similarity 151 | ''' 152 | delta = float(length_hyp - length_ref) 153 | # measure consine similarity 154 | val = np.array([0.0 for _ in range(self.n)]) 155 | for n in range(self.n): 156 | # ngram 157 | for (ngram,count) in vec_hyp[n].iteritems(): 158 | val[n] += vec_hyp[n][ngram] * vec_ref[n][ngram] 159 | 160 | if (norm_hyp[n] != 0) and (norm_ref[n] != 0): 161 | val[n] /= (norm_hyp[n]*norm_ref[n]) 162 | 163 | assert(not math.isnan(val[n])) 164 | return val 165 | 166 | # compute log reference length 167 | if self.df_mode == "corpus": 168 | self.ref_len = np.log(float(len(self.crefs))) 169 | elif self.df_mode == "coco-val": 170 | # if coco option selected, use length of coco-val set 171 | self.ref_len = np.log(float(40504)) 172 | 173 | scores = [] 174 | for test, refs in zip(self.ctest, self.crefs): 175 | # compute vector for test captions 176 | vec, norm, length = counts2vec(test) 177 | # compute vector for ref captions 178 | score = np.array([0.0 for _ in range(self.n)]) 179 | for ref in refs: 180 | vec_ref, norm_ref, length_ref = counts2vec(ref) 181 | score += sim(vec, vec_ref, norm, norm_ref, length, length_ref) 182 | # change by vrama91 - mean of ngram scores, instead of sum 183 | score_avg = np.mean(score) 184 | # divide by number of references 185 | score_avg /= len(refs) 186 | # multiply score by 10 187 | score_avg *= 10.0 188 | # append score of an image to the score list 189 | scores.append(score_avg) 190 | return scores 191 | 192 | def compute_score(self, option=None, 
class CiderD:
    """
    Main Class to compute the CIDEr-D metric

    """
    def __init__(self, n=4, sigma=6.0, df="corpus"):
        """
        :param n (int): maximum n-gram order (scores are averaged over 1..n)
        :param sigma (float): standard deviation of the gaussian length penalty
        :param df (string): where document frequencies come from; passed to
                            the scorer as ``df_mode``
        """
        self._n = n
        self._sigma = sigma
        self._df = df
        # sigma is now forwarded; previously it was stored here but the
        # scorer was always built with its own default of 6.0.
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df,
                                        sigma=self._sigma)

    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr-D score
        :param gts: mapping image_id -> non-empty list of reference captions
        :param res: iterable of dicts, each with an 'image_id' key and a
                    'caption' key holding a one-element list
        :return: (score, scores) as returned by the scorer's compute_score()
        """
        # clear all the previous hypos and refs
        self.cider_scorer.clear()
        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # Sanity check.
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) > 0)
            self.cider_scorer += (hypo[0], ref)

        (score, scores) = self.cider_scorer.compute_score()

        return score, scores

    def method(self):
        return "CIDEr-D"
18 | :param s: string : sentence to be converted into ngrams 19 | :param n: int : number of ngrams for which representation is calculated 20 | :return: term frequency vector for occuring ngrams 21 | """ 22 | words = s.split() 23 | counts = defaultdict(int) 24 | for k in range(1,n+1): 25 | for i in range(len(words)-k+1): 26 | ngram = tuple(words[i:i+k]) 27 | counts[ngram] += 1 28 | return counts 29 | 30 | def cook_refs(refs, n=4): ## lhuang: oracle will call with "average" 31 | '''Takes a list of reference sentences for a single segment 32 | and returns an object that encapsulates everything that BLEU 33 | needs to know about them. 34 | :param refs: list of string : reference sentences for some image 35 | :param n: int : number of ngrams for which (ngram) representation is calculated 36 | :return: result (list of dict) 37 | ''' 38 | return [precook(ref, n) for ref in refs] 39 | 40 | def cook_test(test, n=4): 41 | '''Takes a test sentence and returns an object that 42 | encapsulates everything that BLEU needs to know about it. 43 | :param test: list of string : hypothesis sentence for some image 44 | :param n: int : number of ngrams for which (ngram) representation is calculated 45 | :return: result (dict) 46 | ''' 47 | return precook(test, n, True) 48 | 49 | class CiderScorer(object): 50 | """CIDEr scorer. 
51 | """ 52 | 53 | def copy(self): 54 | ''' copy the refs.''' 55 | new = CiderScorer(n=self.n) 56 | new.ctest = copy.copy(self.ctest) 57 | new.crefs = copy.copy(self.crefs) 58 | return new 59 | 60 | def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0): 61 | ''' singular instance ''' 62 | self.n = n 63 | self.sigma = sigma 64 | self.crefs = [] 65 | self.ctest = [] 66 | self.df_mode = df_mode 67 | self.ref_len = None 68 | if self.df_mode != "corpus": 69 | pkl_file = pickle.load(open(os.path.join('data', df_mode + '.p'),'rb')) 70 | self.ref_len = pkl_file['ref_len'] 71 | self.document_frequency = pkl_file['document_frequency'] 72 | self.cook_append(test, refs) 73 | 74 | def clear(self): 75 | self.crefs = [] 76 | self.ctest = [] 77 | 78 | def cook_append(self, test, refs): 79 | '''called by constructor and __iadd__ to avoid creating new instances.''' 80 | 81 | if refs is not None: 82 | self.crefs.append(cook_refs(refs)) 83 | if test is not None: 84 | self.ctest.append(cook_test(test)) ## N.B.: -1 85 | else: 86 | self.ctest.append(None) # lens of crefs and ctest have to match 87 | 88 | def size(self): 89 | assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest)) 90 | return len(self.crefs) 91 | 92 | def __iadd__(self, other): 93 | '''add an instance (e.g., from another sentence).''' 94 | 95 | if type(other) is tuple: 96 | ## avoid creating new CiderScorer instances 97 | self.cook_append(other[0], other[1]) 98 | else: 99 | self.ctest.extend(other.ctest) 100 | self.crefs.extend(other.crefs) 101 | 102 | return self 103 | def compute_doc_freq(self): 104 | ''' 105 | Compute term frequency for reference data. 
106 | This will be used to compute idf (inverse document frequency later) 107 | The term frequency is stored in the object 108 | :return: None 109 | ''' 110 | for refs in self.crefs: 111 | # refs, k ref captions of one image 112 | for ngram in set([ngram for ref in refs for (ngram,count) in ref.items()]): 113 | self.document_frequency[ngram] += 1 114 | # maxcounts[ngram] = max(maxcounts.get(ngram,0), count) 115 | 116 | def compute_cider(self): 117 | def counts2vec(cnts): 118 | """ 119 | Function maps counts of ngram to vector of tfidf weights. 120 | The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights. 121 | The n-th entry of array denotes length of n-grams. 122 | :param cnts: 123 | :return: vec (array of dict), norm (array of float), length (int) 124 | """ 125 | vec = [defaultdict(float) for _ in range(self.n)] 126 | length = 0 127 | norm = [0.0 for _ in range(self.n)] 128 | for (ngram,term_freq) in cnts.items(): 129 | # give word count 1 if it doesn't appear in reference corpus 130 | df = np.log(max(1.0, self.document_frequency[ngram])) 131 | # ngram index 132 | n = len(ngram)-1 133 | # tf (term_freq) * idf (precomputed idf) for n-grams 134 | vec[n][ngram] = float(term_freq)*(self.ref_len - df) 135 | # compute norm for the vector. the norm will be used for computing similarity 136 | norm[n] += pow(vec[n][ngram], 2) 137 | 138 | if n == 1: 139 | length += term_freq 140 | norm = [np.sqrt(n) for n in norm] 141 | return vec, norm, length 142 | 143 | def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref): 144 | ''' 145 | Compute the cosine similarity of two vectors. 
146 | :param vec_hyp: array of dictionary for vector corresponding to hypothesis 147 | :param vec_ref: array of dictionary for vector corresponding to reference 148 | :param norm_hyp: array of float for vector corresponding to hypothesis 149 | :param norm_ref: array of float for vector corresponding to reference 150 | :param length_hyp: int containing length of hypothesis 151 | :param length_ref: int containing length of reference 152 | :return: array of score for each n-grams cosine similarity 153 | ''' 154 | delta = float(length_hyp - length_ref) 155 | # measure consine similarity 156 | val = np.array([0.0 for _ in range(self.n)]) 157 | for n in range(self.n): 158 | # ngram 159 | for (ngram,count) in vec_hyp[n].items(): 160 | # vrama91 : added clipping 161 | val[n] += min(vec_hyp[n][ngram], vec_ref[n][ngram]) * vec_ref[n][ngram] 162 | 163 | if (norm_hyp[n] != 0) and (norm_ref[n] != 0): 164 | val[n] /= (norm_hyp[n]*norm_ref[n]) 165 | 166 | assert(not math.isnan(val[n])) 167 | # vrama91: added a length based gaussian penalty 168 | val[n] *= np.e**(-(delta**2)/(2*self.sigma**2)) 169 | return val 170 | 171 | # compute log reference length 172 | if self.df_mode == "corpus": 173 | self.ref_len = np.log(float(len(self.crefs))) 174 | #elif self.df_mode == "coco-val": 175 | # if coco option selected, use length of coco-val set 176 | # self.ref_len = np.log(float(40504)) 177 | 178 | scores = [] 179 | for test, refs in zip(self.ctest, self.crefs): 180 | # compute vector for test captions 181 | vec, norm, length = counts2vec(test) 182 | # compute vector for ref captions 183 | score = np.array([0.0 for _ in range(self.n)]) 184 | for ref in refs: 185 | vec_ref, norm_ref, length_ref = counts2vec(ref) 186 | score += sim(vec, vec_ref, norm, norm_ref, length, length_ref) 187 | # change by vrama91 - mean of ngram scores, instead of sum 188 | score_avg = np.mean(score) 189 | # divide by number of references 190 | score_avg /= len(refs) 191 | # multiply score by 10 192 | score_avg *= 
class CIDErEvalCap:
    """
    Tokenise references and candidates, then score them with CIDEr and
    CIDEr-D.
    """
    def __init__(self, gts, res, df):
        """
        :param gts: reference captions, tokenised with PTBTokenizer('gts')
        :param res: candidate captions, tokenised with PTBTokenizer('res')
        :param df: document-frequency mode handed to both scorers
        """
        # Single-argument print(...) calls produce the same output under
        # Python 2 and are valid syntax under Python 3.
        print('tokenization...')
        tokenizer = PTBTokenizer('gts')
        _gts = tokenizer.tokenize(gts)
        print('tokenized refs')
        tokenizer = PTBTokenizer('res')
        _res = tokenizer.tokenize(res)
        print('tokenized cands')

        self.gts = _gts
        self.res = _res
        self.df = df

    def evaluate(self):
        """
        Run every scorer on the tokenised data.

        :return: dict mapping metric name ("CIDEr", "CIDErD") to the list of
                 per-item scores returned by that scorer
        """
        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Cider(df=self.df), "CIDEr"), (CiderD(df=self.df), "CIDErD")
        ]

        # =================================================
        # Compute scores
        # =================================================
        metric_scores = {}
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(self.gts, self.res)
            print("Mean %s score: %0.3f" % (method, score))
            metric_scores[method] = list(scores)
        return metric_scores
class PTBTokenizer:
    """Python wrapper of Stanford PTBTokenizer.

    The captions are written to a temporary file next to this module, the
    Java tokenizer is run on that file, and punctuation tokens listed in the
    module-level ``PUNCTUATIONS`` are stripped from its output.
    """

    def __init__(self, _source='gts'):
        # 'gts' -> input is a dict {image_id: [{'caption': ...}, ...]}
        # 'res' -> input is a list [{'image_id': ..., 'caption': ...}, ...]
        self.source = _source

    def tokenize(self, captions_for_image):
        """Tokenize captions with the Stanford PTB tokenizer.

        :param captions_for_image: dict (source 'gts') or list (source 'res')
                                   in the shapes described in ``__init__``
        :return: for 'gts', a dict {image_id: [tokenized caption, ...]};
                 for 'res', a list of {'image_id': id, 'caption': [tokenized]}
        :raises ValueError: if ``self.source`` is neither 'gts' nor 'res'
        """
        # Fail before touching the filesystem: previously an unknown source
        # created the temp file and then crashed on an unbound name.
        if self.source not in ('gts', 'res'):
            raise ValueError("unsupported source %r (expected 'gts' or 'res')"
                             % (self.source,))

        cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR,
               'edu.stanford.nlp.process.PTBTokenizer',
               '-preserveLines', '-lowerCase']

        # ======================================================
        # prepare data for PTB Tokenizer
        # ======================================================
        if self.source == 'gts':
            # one image id per caption, in the same order as the sentences
            image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
            sentences = '\n'.join([c['caption'].replace('\n', ' ')
                                   for k, v in captions_for_image.items() for c in v])
        else:
            image_id = [v["image_id"] for v in captions_for_image]
            sentences = '\n'.join(v["caption"].replace('\n', ' ')
                                  for v in captions_for_image)

        # ======================================================
        # save sentences to temporary file, tokenize, always clean up
        # ======================================================
        path_to_jar_dirname = os.path.dirname(os.path.abspath(__file__))
        tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dirname)
        try:
            try:
                # The temp file is opened in binary mode; encode text first.
                if not isinstance(sentences, bytes):
                    sentences = sentences.encode('utf-8')
                tmp_file.write(sentences)
            finally:
                tmp_file.close()

            cmd.append(os.path.basename(tmp_file.name))
            p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname,
                                           stdout=subprocess.PIPE)
            # stdin is not a pipe, so nothing can be sent via communicate();
            # the tokenizer reads the temp file named on its command line.
            token_lines = p_tokenizer.communicate()[0]
        finally:
            # remove temp file even when writing or the subprocess fails
            os.remove(tmp_file.name)

        # Popen output is bytes on Python 3; it is already `str` on Python 2.
        if not isinstance(token_lines, str):
            token_lines = token_lines.decode('utf-8')
        lines = token_lines.split('\n')

        # ======================================================
        # create dictionary for tokenized captions
        # ======================================================
        if self.source == 'gts':
            final_tokenized_captions_for_image = {}
            for k, line in zip(image_id, lines):
                if k not in final_tokenized_captions_for_image:
                    final_tokenized_captions_for_image[k] = []
                tokenized_caption = ' '.join([w for w in line.rstrip().split(' ')
                                              if w not in PUNCTUATIONS])
                final_tokenized_captions_for_image[k].append(tokenized_caption)
            return final_tokenized_captions_for_image

        final_tokenized_captions_for_index = []
        for img, line in zip(image_id, lines):
            tokenized_caption = ' '.join([w for w in line.rstrip().split(' ')
                                          if w not in PUNCTUATIONS])
            final_tokenized_captions_for_index.append(
                {'image_id': img, 'caption': [tokenized_caption]})
        return final_tokenized_captions_for_index
-------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/bleu.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/bleu.cpython-35.pyc 
-------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/bleu.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/bleu.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/bleu/__pycache__/bleu_scorer.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/bleu/bleu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : bleu.py 4 | # 5 | # Description : Wrapper for BLEU scorer. 
class Bleu:
    """Thin wrapper that feeds caption pairs into a BleuScorer."""

    def __init__(self, n=4):
        # highest n-gram order to score (BLEU-1 .. BLEU-n)
        self._n = n
        self._hypo_for_image = {}
        self.ref_for_image = {}

    def compute_score(self, gts, res):
        """Score candidates against references.

        :param gts: dict {image_id: [reference caption, ...]}
        :param res: dict {image_id: [candidate caption]} with the same keys
        :return: the (score, scores) pair produced by
                 BleuScorer.compute_score(option='closest', verbose=1)
        """
        image_ids = sorted(gts.keys())
        assert(image_ids == sorted(res.keys()))

        scorer = BleuScorer(n=self._n)
        for image_id in image_ids:
            hypothesis = res[image_id]
            references = gts[image_id]

            # Sanity check: exactly one candidate, at least one reference.
            assert(type(hypothesis) is list)
            assert(len(hypothesis) == 1)
            assert(type(references) is list)
            assert(len(references) >= 1)

            scorer += (hypothesis[0], references)

        corpus_bleu, per_image_bleu = scorer.compute_score(option='closest', verbose=1)
        return corpus_bleu, per_image_bleu

    def method(self):
        """Name of this metric."""
        return "Bleu"
def precook(s, n=4, out=False):
    """Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.

    :param s: whitespace-separated sentence
    :param n: highest n-gram order to count
    :param out: unused; kept for interface compatibility
    :return: (number of words, defaultdict mapping n-gram tuple -> count)
    """
    words = s.split()
    counts = defaultdict(int)
    for k in range(1, n+1):
        for i in range(len(words)-k+1):
            ngram = tuple(words[i:i+k])
            counts[ngram] += 1
    return (len(words), counts)

def cook_refs(refs, eff=None, n=4): ## lhuang: oracle will call with "average"
    '''Takes a list of reference sentences for a single segment
    and returns an object that encapsulates everything that BLEU
    needs to know about them.

    :param refs: list of reference sentences (strings)
    :param eff: "shortest", "average", or None (keep the list of lengths)
    :param n: highest n-gram order to count
    :return: (reflen, maxcounts) where maxcounts maps each n-gram to its
             maximum count over the references
    '''

    reflen = []
    maxcounts = {}
    for ref in refs:
        rl, counts = precook(ref, n)
        reflen.append(rl)
        for (ngram, count) in counts.items():
            maxcounts[ngram] = max(maxcounts.get(ngram, 0), count)

    # Calculate effective reference sentence length.
    if eff == "shortest":
        reflen = min(reflen)
    elif eff == "average":
        reflen = float(sum(reflen))/len(reflen)

    ## lhuang: N.B.: leave reflen computation to the very end!!

    ## lhuang: N.B.: in case of "closest", keep a list of reflens!! (bad design)

    return (reflen, maxcounts)

def cook_test(test, reflen, refmaxcounts, eff=None, n=4):
    '''Takes a test sentence and returns an object that
    encapsulates everything that BLEU needs to know about it.

    :param test: candidate sentence (string)
    :param reflen: reference length(s) as returned by cook_refs
    :param refmaxcounts: max n-gram counts as returned by cook_refs
    :param eff: "closest" picks the reference length nearest to the test
                length; anything else stores reflen unchanged
    :param n: highest n-gram order to count
    :return: dict with keys "reflen", "testlen", "guess", "correct"
    '''

    testlen, counts = precook(test, n, True)

    result = {}

    # Calculate effective reference sentence length.

    if eff == "closest":
        result["reflen"] = min((abs(l-testlen), l) for l in reflen)[1]
    else: ## i.e., "average" or "shortest" or None
        result["reflen"] = reflen

    result["testlen"] = testlen

    # number of candidate n-grams of each order
    result["guess"] = [max(0, testlen-k+1) for k in range(1, n+1)]

    # clipped matches of each order
    result['correct'] = [0]*n
    for (ngram, count) in counts.items():
        result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram, 0), count)

    return result

class BleuScorer(object):
    """Bleu scorer.

    Accumulates (candidate, references) pairs and computes corpus-level and
    per-sentence BLEU-1..BLEU-n.
    """

    __slots__ = ("n", "crefs", "ctest", "_score", "_ratio", "_testlen",
                 "_reflen", "special_reflen", "_bleu_list")
    # special_reflen is used in oracle (proportional effective ref len for a node).
    # _bleu_list caches the per-sentence scores alongside _score.

    def copy(self):
        ''' copy the refs.'''
        new = BleuScorer(n=self.n)
        new.ctest = copy.copy(self.ctest)
        new.crefs = copy.copy(self.crefs)
        new._score = None
        return new

    def __init__(self, test=None, refs=None, n=4, special_reflen=None):
        ''' singular instance '''

        self.n = n
        self.crefs = []
        self.ctest = []
        self.cook_append(test, refs)
        self.special_reflen = special_reflen

    def cook_append(self, test, refs):
        '''called by constructor and __iadd__ to avoid creating new instances.'''

        if refs is not None:
            # Cook with this scorer's own n; the helpers' default of 4 made
            # compute_score index past the end of the lists for n > 4.
            self.crefs.append(cook_refs(refs, n=self.n))
            if test is not None:
                reflen, maxcounts = self.crefs[-1]
                cooked_test = cook_test(test, reflen, maxcounts, n=self.n)
                self.ctest.append(cooked_test) ## N.B.: -1
            else:
                self.ctest.append(None) # lens of crefs and ctest have to match

        self._score = None ## need to recompute

    def ratio(self, option=None):
        self.compute_score(option=option)
        return self._ratio

    def score_ratio(self, option=None):
        '''return (bleu, len_ratio) pair'''
        # NOTE(review): no `fscore` method is defined on this class, so this
        # call raises AttributeError; the intended scorer is unclear.
        return (self.fscore(option=option), self.ratio(option=option))

    def score_ratio_str(self, option=None):
        return "%.4f (%.2f)" % self.score_ratio(option)

    def reflen(self, option=None):
        self.compute_score(option=option)
        return self._reflen

    def testlen(self, option=None):
        self.compute_score(option=option)
        return self._testlen

    def retest(self, new_test):
        if type(new_test) is str:
            new_test = [new_test]
        assert len(new_test) == len(self.crefs), new_test
        self.ctest = []
        for t, rs in zip(new_test, self.crefs):
            self.ctest.append(cook_test(t, rs[0], rs[1], n=self.n))
        self._score = None

        return self

    def rescore(self, new_test):
        ''' replace test(s) with new test(s), and returns the new score.'''

        return self.retest(new_test).compute_score()

    def size(self):
        assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
        return len(self.crefs)

    def __iadd__(self, other):
        '''add an instance (e.g., from another sentence).'''

        if type(other) is tuple:
            ## avoid creating new BleuScorer instances
            self.cook_append(other[0], other[1])
        else:
            assert self.compatible(other), "incompatible BLEUs."
            self.ctest.extend(other.ctest)
            self.crefs.extend(other.crefs)
            self._score = None ## need to recompute

        return self

    def compatible(self, other):
        return isinstance(other, BleuScorer) and self.n == other.n

    def single_reflen(self, option="average"):
        return self._single_reflen(self.crefs[0][0], option)

    def _single_reflen(self, reflens, option=None, testlen=None):

        if option == "shortest":
            reflen = min(reflens)
        elif option == "average":
            reflen = float(sum(reflens))/len(reflens)
        elif option == "closest":
            reflen = min((abs(l-testlen), l) for l in reflens)[1]
        else:
            assert False, "unsupported reflen option %s" % option

        return reflen

    def recompute_score(self, option=None, verbose=0):
        self._score = None
        return self.compute_score(option, verbose)

    def compute_score(self, option=None, verbose=0):
        """Compute corpus-level and per-sentence BLEU.

        :param option: effective reference length rule ("shortest",
                       "average", "closest"); None picks "average" for a
                       single segment and "closest" otherwise
        :param verbose: >0 prints corpus totals, >1 also prints each segment
        :return: (bleus, bleu_list) -- bleus[k] is corpus BLEU-(k+1) and
                 bleu_list[k] holds the per-sentence BLEU-(k+1) values.
                 The same pair is returned on a cache hit (previously only
                 the bare ``bleus`` list came back). The cache does not
                 depend on ``option``; use recompute_score to force a rerun.
        """
        if self._score is not None:
            return self._score, self._bleu_list

        n = self.n
        small = 1e-9
        tiny = 1e-15 ## so that if guess is 0 still return 0
        bleu_list = [[] for _ in range(n)]

        if option is None:
            option = "average" if len(self.crefs) == 1 else "closest"

        self._testlen = 0
        self._reflen = 0
        totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n}

        # for each sentence
        for comps in self.ctest:
            testlen = comps['testlen']
            self._testlen += testlen

            if self.special_reflen is None: ## need computation
                reflen = self._single_reflen(comps['reflen'], option, testlen)
            else:
                reflen = self.special_reflen

            self._reflen += reflen

            for key in ['guess', 'correct']:
                for k in range(n):
                    totalcomps[key][k] += comps[key][k]

            # append per image bleu score
            bleu = 1.
            for k in range(n):
                bleu *= (float(comps['correct'][k]) + tiny) \
                        /(float(comps['guess'][k]) + small)
                bleu_list[k].append(bleu ** (1./(k+1)))
            ratio = (testlen + tiny) / (reflen + small) ## N.B.: avoid zero division
            if ratio < 1:
                # brevity penalty for candidates shorter than the reference
                for k in range(n):
                    bleu_list[k][-1] *= math.exp(1 - 1/ratio)

            if verbose > 1:
                print(comps, reflen)

        totalcomps['reflen'] = self._reflen
        totalcomps['testlen'] = self._testlen

        bleus = []
        bleu = 1.
        for k in range(n):
            bleu *= float(totalcomps['correct'][k] + tiny) \
                    / (totalcomps['guess'][k] + small)
            bleus.append(bleu ** (1./(k+1)))
        ratio = (self._testlen + tiny) / (self._reflen + small) ## N.B.: avoid zero division
        if ratio < 1:
            for k in range(n):
                bleus[k] *= math.exp(1 - 1/ratio)

        if verbose > 0:
            print(totalcomps)
            print("ratio:%f"%ratio)

        # Record the corpus length ratio so ratio() has something to return.
        self._ratio = ratio
        self._bleu_list = bleu_list
        self._score = bleus
        return self._score, self._bleu_list
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/cider/__pycache__/cider.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/cider/__pycache__/cider.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/cider/__pycache__/cider.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/cider/__pycache__/cider_scorer.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/cider/cider.py: -------------------------------------------------------------------------------- 1 | # Filename: cider.py 2 | # 3 | # Description: Describes the class to compute the CIDEr (Consensus-Based Image Description Evaluation) Metric 4 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 5 | # 6 | # Creation Date: Sun Feb 8 14:16:54 2015 7 | # 8 | # Authors: Ramakrishna Vedantam and Tsung-Yi 
class COCOEvalCap:
    """Runs the caption metrics over a ground-truth / result COCO pair."""

    def __init__(self, coco, cocoRes):
        self.evalImgs = []      # per-image result dicts, filled by setEvalImgs
        self.eval = {}          # metric name -> corpus-level score
        self.imgToEval = {}     # image id -> {"image_id": id, metric: score}
        self.coco = coco
        self.cocoRes = cocoRes
        self.params = {'image_id': coco.getImgIds()}

    def evaluate(self):
        """Tokenize captions, run every scorer and store the results on self."""
        gts, res = {}, {}
        for image_id in self.params['image_id']:
            gts[image_id] = self.coco.imgToAnns[image_id]
            res[image_id] = self.cocoRes.imgToAnns[image_id]

        # =================================================
        # Tokenize
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...'%(scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) is list:
                # one scorer reporting several named metrics (e.g. Bleu_1..4)
                for value, per_image, name in zip(score, scores, method):
                    self.setEval(value, name)
                    self.setImgToEvalImgs(per_image, gts.keys(), name)
                    print("%s: %0.3f"%(name, value))
                continue
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f"%(method, score))
        self.setEvalImgs()

    def setEval(self, score, method):
        """Record the corpus-level score of one metric."""
        self.eval[method] = score

    def setImgToEvalImgs(self, scores, imgIds, method):
        """Attach per-image scores; scores are paired with the sorted image ids."""
        for image_id, value in zip(sorted(imgIds), scores):
            entry = self.imgToEval.setdefault(image_id, {"image_id": image_id})
            entry[method] = value

    def setEvalImgs(self):
        """Rebuild evalImgs as the per-image dicts ordered by image id."""
        ordered_ids = sorted(self.imgToEval.keys())
        self.evalImgs = [self.imgToEval[image_id] for image_id in ordered_ids]
/cocoeval/pycocoevalcap/meteor/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/meteor/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/meteor/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/meteor/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/meteor/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/meteor/__pycache__/meteor.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/meteor/__pycache__/meteor.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/meteor/__pycache__/meteor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/meteor/__pycache__/meteor.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/meteor/meteor-1.5.jar: -------------------------------------------------------------------------------- 
class Meteor:
    """Wrapper around a long-running METEOR Java process driven over stdio.

    All traffic with the subprocess happens under ``self.lock``; the lock is
    taken with ``with`` so it is released even when scoring raises.
    """

    def __init__(self):
        self.meteor_cmd = ['java', '-jar', '-Xmx2G', METEOR_JAR,
                           '-', '-', '-stdio', '-l', 'en', '-norm']
        self.meteor_p = subprocess.Popen(self.meteor_cmd,
                cwd=os.path.dirname(os.path.abspath(__file__)),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
        # Used to guarantee thread safety
        self.lock = threading.Lock()

    def _send(self, line):
        """Write one newline-terminated line to the METEOR process."""
        self.meteor_p.stdin.write('{}\n'.format(line).encode())
        self.meteor_p.stdin.flush()

    def _read_line(self):
        """Read one line from the METEOR process as stripped text."""
        return self.meteor_p.stdout.readline().decode().strip()

    @staticmethod
    def _score_line(hypothesis_str, reference_list):
        """Build the 'SCORE ||| refs... ||| hypothesis' request line."""
        # Drop the field separator from the hypothesis, then collapse the
        # double space that removal leaves behind.
        hypothesis_str = hypothesis_str.replace('|||', '').replace('  ', ' ')
        return ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str))

    def compute_score(self, gts, res):
        """Score every image.

        :param gts: dict {image_id: [reference caption, ...]}
        :param res: dict {image_id: [candidate caption]} with the same keys
        :return: (final score line as float, list of per-image floats in
                 sorted image-id order)
        """
        assert(sorted(gts.keys()) == sorted(res.keys()))
        imgIds = sorted(gts.keys())
        scores = []

        eval_line = 'EVAL'
        with self.lock:
            for i in imgIds:
                assert(len(res[i]) == 1)
                stat = self._stat(res[i][0], gts[i])
                eval_line += ' ||| {}'.format(stat)

            self._send(eval_line)
            # one line per image, followed by one aggregate line
            for _ in range(0, len(imgIds)):
                scores.append(float(self._read_line()))
            score = float(self._read_line())

        return score, scores

    def method(self):
        return "METEOR"

    def _stat(self, hypothesis_str, reference_list):
        """Send one SCORE request and return the raw statistics line.

        The caller is expected to hold ``self.lock``.
        """
        # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
        self._send(self._score_line(hypothesis_str, reference_list))
        return self._read_line()

    def _score(self, hypothesis_str, reference_list):
        """Score a single hypothesis against its references."""
        with self.lock:
            # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words
            self._send(self._score_line(hypothesis_str, reference_list))
            stats = self._read_line()
            # EVAL ||| stats
            self._send('EVAL ||| {}'.format(stats))
            score = float(self._read_line())
            # bug fix: there are two values returned by the jar file, one average, and one all, so do it twice
            # thanks for Andrej for pointing this out
            score = float(self._read_line())
        return score

    def close(self):
        """Shut the METEOR process down; safe to call more than once."""
        with self.lock:
            self.meteor_p.stdin.close()
            try:
                self.meteor_p.kill()
            except OSError:
                # process already gone
                pass
            self.meteor_p.wait()

    def __enter__(self):
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        # Arguments are optional so the old zero-argument call still works.
        self.close()

    def __del__(self):
        # Best-effort cleanup: the instance may be only partly constructed
        # or the interpreter may be shutting down.
        try:
            self.close()
        except Exception:
            pass
def my_lcs(string, sub):
    """
    Length of the longest common subsequence of two token sequences.

    :param string : list of str : tokens from a string split using whitespace
    :param sub : list of str : tokens of the other string; the two arguments
                 are swapped internally when ``string`` is the shorter one
    :returns: int : length of the longest common subsequence

    Note: only the length is computed, not the subsequence itself.
    """
    if len(string) < len(sub):
        longer, shorter = sub, string
    else:
        longer, shorter = string, sub

    n_long, n_short = len(longer), len(shorter)
    # table[r][c] = LCS length of longer[:r] and shorter[:c]
    table = [[0] * (n_short + 1) for _ in range(n_long + 1)]

    for col, short_tok in enumerate(shorter, start=1):
        for row, long_tok in enumerate(longer, start=1):
            if long_tok == short_tok:
                table[row][col] = table[row - 1][col - 1] + 1
            else:
                table[row][col] = max(table[row - 1][col], table[row][col - 1])

    return table[n_long][n_short]


class Rouge():
    '''
    ROUGE-L scorer for candidate sentences (one candidate per image, one or
    more references per image).
    '''
    def __init__(self):
        # vrama91: updated the value below based on discussion with Hovey
        self.beta = 1.2

    def calc_score(self, candidate, refs):
        """
        ROUGE-L score of one candidate against the references of one image.

        :param candidate: list holding exactly one candidate sentence (str)
        :param refs: non-empty list of reference sentences (str)
        :returns: float : F-measure built from the best LCS precision and the
                  best LCS recall over all references, weighted by ``beta``;
                  0.0 when either of them is zero
        """
        assert(len(candidate)==1)
        assert(len(refs)>0)

        # split into tokens
        cand_tokens = candidate[0].split(" ")

        precisions = []
        recalls = []
        for reference in refs:
            ref_tokens = reference.split(" ")
            common = my_lcs(ref_tokens, cand_tokens)
            precisions.append(common/float(len(cand_tokens)))
            recalls.append(common/float(len(ref_tokens)))

        prec_max = max(precisions)
        rec_max = max(recalls)

        if prec_max == 0 or rec_max == 0:
            return 0.0
        return ((1 + self.beta**2)*prec_max*rec_max)/float(rec_max + self.beta**2*prec_max)

    def compute_score(self, gts, res):
        """
        ROUGE-L over a whole dataset.

        :param gts: dict : image id -> list of reference sentences
        :param res: dict : image id -> list holding one candidate sentence
        :returns: (average_score, scores) : the mean score as a float and a
                  numpy array with one score per image, in sorted id order
        """
        assert(sorted(gts.keys()) == sorted(res.keys()))

        per_image = []
        for image_id in sorted(gts.keys()):
            hypo = res[image_id]
            ref = gts[image_id]

            per_image.append(self.calc_score(hypo, ref))

            # Sanity check.
            assert(type(hypo) is list)
            assert(len(hypo) == 1)
            assert(type(ref) is list)
            assert(len(ref) >= 1)

        score_array = np.array(per_image)
        return np.mean(score_array), score_array

    def method(self):
        return "Rouge"
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/tokenizer/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-35.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/cocoeval/pycocoevalcap/tokenizer/__pycache__/ptbtokenizer.cpython-36.pyc -------------------------------------------------------------------------------- /cocoeval/pycocoevalcap/tokenizer/ptbtokenizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : ptbtokenizer.py 4 | # 5 | # Description : Do the PTB Tokenization and remove punctuations. 
# path to the stanford corenlp jar
STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar'

# punctuations to be removed from the sentences
PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
                ".", "?", "!", ",", ":", "-", "--", "...", ";"]


class PTBTokenizer:
    """Python wrapper of Stanford PTBTokenizer"""

    def tokenize(self, captions_for_image):
        """Tokenize and lower-case captions with the Stanford PTBTokenizer.

        :param captions_for_image: dict mapping an image id to a list of
            dicts, each of which has a ``'caption'`` string
        :returns: dict mapping each image id to a list of tokenized captions
            (tokens joined by single spaces, entries of ``PUNCTUATIONS``
            removed), in the same order as the input captions

        The captions are written one per line to a temporary file next to
        this module, the jar is run on that file, and the file is removed
        afterwards, including when starting or running the tokenizer fails.
        """
        cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR,
               'edu.stanford.nlp.process.PTBTokenizer',
               '-preserveLines', '-lowerCase']

        # ======================================================
        # prepare data for PTB Tokenizer
        # ======================================================
        final_tokenized_captions_for_image = {}
        # One id per caption, so ids can be zipped with the output lines.
        image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
        # Embedded newlines would break the one-caption-per-line layout.
        sentences = '\n'.join([c['caption'].replace('\n', ' ')
                               for k, v in captions_for_image.items() for c in v])

        # ======================================================
        # save sentences to temporary file
        # ======================================================
        path_to_jar_dirname = os.path.dirname(os.path.abspath(__file__))
        tmp_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=path_to_jar_dirname)
        try:
            try:
                tmp_file.write(sentences)
            finally:
                tmp_file.close()

            # ======================================================
            # tokenize sentence
            # ======================================================
            # The jar runs with cwd set to this directory, so the bare file
            # name is enough.
            cmd.append(os.path.basename(tmp_file.name))
            p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname,
                                           stdout=subprocess.PIPE)
            # The process has no stdin pipe, so nothing is passed as input;
            # the captions reach the tokenizer through the temp file only.
            token_lines = p_tokenizer.communicate()[0]
        finally:
            # remove temp file, also when the tokenizer could not be run
            os.remove(tmp_file.name)
        lines = token_lines.decode().split('\n')

        # ======================================================
        # create dictionary for tokenized captions
        # ======================================================
        for k, line in zip(image_id, lines):
            tokenized_caption = ' '.join([w for w in line.rstrip().split(' ')
                                          if w not in PUNCTUATIONS])
            final_tokenized_captions_for_image.setdefault(k, []).append(tokenized_caption)

        return final_tokenized_captions_for_image
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
def encode(bimask):
    """RLE-encode a binary mask array via ``_mask.encode``.

    A 3-D input is passed through unchanged and the full result is returned.
    A 2-D input is reshaped to ``(h, w, 1)`` in column-major order and the
    first element of the result is returned.  Any other rank falls through
    and yields ``None``.
    """
    rank = len(bimask.shape)
    if rank == 3:
        return _mask.encode(bimask)
    if rank == 2:
        rows, cols = bimask.shape
        stacked = bimask.reshape((rows, cols, 1), order='F')
        return _mask.encode(stacked)[0]


def decode(rleObjs):
    """Decode via ``_mask.decode``; a non-list input is wrapped in a list
    and the first slice along the last axis of the result is returned."""
    if type(rleObjs) == list:
        return _mask.decode(rleObjs)
    decoded = _mask.decode([rleObjs])
    return decoded[:, :, 0]


def area(rleObjs):
    """Area via ``_mask.area``; a non-list input is wrapped in a list and
    the first element of the result is returned."""
    if type(rleObjs) == list:
        return _mask.area(rleObjs)
    areas = _mask.area([rleObjs])
    return areas[0]


def toBbox(rleObjs):
    """Bounding box via ``_mask.toBbox``; a non-list input is wrapped in a
    list and the first element of the result is returned."""
    if type(rleObjs) == list:
        return _mask.toBbox(rleObjs)
    boxes = _mask.toBbox([rleObjs])
    return boxes[0]
cython=0.29.14=py37he1b5a44_0 36 | - cython-blis=0.4.1=py37h7b6447c_1 37 | - cytoolz=0.10.1=py37h516909a_0 38 | - dask-core=2.7.0=py_0 39 | - dbus=1.13.6=he372182_0 40 | - decorator=4.4.0=py_0 41 | - defusedxml=0.6.0=py_0 42 | - entrypoints=0.3=py37_1000 43 | - expat=2.2.5=he1b5a44_1003 44 | - fontconfig=2.13.1=he4413a7_1000 45 | - freetype=2.10.0=he983fc9_1 46 | - gettext=0.19.8.1=hc5be6a0_1002 47 | - glib=2.58.3=h6f030ca_1002 48 | - google-auth=1.27.1=pyhd3eb1b0_0 49 | - google-auth-oauthlib=0.4.1=py_2 50 | - grpcio=1.36.1=py37h2157cd5_1 51 | - gst-plugins-base=1.14.5=h0935bb2_0 52 | - gstreamer=1.14.5=h36ae1b5_0 53 | - humanfriendly=4.18=py37_0 54 | - icu=58.2=hf484d3e_1000 55 | - idna=2.8=py37_0 56 | - imageio=2.6.1=py37_0 57 | - importlib-metadata=2.0.0=py_1 58 | - importlib_metadata=0.23=py37_0 59 | - intel-openmp=2019.4=243 60 | - ipykernel=5.1.2=py37h5ca1d4c_0 61 | - ipython=7.8.0=py37h5ca1d4c_0 62 | - ipython_genutils=0.2.0=py_1 63 | - ipywidgets=7.5.1=py_0 64 | - jedi=0.15.1=py37_0 65 | - jinja2=2.10.3=py_0 66 | - joblib=0.13.2=py_0 67 | - jpeg=9c=h14c3975_1001 68 | - jsonschema=3.1.1=py37_0 69 | - jupyter=1.0.0=py37_7 70 | - jupyter_client=5.3.3=py37_1 71 | - jupyter_console=6.0.0=py_0 72 | - jupyter_core=4.5.0=py_0 73 | - kiwisolver=1.1.0=py37hc9558a2_0 74 | - libarchive=3.3.3=h5d8350f_5 75 | - libblas=3.8.0=12_openblas 76 | - libcblas=3.8.0=12_openblas 77 | - libedit=3.1.20181209=hc058e9b_0 78 | - libffi=3.2.1=hd88cf55_4 79 | - libgcc-ng=9.1.0=hdf63c60_0 80 | - libgfortran-ng=7.3.0=hdf63c60_0 81 | - libiconv=1.15=h516909a_1005 82 | - liblapack=3.8.0=12_openblas 83 | - libopenblas=0.3.7=h6e990d7_1 84 | - libpng=1.6.37=hed695b0_0 85 | - libprotobuf=3.10.1=h8b12597_0 86 | - libsodium=1.0.17=h516909a_0 87 | - libstdcxx-ng=9.1.0=hdf63c60_0 88 | - libtiff=4.0.10=h2733197_2 89 | - libuuid=2.32.1=h14c3975_1000 90 | - libxcb=1.13=h14c3975_1002 91 | - libxml2=2.9.9=hea5a465_1 92 | - lz4-c=1.8.1.2=h14c3975_0 93 | - lzo=2.10=h49e0be7_2 94 | - 
markdown=3.3.4=py37h06a4308_0 95 | - markupsafe=1.1.1=py37h14c3975_0 96 | - matplotlib=3.1.1=py37h5429711_0 97 | - mistune=0.8.4=py37h14c3975_1000 98 | - mkl=2019.4=243 99 | - more-itertools=7.2.0=py_0 100 | - multidict=5.1.0=py37h27cfd23_2 101 | - murmurhash=1.0.2=py37he6710b0_0 102 | - nbconvert=5.6.0=py37_1 103 | - nbformat=4.4.0=py_1 104 | - ncurses=6.1=he6710b0_1 105 | - networkx=2.4=py_0 106 | - ninja=1.9.0=h6bb024c_0 107 | - nltk=3.4.4=py_0 108 | - notebook=6.0.1=py37_0 109 | - numpy=1.17.2=py37h95a1406_0 110 | - oauthlib=3.1.0=py_0 111 | - olefile=0.46=py_0 112 | - openssl=1.1.1j=h27cfd23_0 113 | - pandas=0.25.1=py37hb3f55d8_0 114 | - pandoc=2.7.3=0 115 | - pandocfilters=1.4.2=py_1 116 | - parso=0.5.1=py_0 117 | - patsy=0.5.1=py_0 118 | - pcre=8.41=hf484d3e_1003 119 | - pexpect=4.7.0=py37_0 120 | - pickleshare=0.7.5=py37_1000 121 | - pillow=6.1.0=py37h34e0f95_0 122 | - pip=21.0.1=pyhd8ed1ab_0 123 | - plac=0.9.6=py37_1 124 | - preshed=3.0.2=py37he6710b0_1 125 | - prometheus_client=0.7.1=py_0 126 | - prompt_toolkit=2.0.9=py_0 127 | - protobuf=3.10.1=py37he6710b0_0 128 | - pthread-stubs=0.4=h14c3975_1001 129 | - ptyprocess=0.6.0=py_1001 130 | - pyasn1=0.4.8=py_0 131 | - pyasn1-modules=0.2.8=py_0 132 | - pycocotools=2.0.0=py37h516909a_1000 133 | - pycosat=0.6.3=py37h14c3975_0 134 | - pycparser=2.19=py37_0 135 | - pygments=2.4.2=py_0 136 | - pyjwt=2.0.1=py37h06a4308_0 137 | - pyopenssl=19.0.0=py37_0 138 | - pyparsing=2.4.2=py_0 139 | - pyqt=5.9.2=py37hcca6a23_4 140 | - pyrsistent=0.15.4=py37h516909a_0 141 | - pysocks=1.7.0=py37_0 142 | - python=3.7.3=h0371630_0 143 | - python-dateutil=2.8.0=py_0 144 | - python-libarchive-c=2.8=py37_11 145 | - python_abi=3.7=1_cp37m 146 | - pytorch=1.2.0=py3.7_cuda9.2.148_cudnn7.6.2_0 147 | - pytz=2019.2=py_0 148 | - pywavelets=1.1.1=py37hc1659b7_0 149 | - pyzmq=18.1.0=py37h1768529_0 150 | - qt=5.9.7=h52cfd70_2 151 | - qtconsole=4.5.5=py_0 152 | - readline=7.0=h7b6447c_5 153 | - requests=2.22.0=py37_0 154 | - 
requests-oauthlib=1.3.0=py_0 155 | - rsa=4.7.2=pyhd3eb1b0_1 156 | - ruamel_yaml=0.15.46=py37h14c3975_0 157 | - scikit-image=0.15.0=py37he6710b0_0 158 | - scikit-learn=0.23.2=py37h0573a6f_0 159 | - scipy=1.3.1=py37h921218d_2 160 | - seaborn=0.9.0=py_0 161 | - send2trash=1.5.0=py_0 162 | - setuptools=41.0.1=py37_0 163 | - sip=4.19.8=py37hf484d3e_1000 164 | - six=1.12.0=py37_1000 165 | - spacy=2.3.2=py37h99015e2_0 166 | - sqlite=3.29.0=h7b6447c_0 167 | - srsly=1.0.2=py37he6710b0_0 168 | - statsmodels=0.10.1=py37hc1659b7_0 169 | - tensorboard=2.4.1=pyhd8ed1ab_0 170 | - tensorboard-plugin-wit=1.6.0=py_0 171 | - tensorboardx=1.9=py_0 172 | - terminado=0.8.2=py37_0 173 | - testpath=0.4.2=py_1001 174 | - thinc=7.4.1=py37hfd86e86_0 175 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 176 | - tk=8.6.8=hbc83047_0 177 | - toolz=0.10.0=py_0 178 | - torchtext=0.6.0=py_1 179 | - tornado=6.0.3=py37h516909a_0 180 | - tqdm=4.32.1=py_0 181 | - traitlets=4.3.2=py37_1000 182 | - typing-extensions=3.7.4.3=hd3eb1b0_0 183 | - typing_extensions=3.7.4.3=pyh06a4308_0 184 | - urllib3=1.24.2=py37_0 185 | - wasabi=0.8.0=py_0 186 | - wcwidth=0.1.7=py_1 187 | - webencodings=0.5.1=py_1 188 | - werkzeug=1.0.1=pyhd3eb1b0_0 189 | - wheel=0.33.4=py37_0 190 | - widgetsnbextension=3.5.1=py37_0 191 | - xorg-libxau=1.0.9=h14c3975_0 192 | - xorg-libxdmcp=1.1.3=h516909a_0 193 | - xz=5.2.4=h14c3975_4 194 | - yaml=0.1.7=had09818_2 195 | - yarl=1.6.3=py37h27cfd23_0 196 | - zeromq=4.3.2=he1b5a44_2 197 | - zipp=0.6.0=py_0 198 | - zlib=1.2.11=h7b6447c_3 199 | - zstd=1.3.7=h0b5b093_0 200 | - pip: 201 | - tensorboard-logger==0.1.0 202 | - torchvision==0.4.0a0+9232c4a 203 | prefix: /home/amafla/miniconda3/envs/pytorch12 204 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from vocab import Vocabulary 3 | import evaluation 4 | from model_CVSE import CVSE 5 | 6 | 7 | '''1) 
Evaluate COCO''' 8 | parser = argparse.ArgumentParser(description='WILDCAT evaluate') 9 | parser.add_argument('--data_path', default='/data2fast/users/amafla/Precomp_features/data/', help='path to dataset ') 10 | parser.add_argument('--data_name', default='coco', help='{coco,f30k}_precomp') 11 | parser.add_argument('--model_path', default='./runs/coco/CVSE_scratch_mini_5/model_best.pth.tar',help='Path to load the model.') 12 | #parser.add_argument('--model_path', default='.runs/coco/CVSE_scratch/model_best.pth.tar',help='Path to load the model.') 13 | parser.add_argument('--vocab_path', default='./vocab/', help='Path to saved vocabulary json files.') 14 | parser.add_argument('--data_name_vocab', default='coco_precomp', help='{coco,f30k}_precomp') 15 | parser.add_argument('--transfer_test', action='store_true', help='Whether to perform cross-dataset testing.') 16 | parser.add_argument('--split', default='test', help='Choose to evaluate on coco 1k test set or 5k test set. (test | testall)') # 1k test set 17 | # parser.add_argument('--split', default='testall', help='Choose to evaluate on 1k test set or 5k test set. 
(test | testall)') # 5k test set 18 | parser.add_argument('--concept_path', default='./data/coco_annotations/Concept_annotations/', 19 | help='path to load the concept data') 20 | 21 | '''2) Evaluate f30k''' 22 | # parser = argparse.ArgumentParser(description='WILDCAT evaluate') 23 | # parser.add_argument('--data_path', default='../Bottom-up-atten-feature/data/', help='path to dataset ') 24 | # parser.add_argument('--data_name', default='f30k_precomp', help='{coco,f30k}_precomp') 25 | # parser.add_argument('--model_path', default='./runs/f30k/CVSE_f30k/model_best.pth.tar',help='Path to load the model.') 26 | # parser.add_argument('--vocab_path', default='./vocab/', help='Path to saved vocabulary json files.') 27 | # parser.add_argument('--data_name_vocab', default='f30k_precomp', help='{coco,f30k}_precomp') 28 | # parser.add_argument('--transfer_test', action='store_true', help='Whether to perform cross-dataset testing.') 29 | # parser.add_argument('--split', default='test', help='Evaluate on f30k 1k test set. 
def main_test():
    """Parse the command line and run ``evaluation.evalrank`` once.

    The parsed namespace is stored in the module-level ``args``.  With
    ``--transfer_test`` the evaluation is run on the ``"test"`` split and is
    additionally given ``concept_path`` and ``transfer_test=True``; otherwise
    the split chosen with ``--split`` is used.
    """
    global args
    args = parser.parse_args()

    # Keyword arguments shared by both evaluation modes.
    shared = dict(model_path=args.model_path,
                  data_path=args.data_path,
                  data_name=args.data_name,
                  data_name_vocab=args.data_name_vocab,
                  VSE_model=CVSE)

    if args.transfer_test == True:
        evaluation.evalrank(split="test", concept_path=args.concept_path,
                            transfer_test=True, **shared)
    else:
        evaluation.evalrank(split=args.split, **shared)
https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/figures/framework_CVSE.jpg -------------------------------------------------------------------------------- /metrics/README.md: -------------------------------------------------------------------------------- 1 | # Insert Paper Title 2 | 3 | ## Necessary Files (for those who just want to run the code) 4 | Link1, Link2, Link3 5 | 6 | 7 | ## How did you do it? (To those who are curious!) 8 | ### SPICE 9 | 10 | If you would like to compile from scratch or would like to see the changes we made to SPICE, please check the 11 | [submodule](https://github.com/furkanbiten/SPICE/tree/a8f69f1478faea2d61d94f759ebc2ca112be3111)! 12 | 13 | However, if you are a pragmatist and just want to use the code (I feel you!), here is the link to download 14 | [SPICE.zip](https://drive.google.com/file/d/1U9M-Z44fluvIovdR4DFNd3YyyjNfL702/view?usp=sharing). 15 | 16 | After downloading, unzip the file and run `python get_stanford models` and then run `java -Xmx8G -jar spice-1.0.jar ./example.json` to see if it works. 17 | This should result in a file called spice_pairwise.csv. 
if __name__ == "__main__":
    # Command-line entry point: loads a precomputed caption-metric matrix
    # (SPICE from CSV, CIDEr from .npy) and a model's similarity scores
    # (JSON) from --metric_path, then hands both to Metric.compute_metrics().
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, default='/data2fast/users/amafla/Precomp_features/data/', help='ground truth data path')

    parser.add_argument('--metric_path', type=str, default='./out', help='the path that has metrics and model output')

    parser.add_argument('--dataset', type=str, default='coco', help='which dataset to use, options are: coco, f30k')
    parser.add_argument('--split', type=str, default='test',
                        help='Choose to evaluate on coco 1k test set or 5k test set. (test | testall)')

    parser.add_argument('--metric_name', type=str, default='spice',
                        help='which image captioning metric to use, options are: cider, spice')

    parser.add_argument('--recall_type', type=str, default='recall', help='Options are recall and vse_recall')

    parser.add_argument('--score', default=['hard', 'soft', 'softer'], nargs="+",
                        help='which scoring method to use, options are: hard, soft, softer')

    parser.add_argument('--model_name', type=str, default='CVSE_cider',
                        help='which model to use, options are: VSEPP, SCAN, VSRN, CVSE')

    parser.add_argument('--threshold', type=int, default=1,
                        help='Threshold of number of relevant samples to compute metrics, options are: 1,2,3')
    # type=int: without it, values given on the command line arrive as
    # strings while the default list holds ints.
    parser.add_argument('--recall_thresholds', type=int, default=[1, 5, 10, 20, 30], nargs="+",
                        help='K values in Recall_at_K')
    parser.add_argument('--include_anns', action='store_true',
                        help='Include human annotations to define relevant items')

    args = parser.parse_args()

    if args.metric_name == 'spice':
        metric = pd.read_csv(os.path.join(args.metric_path, args.dataset + '_' + args.metric_name + '.csv'), sep=',',
                             header=None)
        metric = metric.to_numpy()
        # Keep only the leading columns of the SPICE matrix for each dataset.
        if args.dataset == 'coco': metric = metric[:, :5000]
        if args.dataset == 'f30k': metric = metric[:, :1000]

    elif args.metric_name == 'cider':
        metric = np.load(os.path.join(args.metric_path, args.dataset + '_cider.npy'))

    else:
        # Fail with a clear message instead of a NameError on `metric` below.
        raise ValueError('Unknown metric_name {!r}, options are: cider, spice'.format(args.metric_name))

    # NOTE(review): applied to both metrics here; confirm against the
    # original file that this block was not nested under the cider branch.
    if args.split == 'testall' and args.dataset == 'coco':
        metric = metric[:, :5000]
    elif args.split == 'test' and args.dataset == 'coco':
        metric = metric[:, :1000]

    filename = os.path.join(args.metric_path, 'sims_' + args.model_name + '_' + args.dataset + '_precomp.json')
    # Context manager so the file handle is closed right after parsing.
    with open(filename, 'r') as sims_file:
        sims = json.load(sims_file)

    # The size of the similarity file must agree with the requested split.
    if len(sims) == 1000 and args.dataset == 'coco' and args.split == 'testall':
        raise ValueError('You cant have coco 1k and testall option together')
    if len(sims) == 5000 and args.dataset == 'coco' and args.split == 'test':
        raise ValueError('You cant have coco 5k and test option together')

    M = Metric(metric, sims, recall_type=args.recall_type, score=args.score, metric_name=args.metric_name,
               recall_thresholds=args.recall_thresholds, threshold=args.threshold, dataset=args.dataset,
               include_anns=args.include_anns, model_name=args.model_name)

    print("\n ... LOADING DATA ...\n")
    scores = M.compute_metrics()
__name__ == "__main__": 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--root', default='./data') 9 | parser.add_argument('--out', default='./data') 10 | args = parser.parse_args() 11 | 12 | datasets = ['f30k', 'coco'] 13 | splits = ['dev', 'test'] 14 | 15 | for dataset in datasets: 16 | for split in splits: 17 | 18 | # Get the captions 19 | caps = [] 20 | PATH = os.path.join(args.root, dataset, split+'_caps.txt') 21 | with open(PATH, 'r') as f: 22 | for i in f.readlines(): 23 | caps.append(i.split('\n')[0]) 24 | 25 | if split=='dev' and dataset == 'f30k': 26 | caps = caps[:5000] 27 | 28 | # Get the ids 29 | ids = [] 30 | PATH = os.path.join(args.root, dataset, split+'_ids.txt') 31 | with open(PATH, 'r') as f: 32 | for i in f.readlines(): 33 | ids.append(i.split('\n')[0]) 34 | 35 | caps_json = [] 36 | for ix, cap in enumerate(zip(*[iter(caps)] * 5)): 37 | caps_json.append({'image_id': ids[ix*5], 'refs': list(cap), 'test': ''}) 38 | 39 | OUT_PATH = os.path.join(args.out, dataset+'_'+split+'.json') 40 | json.dump(caps_json, open(OUT_PATH, 'w')) -------------------------------------------------------------------------------- /paper_images/ITM_fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/paper_images/ITM_fig1.png -------------------------------------------------------------------------------- /paper_images/ITM_fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/paper_images/ITM_fig2.png -------------------------------------------------------------------------------- /run_all.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import argparse 3 | import os 4 | 5 | flatten = lambda t: [item for 
def flatten(t):
    """Flatten one level of nesting: ``[[a, b], [c]] -> [a, b, c]``."""
    return [item for sublist in t for item in sublist]


def _parse_score_lines(out_metric, prefix):
    """Parse the 2nd-4th comma-separated ``name: value`` fields of every
    line of ``out_metric`` that starts with ``prefix``.

    Values are parsed with ``float`` instead of ``eval`` so that text coming
    from a child process can never be executed as code.
    """
    return [[float(field.split(': ')[1]) for field in line.split(',')[1:4]]
            for line in out_metric.split('\n')
            if line.startswith(prefix)]


def softer(out_metric):
    """Return the three scores of each 'Softer score' line as floats."""
    return _parse_score_lines(out_metric, 'Softer score')


def harder(out_metric):
    """Return the three scores of each 'Hard score with Recall' line as floats."""
    return _parse_score_lines(out_metric, 'Hard score with Recall')


def print_func(x):
    """Format a nested list of numbers as space-separated 2-decimal values."""
    return ' '.join('{:.2f}'.format(i) for i in flatten(x))


def parse_vse_recall(out):
    """Extract the first three numbers after the colon of every line that
    starts with 'Image to' or 'Text to'."""
    return [[float(value) for value in line.split(':')[1].split(' ')[1:4]]
            for line in out.split('\n')
            if line.startswith('Image to') or line.startswith('Text to')]


def _run_command(argv):
    """Run ``argv`` (a list, no shell involved) and return its decoded stdout.

    A non-zero exit status is reported on stderr together with the child's
    stderr instead of being silently discarded; it does not raise, so the
    caller keeps its best-effort behaviour.
    """
    import sys

    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        sys.stderr.write('Command failed (exit code {}): {}\n{}\n'.format(
            proc.returncode, ' '.join(argv), err.decode('utf-8', errors='replace')))
    return out.decode('utf-8')


def _run_eval(args, metric_name, include_anns):
    """Run metrics/eval.py with the metric environment's interpreter."""
    argv = [os.path.join(args.metric_env, 'bin/python'), 'metrics/eval.py',
            '--metric_name', metric_name,
            '--dataset', args.dataset,
            '--recall_type', 'recall']
    if include_anns:
        argv.append('--include_anns')
    argv += ['--model_name', args.model_name, '--split', args.split]
    return _run_command(argv)


def get_metrics(args, sums, metric_name = 'cider'):
    """Run metrics/eval.py twice (without and with ``--include_anns``),
    print the 'Softer' scores and store their sums in ``sums`` in place.

    :param args: namespace providing ``metric_env``, ``dataset``,
        ``model_name`` and ``split``.
    :param sums: mutable list of at least 6 numbers. For 'cider' slots 0, 1
        and 5 are written; for 'spice' slots 2 and 3.
    :param metric_name: metric passed to eval.py ('cider' or 'spice'; any
        other name is evaluated and printed but leaves ``sums`` untouched).
    """
    m_results_softer = softer(_run_eval(args, metric_name, include_anns=False))

    out_with_anns = _run_eval(args, metric_name, include_anns=True)
    m_results_softer_with_anns = softer(out_with_anns)
    m_results_hard_with_anns = harder(out_with_anns)

    print(metric_name.capitalize()+' -NON GT Softer:')
    print(print_func(m_results_softer))
    print(metric_name.capitalize()+' -GT Softer:')
    print(print_func(m_results_softer_with_anns))

    if metric_name == 'cider':
        sums[5] = sum(flatten(m_results_hard_with_anns)[:3])
        sums[0] = sum(flatten(m_results_softer_with_anns))
        sums[1] = sum(flatten(m_results_softer))
    elif metric_name == 'spice':
        sums[2] = sum(flatten(m_results_softer_with_anns))
        sums[3] = sum(flatten(m_results_softer))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='/data1slow/users/amafla/Adaptive_Margin/CVSE/runs/coco/CVSE_Cider5_RS_div6_scratch_tuning_mini_3/model_best.pth.tar')
    parser.add_argument('--model_env', default='/home/amafla/miniconda3/envs/pytorch12')
    parser.add_argument('--metric_env', default='/home/amafla/miniconda3/envs/pytorch12')
    parser.add_argument('--dataset', default='coco')
    parser.add_argument('--model_name', default='CVSE_cider')
    parser.add_argument('--data_path', default='/data2fast/users/amafla/Precomp_features/data/')
    parser.add_argument('--vocab_path', default='/data1slow/users/amafla/Adaptive_Margin/CVSE/vocab/')
    parser.add_argument('--split', default='test', help='Choose to evaluate on coco 1k test set or 5k test set. (test | testall)')
    parser.add_argument('--transfer_test', action='store_true', help='Cross Eval Coco to Flickr30k')

    args = parser.parse_args()

    # Fail with a clear message instead of a NameError further down.
    data_names = {'f30k': 'f30k_precomp', 'coco': 'coco_precomp'}
    if args.dataset not in data_names:
        parser.error("--dataset must be one of: " + ', '.join(sorted(data_names)))
    data_name = data_names[args.dataset]

    sums = [0]*6
    argv = [args.model_env + '/bin/python', 'evaluate.py',
            '--model_path', args.model,
            # Transfer test always evaluates on f30k with the source vocab.
            '--data_name', 'f30k_precomp' if args.transfer_test else data_name,
            '--data_name_vocab', data_name,
            '--data_path', args.data_path,
            '--vocab_path', args.vocab_path,
            '--split', args.split]
    if args.transfer_test:
        argv.append('--transfer_test')
        # The metric scripts below must then read the f30k results.
        args.dataset = 'f30k'

    print('Command: ', ' '.join(argv))
    out = _run_command(argv)

    vse_recall = parse_vse_recall(out)

    sums[4] = sum(flatten(vse_recall))
    print('Finished with model, calculating metrics')
    print('VSE RECALL:')
    print(print_func(vse_recall))
    get_metrics(args, sums, 'spice')
    get_metrics(args, sums, 'cider')
    print('Sums: Cider-GT, Cider-NonGT, Spice-GT, Spice-NonGt, VSE_recall, Recall')
    print(' '.join('{:.2f}'.format(i) for i in sums))
def l2norm(X, dim=-1, eps=1e-12):
    """L2-normalize ``X`` along dimension ``dim``.

    ``eps`` is added to the norm so an all-zero slice does not divide by zero.
    """
    norm = torch.pow(X, 2).sum(dim=dim, keepdim=True).sqrt() + eps
    X = torch.div(X, norm)
    return X


class GraphConvolution(nn.Module):
    """
    Simple GCN layer which shares one weight matrix between the graphs it is
    applied to (``adj_O_P`` / ``adj_O_M`` in 'dual_graph' mode, ``adj_all``
    in 'whole_graph' mode).
    """
    def __init__(self, in_features, out_features, bias=False):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.Tensor(1, 1, out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise weight (and bias) uniformly in ``[-1/sqrt(out), 1/sqrt(out)]``."""
        # Local import: the module previously relied on `math` leaking in
        # through a star import.
        import math

        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj, conv_mode='whole_graph'):
        '''
        Graph Conv function
        :param input: input signal; multiplied by the (in_features, out_features) weight
        :param adj: dict of adjacency matrices; 'dual_graph' reads 'adj_O_P'
                    and 'adj_O_M', 'whole_graph' reads 'adj_all'
        :param conv_mode: 'dual_graph' (average of the two separate graphs)
                          or 'whole_graph'
        :raises ValueError: for any other ``conv_mode`` (previously the
                            method silently returned ``None``)
        '''
        support = torch.matmul(input, self.weight)

        if conv_mode == 'dual_graph':
            output_1 = torch.matmul(adj['adj_O_P'], support)
            output_2 = torch.matmul(adj['adj_O_M'], support)
            output = (output_1 + output_2) / 2
        elif conv_mode == 'whole_graph':
            output = torch.matmul(adj['adj_all'], support)
        else:
            raise ValueError(
                "conv_mode must be 'dual_graph' or 'whole_graph', got %r" % (conv_mode,))

        if self.bias is not None:
            return output + self.bias
        return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'


class C_GCN(nn.Module):
    """Two-layer GCN over a concept graph; produces L2-normalized concept
    embeddings of size ``opt.embed_size``."""

    def __init__(self, num_classes, in_channel=300, t=0, adj_file=None, norm_func='sigmoid', adj_gen_mode='C_GCN', opt=None):
        """
        :param num_classes: number of concepts (size of the adjacency matrices)
        :param in_channel: dimensionality of the input concept vectors
        :param t: threshold forwarded to ``gen_A_concept``
        :param adj_file: pickle file forwarded to ``gen_A_concept``
        :param norm_func: stored on the instance; not used inside this class
        :param adj_gen_mode: ``gen_mode`` forwarded to ``gen_A_concept``
        :param opt: options object; must provide ``embed_size``
        :raises ValueError: if ``opt`` is None
        """
        super(C_GCN, self).__init__()
        if opt is None:
            # The default is only a placeholder; embed_size is mandatory.
            raise ValueError("C_GCN requires `opt` with an `embed_size` attribute")

        self.num_classes = num_classes
        self.gc1 = GraphConvolution(in_channel, opt.embed_size // 2)
        self.gc2 = GraphConvolution(opt.embed_size // 2, opt.embed_size)
        self.relu = nn.LeakyReLU(0.2)

        # concept correlation mat generation
        _adj = gen_A_concept(num_classes, t, adj_file, gen_mode=adj_gen_mode)

        self.adj_O_P = Parameter(torch.from_numpy(_adj['adj_O_P']).float())
        self.adj_O_M = Parameter(torch.from_numpy(_adj['adj_O_M']).float())
        self.adj_all = Parameter(torch.from_numpy(_adj['adj_all']).float())

        self.norm_func = norm_func
        self.softmax = nn.Softmax(dim=1)
        self.joint_att_emb = nn.Linear(opt.embed_size, opt.embed_size)
        self.embed_size = opt.embed_size
        self.init_weights()

    def init_weights(self):
        """Xavier initialization of ``joint_att_emb``"""
        import math

        r = math.sqrt(6.) / math.sqrt(self.embed_size + self.embed_size)
        self.joint_att_emb.weight.data.uniform_(-r, r)
        self.joint_att_emb.bias.data.fill_(0)

    def forward(self, feature, inp, conv_mode='whole_graph'):
        """Compute the concept embeddings.

        :param feature: not used by this method
        :param inp: indexable whose first element holds the concept input
            vectors (only ``inp[0]`` is used)
        :param conv_mode: forwarded to both graph convolution layers
        :return: output of the second GCN layer, L2-normalized along the
            last dimension
        """
        inp = inp[0]

        # The normalized adjacency matrices are detached, so no gradient
        # flows back into the adjacency parameters through this path.
        adj = {
            'adj_O_P': gen_adj(self.adj_O_P).detach(),
            'adj_O_M': gen_adj(self.adj_O_M).detach(),
            'adj_all': gen_adj(self.adj_all).detach(),
        }

        x = self.gc1(inp, adj, conv_mode=conv_mode)
        x = self.relu(x)
        x = self.gc2(x, adj, conv_mode=conv_mode)

        concept_feature = l2norm(x)

        return concept_feature

    def get_config_optim(self, lr, lrp):
        """Return optimizer parameter groups for the two GCN layers.

        ``lrp`` is accepted but not used; ``joint_att_emb`` and the adjacency
        parameters are not included in the returned groups.
        """
        return [
            {'params': self.gc1.parameters(), 'lr': lr},
            {'params': self.gc2.parameters(), 'lr': lr},
        ]
_CONCEPT_ADJ_KEYS = ('adj_O_P', 'adj_O_M', 'adj_all')


def _load_pickle(path):
    """Load a pickle file and close the handle afterwards.

    NOTE(review): pickle can execute arbitrary code while loading; only use
    this on adjacency files from a trusted source.
    """
    import pickle

    with open(path, 'rb') as f:
        return pickle.load(f)


def _binarize_and_scale(adj, t):
    """Threshold ``adj`` in place at ``t`` (values < t -> 0, then values
    >= t -> 1) and return it scaled by 0.25 / column sum."""
    adj[adj < t] = 0
    adj[adj >= t] = 1
    return adj * 0.25 / (adj.sum(0, keepdims=True) + 1e-6)


def _apply_gen_mode(adj, t, gen_mode):
    """Apply the smoothing selected by ``gen_mode``; unknown modes return
    ``adj`` unchanged."""
    if gen_mode == 'ML_GCN':
        # only eq.(3) in paper
        return _binarize_and_scale(adj, t)
    if gen_mode == 'My_rescale':
        # only eq.(2) in paper
        return rescale_adj_matrix(adj)
    if gen_mode == 'Complex':
        # combine eq.(2) and (3) in paper
        return _binarize_and_scale(rescale_adj_matrix(adj), t)
    return adj


'''gen_A: co-occur matrix generation'''
def gen_A(num_classes, t, adj_file):
    """Build a binarized, re-weighted co-occurrence matrix plus identity.

    :param num_classes: size of the (square) adjacency matrix
    :param t: binarization threshold
    :param adj_file: pickle file holding a dict with an 'adj' ndarray
    :return: ndarray ``adj * 0.25 / column_sum + I``

    NOTE(review): unlike ``gen_A_concept`` the counts are not divided by
    ``result['nums']`` before thresholding - confirm this is intended.
    """
    result = _load_pickle(adj_file)

    _adj = result['adj']

    # turn mat to binary according to threshold t, then re-weight
    _adj = _binarize_and_scale(_adj, t)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
    _adj = _adj + np.identity(num_classes, int)  # identity square matrix
    return _adj


''' define concept adj_matrix'''
def gen_A_concept(num_classes, t, adj_file, gen_mode='ML_GCN'):
    """Build the smoothed concept adjacency matrices.

    :param num_classes: size of the (square) adjacency matrices
    :param t: binarization threshold used by the 'ML_GCN' and 'Complex' modes
    :param adj_file: pickle file holding a dict with 'nums' and any of
        'adj_O_P', 'adj_O_M', 'adj_all'
    :param gen_mode: 'ML_GCN', 'My_rescale' or 'Complex'; any other value
        skips the mode-specific smoothing
    :return: dict with one matrix per adjacency key found in the file. Each
        matrix is divided by ``nums``, smoothed according to ``gen_mode`` and
        has the identity added; 'adj_O_P' and 'adj_O_M' are additionally
        column-normalized before the identity is added.
    """
    result = _load_pickle(adj_file)

    _nums = result['nums']
    _nums = _nums[:, np.newaxis]

    identity = np.identity(num_classes, int)  # identity square matrix

    # smooth normalized adj matrix: _A_adj
    _A_adj = {}
    for key in result:
        if key not in _CONCEPT_ADJ_KEYS:
            continue
        adj = _apply_gen_mode(result[key] / _nums, t, gen_mode)
        if key != 'adj_all':
            # Only the two separate graphs get the extra column normalization.
            adj = adj / (adj.sum(0, keepdims=True) + 1e-8)
        _A_adj[key] = adj + identity

    return _A_adj


'''define the function to smooth the adj_matrix'''
def rescale_adj_matrix(adj_mat, t=5, p=0.02):
    """This function is to smooth the adj_matrix for dealing with the long-tail effect
    adj_mat: co-occurrence adj matrix

    t: parameter_1, determine the amplify/shrink rate
    p: parameter_2, determine the borderline prob value of un-important concept to shrink

    Returns ``t ** (adj_mat - p) - t ** (-p)``, so a zero entry stays zero.
    """
    adj_mat_smooth = np.power(t, adj_mat - p) - np.power(t, -p)
    return adj_mat_smooth


'''Laplacian Matrix transform'''
def gen_adj(A):
    """Return ``(A D)^T D`` with ``D = diag(row_sum(A) ** -0.5)``."""
    D = torch.pow(A.sum(1).float(), -0.5)
    D = torch.diag(D)
    adj = torch.matmul(torch.matmul(A, D).t(), D)
    return adj


'''Laplacian Matrix transform for concept graph'''
def gen_adj_concept(A):
    """Apply ``gen_adj`` to every known adjacency key of the dict ``A``.

    The results are detached from the autograd graph. The original code
    called ``.detach()`` and discarded the returned tensor, which had no
    effect.
    """
    adj = {}
    for key in A:
        if key in _CONCEPT_ADJ_KEYS:
            adj[key] = gen_adj(A[key]).detach()

    return adj
# Caption files used to build the vocabulary of each dataset.
annotations = {
    'coco_precomp': ['train_caps.txt', 'dev_caps.txt'],
    'f30k_precomp': ['train_caps.txt', 'dev_caps.txt'],
}

# Special tokens, registered in this order so they receive indices 0..3.
# NOTE(review): in the reviewed text all four literals had collapsed to the
# same empty string (angle-bracket tokens stripped); restored to the usual
# names - confirm against the callers that look these tokens up.
_PAD_TOKEN = '<pad>'
_START_TOKEN = '<start>'
_END_TOKEN = '<end>'
_UNK_TOKEN = '<unk>'


class Vocabulary(object):
    """Simple vocabulary wrapper mapping words to consecutive integer ids."""

    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        """Register ``word`` with the next free index; known words are ignored."""
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        """Return the index of ``word``, or the index of the unknown token.

        :raises KeyError: if ``word`` is unknown and the unknown token itself
            was never added.
        """
        if word not in self.word2idx:
            return self.word2idx[_UNK_TOKEN]
        return self.word2idx[word]

    def __len__(self):
        return len(self.word2idx)


def serialize_vocab(vocab, dest):
    """Write ``vocab`` to the JSON file ``dest``."""
    d = {
        'word2idx': vocab.word2idx,
        'idx2word': vocab.idx2word,
        'idx': vocab.idx,
    }
    with open(dest, "w") as f:
        json.dump(d, f)


def deserialize_vocab(src):
    """Load a vocabulary written by ``serialize_vocab``.

    NOTE(review): JSON object keys are always strings, so ``idx2word`` comes
    back keyed by ``'0'``, ``'1'``, ... rather than by int as in a freshly
    built vocabulary. Left unchanged because callers may depend on it.
    """
    with open(src) as f:
        d = json.load(f)
    vocab = Vocabulary()
    vocab.word2idx = d['word2idx']
    vocab.idx2word = d['idx2word']
    vocab.idx = d['idx']
    return vocab


def from_txt(txt):
    """Return the stripped lines of file ``txt`` as ``bytes`` objects."""
    with open(txt, 'rb') as f:
        return [line.strip() for line in f]


def build_vocab(data_path, data_name, caption_file, threshold):
    """Build a simple vocabulary wrapper.

    :param data_path: root directory containing one folder per dataset
    :param data_name: dataset folder name, also the key into ``caption_file``
    :param caption_file: dict mapping dataset name to its caption file names
    :param threshold: minimum number of occurrences for a word to be kept
    :return: ``Vocabulary`` holding the special tokens followed by the kept words
    """
    counter = Counter()
    for path in caption_file[data_name]:
        full_path = os.path.join(os.path.join(data_path, data_name), path)
        captions = from_txt(full_path)
        for i, caption in enumerate(captions):
            # Captions are read as bytes; decode before tokenizing.
            tokens = nltk.tokenize.word_tokenize(
                caption.lower().decode('utf-8'))
            counter.update(tokens)

            if i % 1000 == 0:
                print("[%d/%d] tokenized the captions." % (i, len(captions)))

    # Discard if the occurrence of the word is less than threshold.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    for token in (_PAD_TOKEN, _START_TOKEN, _END_TOKEN, _UNK_TOKEN):
        vocab.add_word(token)

    # Add words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab


def main(data_path, data_name):
    """Build the vocabulary of ``data_name`` and save it under ./vocab/."""
    vocab = build_vocab(data_path, data_name, caption_file=annotations, threshold=4)
    # Make sure the output directory exists before writing into it.
    os.makedirs('./vocab', exist_ok=True)
    serialize_vocab(vocab, './vocab/%s_vocab.json' % data_name)
    print("Saved vocabulary file to ", './vocab/%s_vocab.json' % data_name)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='data')
    parser.add_argument('--data_name', default='f30k_precomp',
                        help='{coco,f30k}_precomp')
    opt = parser.parse_args()
    main(opt.data_path, opt.data_name)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/vocab/f30k_vocab.pkl -------------------------------------------------------------------------------- /vocab/f8k_precomp_vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/vocab/f8k_precomp_vocab.pkl -------------------------------------------------------------------------------- /vocab/f8k_vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndresPMD/semantic_adaptive_margin/1e8bf2f1836498c48df030cb0a967b72b52e8460/vocab/f8k_vocab.pkl --------------------------------------------------------------------------------