├── modules ├── __init__.py ├── metrics.py ├── Beam_search.py ├── model.py ├── ori_models.py └── custom_callbacks.py ├── web_app ├── modules │ ├── __init__.py │ ├── Beam_search.py │ └── model.py ├── tmp │ ├── dog.jpeg │ ├── fair.jpeg │ ├── trans.jpg │ ├── Iron-Man.jpg │ ├── download.jpeg │ ├── profpic.jpeg │ ├── download (1).jpeg │ ├── maxresdefault.jpg │ ├── cc_seedling_250.jpg │ ├── running_cover_1.jpg │ ├── modi-2014-i-day_file-photo.jpg │ ├── 578211-gettyimages-542930526.jpg │ ├── 1200px-Kaldari_Stellaria_media_01.jpg │ ├── 1800x1200_cat_relaxing_on_patio_other.jpg │ ├── 01-guina-nationalgeographic_2750749.adapt.1900.1.jpg │ └── Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg ├── templates │ ├── base.html │ └── index.html ├── static │ ├── css │ │ └── main.css │ ├── js │ │ └── main.js │ └── Vocab_5_cap_per_img_2_min_word_freq.json ├── application.py ├── Beam_search.py └── ori_models.py ├── snapshots ├── Z_t.png ├── eval.jpeg ├── model.png ├── Z_t copy.png ├── lr_find.png ├── beam_search.png ├── caption_gen.gif ├── loss_stage1.png ├── loss_stage2.png ├── encoded_image_vec.png ├── encoded_image_vec copy.png ├── attention_and_alpha_scores.png └── attention_and_alpha_scores copy.png ├── .gitignore ├── fastext_wordembedding.py ├── requirements.txt ├── notebook2script.py └── readme.md /modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /web_app/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snapshots/Z_t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/Z_t.png -------------------------------------------------------------------------------- /snapshots/eval.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/eval.jpeg -------------------------------------------------------------------------------- /snapshots/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/model.png -------------------------------------------------------------------------------- /web_app/tmp/dog.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/dog.jpeg -------------------------------------------------------------------------------- /snapshots/Z_t copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/Z_t copy.png -------------------------------------------------------------------------------- /snapshots/lr_find.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/lr_find.png -------------------------------------------------------------------------------- /web_app/tmp/fair.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/fair.jpeg -------------------------------------------------------------------------------- /web_app/tmp/trans.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/trans.jpg -------------------------------------------------------------------------------- /snapshots/beam_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/beam_search.png -------------------------------------------------------------------------------- /snapshots/caption_gen.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/caption_gen.gif -------------------------------------------------------------------------------- /snapshots/loss_stage1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/loss_stage1.png -------------------------------------------------------------------------------- /snapshots/loss_stage2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/loss_stage2.png -------------------------------------------------------------------------------- /web_app/tmp/Iron-Man.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/Iron-Man.jpg -------------------------------------------------------------------------------- /web_app/tmp/download.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/download.jpeg -------------------------------------------------------------------------------- /web_app/tmp/profpic.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/profpic.jpeg -------------------------------------------------------------------------------- /web_app/tmp/download (1).jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/download (1).jpeg -------------------------------------------------------------------------------- /web_app/tmp/maxresdefault.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/maxresdefault.jpg -------------------------------------------------------------------------------- /snapshots/encoded_image_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/encoded_image_vec.png 
-------------------------------------------------------------------------------- /web_app/tmp/cc_seedling_250.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/cc_seedling_250.jpg -------------------------------------------------------------------------------- /web_app/tmp/running_cover_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/running_cover_1.jpg -------------------------------------------------------------------------------- /snapshots/encoded_image_vec copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/encoded_image_vec copy.png -------------------------------------------------------------------------------- /snapshots/attention_and_alpha_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/attention_and_alpha_scores.png -------------------------------------------------------------------------------- /web_app/tmp/modi-2014-i-day_file-photo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/modi-2014-i-day_file-photo.jpg -------------------------------------------------------------------------------- /snapshots/attention_and_alpha_scores copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/attention_and_alpha_scores copy.png -------------------------------------------------------------------------------- /web_app/tmp/578211-gettyimages-542930526.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/578211-gettyimages-542930526.jpg -------------------------------------------------------------------------------- /web_app/tmp/1200px-Kaldari_Stellaria_media_01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/1200px-Kaldari_Stellaria_media_01.jpg -------------------------------------------------------------------------------- /web_app/tmp/1800x1200_cat_relaxing_on_patio_other.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/1800x1200_cat_relaxing_on_patio_other.jpg -------------------------------------------------------------------------------- /web_app/tmp/01-guina-nationalgeographic_2750749.adapt.1900.1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/01-guina-nationalgeographic_2750749.adapt.1900.1.jpg -------------------------------------------------------------------------------- 
/web_app/tmp/Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .Coo_dataset 3 | .ipynb_checkpoints 4 | .vscode 5 | caption_datasets 6 | exp 7 | flicker8k-dataset 8 | models 9 | exp 10 | modules_old 11 | runs 12 | web_app/models 13 | BEST_checkpoint_Vocab_5_cap_per_img_2_min_word_freq_withoutFineTuning.pth.tar 14 | checkpoint_Vocab_5_cap_per_img_2_min_word_freq_yesturday.pth.tar 15 | checkpoint_Vocab_5_cap_per_img_2_min_word_freq.pth.tar 16 | Main-checkpoint.ipynb 17 | main-Finalized_LanguageModel.ipynb 18 | nb_prepare_WordEmb.py 19 | BEST_checkpoint_Vocab_5_cap_per_img_2_min_word_freq.pth.tar 20 | Article.md 21 | Untitled.ipynb 22 | -------------------------------------------------------------------------------- /fastext_wordembedding.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import numpy as np 3 | import fasttext as ft 4 | import json 5 | import pickle 6 | from pdb import set_trace 7 | 8 | emb_path = '/home/skumar/.nlp_wordembeddings/cc.en.300.bin' 9 | word_map_path = '/home/skumar/DataScience/Projects_Section/Projects_Working/Image_Captioning_Pytorch/flicker8k-dataset/Vocab_5_cap_per_img_2_min_word_freq.json' # word map index dict path 10 | embExport_pkl_path = '/home/skumar/DataScience/Projects_Section/Projects_Working/Image_Captioning_Pytorch/flicker8k-dataset/Fastext_embedd_wordMap.pkl' 11 | emb_dim = 300 12 | 13 | # get words in the wordmap with index 14 | 15 | with open(word_map_path,'r') as j: 16 | word_map = json.load(j) 17 | 18 | # create a dictionary of words and corresponding verctor array 19 | word_emb = np.zeros((len(word_map),emb_dim)) 20 | missing = ['NA']*len(word_map) 21 | 22 | # load fasttext word vectors 23 | en_vecs = ft.load_model(str(emb_path)) 24 | 25 | for i,k in tqdm(enumerate(word_map)): 26 | if k in en_vecs.get_words(): 27 | word_emb[i] = en_vecs.get_word_vector(k) 28 | else: 29 | missing[i] = k 30 | 31 | print(missing) 32 | 33 | with open(embExport_pkl_path,'wb') as f: 34 | pickle.dump(word_emb, f, 2) -------------------------------------------------------------------------------- /web_app/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Caption Generator Demo 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 23 |
24 | {% block content %}{% endblock %} <!-- the rest of base.html's markup was stripped when this dump was rendered as HTML; only the page title "Caption Generator Demo" and this Jinja content block survive -->
25 |
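<!-- child templates hook into the block above: index.html opens with {% extends "base.html" %} and supplies its markup inside {% block content %} ... {% endblock %}, which Flask's render_template('index.html') in application.py composes into the full page -->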
26 | 27 | -------------------------------------------------------------------------------- /web_app/static/css/main.css: -------------------------------------------------------------------------------- 1 | .img-preview { 2 | margin-left: auto; 3 | margin-right: auto; 4 | width: 256px; 5 | height: 256px; 6 | position: relative; 7 | border: 5px solid #F8F8F8; 8 | box-shadow: 0px 2px 4px 0px rgba(0, 0, 0, 0.1); 9 | margin-top: 1em; 10 | margin-bottom: 1em; 11 | } 12 | 13 | .img-preview>div { 14 | margin-left: auto; 15 | margin-right: auto; 16 | width: 100%; 17 | height: 100%; 18 | background-size: 256px 256px; 19 | background-repeat: no-repeat; 20 | background-position: center; 21 | } 22 | 23 | input[type="file"] { 24 | display: none; 25 | } 26 | 27 | .upload-label{ 28 | display: inline-block; 29 | padding: 12px 30px; 30 | border-radius: .4rem; 31 | background: cadetblue; 32 | color: #fff; 33 | font-size: 1em; 34 | transition: all .4s; 35 | cursor: pointer; 36 | } 37 | 38 | .upload-label:hover{ 39 | background: #34495E; 40 | color: darkgray; 41 | font-weight: bold; 42 | } 43 | 44 | .loader { 45 | border: 8px solid #f3f3f3; /* Light grey */ 46 | border-top: 8px solid maroon; /* Blue */ 47 | border-radius: 50%; 48 | width: 50px; 49 | height: 50px; 50 | animation: spin 1s linear infinite; 51 | } 52 | 53 | @keyframes spin { 54 | 0% { transform: rotate(0deg); } 55 | 100% { transform: rotate(360deg); } 56 | } -------------------------------------------------------------------------------- /web_app/static/js/main.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function () { 2 | // Init 3 | $('.image-section').hide(); 4 | $('.loader').hide(); 5 | $('#result').hide(); 6 | 7 | // Upload Preview 8 | function readURL(input) { 9 | if (input.files && input.files[0]) { 10 | var reader = new FileReader(); 11 | reader.onload = function (e) { 12 | $('#imagePreview').css('background-image', 'url(' + e.target.result + ')'); 13 | $('#imagePreview').hide(); 14 | $('#imagePreview').fadeIn(650); 15 | } 16 | reader.readAsDataURL(input.files[0]); 17 | } 18 | } 19 | $("#imageUpload").change(function () { 20 | $('.image-section').show(); 21 | $('#btn-predict').show(); 22 | $('#result').text(''); 23 | $('#result').hide(); 24 | readURL(this); 25 | }); 26 | 27 | // Predict 28 | $('#btn-predict').click(function () { 29 | var form_data = new FormData($('#upload-file')[0]); 30 | 31 | // Show loading animation 32 | $(this).hide(); 33 | $('.loader').show(); 34 | 35 | // Make prediction by calling api /predict 36 | $.ajax({ 37 | type: 'POST', 38 | url: '/predict', 39 | data: form_data, 40 | contentType: false, 41 | cache: false, 42 | processData: false, 43 | async: true, 44 | success: function (data) { 45 | // Get and display the result 46 | $('.loader').hide(); 47 | $('#result').fadeIn(600); 48 | $('#result').text(data); 49 | console.log('Success!'); 50 | }, 51 | }); 52 | }); 53 | 54 | }); 55 | -------------------------------------------------------------------------------- /web_app/application.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, flash, request, jsonify, Markup 2 | import os 3 | import torch 4 | from pathlib2 import Path 5 | import logging 6 | from modules.Beam_search import * 7 | from PIL import Image 8 | import json 9 | from torchvision import transforms 10 | #import settings 11 | 12 | #set devise 13 | device = 'cpu' 14 | #defaults.device = torch.device('cpu') 15 | 16 | # 
Load model architecture and parameters 17 | path = Path() 18 | 19 | checkpoint_path = "models/Best_model.pth" 20 | 21 | # Vocab dict loading 22 | vocab = json.load((path/"static/Vocab_5_cap_per_img_2_min_word_freq.json").open('rb')) 23 | ind_str = dict(zip(vocab.values(),vocab.keys())) 24 | 25 | # transformation 26 | transformations = transforms.Compose([ 27 | transforms.Resize((224,224)), 28 | transforms.ToTensor(), 29 | transforms.Normalize([0.5238, 0.5003, 0.4718], [0.3159, 0.3091, 0.3216])]) 30 | 31 | 32 | # set flask params 33 | app = Flask(__name__) 34 | 35 | 36 | app.config["IMAGE_UPLOADS"] = 'tmp' 37 | 38 | @app.errorhandler(500) 39 | def server_error(e): 40 | logging.exception('some error') 41 | return """ 42 | An internal error
{}
43 | """.format(e), 500 44 | 45 | @app.route("/", methods=['GET']) 46 | def startup(): 47 | return render_template('index.html') # pred_class 48 | 49 | @app.route('/predict', methods=["GET",'POST']) 50 | def predict(): 51 | if request.method == "POST": 52 | # if not request.files: 53 | image = request.files['file'] 54 | upload_path = os.path.join(app.config["IMAGE_UPLOADS"], image.filename) 55 | image.save(upload_path) 56 | caps = beam_search(checkpoint_path,img_path = upload_path, beam_size = 5, vocab = vocab, transforms = transformations,device=device) 57 | caps = [ind_str[x] for x in caps] 58 | return ' '.join(caps)#jsonify(predict=str(pred_class)) 59 | #app.logger.info("Image %s classified as %s" % (url, pred_class)) 60 | return None 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | app.run(debug=True) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.3.0 2 | backcall==0.2.0 3 | beautifulsoup4==4.9.1 4 | blis==0.2.4 5 | Bottleneck==1.3.2 6 | brotlipy==0.7.0 7 | certifi==2020.6.20 8 | cffi==1.14.0 9 | chardet==3.0.4 10 | click==7.1.2 11 | conda==4.8.3 12 | conda-package-handling==1.7.0 13 | cryptography==3.2 14 | cycler==0.10.0 15 | cymem==2.0.2 16 | dataclasses==0.6 17 | decorator==4.4.2 18 | fastai==1.0.61 19 | fastprogress==0.2.2 20 | Flask==1.1.2 21 | idna @ file:///tmp/build/80754af9/idna_1593446292537/work 22 | importlib-metadata @ file:///tmp/build/80754af9/importlib-metadata_1593446433964/work 23 | ipykernel @ file:///tmp/build/80754af9/ipykernel_1596206602906/work/dist/ipykernel-5.3.4-py3-none-any.whl 24 | ipython @ file:///tmp/build/80754af9/ipython_1593447367857/work 25 | ipython-genutils==0.2.0 26 | itsdangerous==1.1.0 27 | jedi @ file:///tmp/build/80754af9/jedi_1592841914522/work 28 | Jinja2==2.11.2 29 | joblib @ file:///tmp/build/80754af9/joblib_1594236160679/work 30 | jsonschema==3.2.0 31 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1594826976318/work 32 | jupyter-core==4.6.3 33 | kiwisolver==1.2.0 34 | MarkupSafe==1.1.1 35 | matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-base_1595353103837/work 36 | murmurhash==1.0.2 37 | nltk @ file:///tmp/build/80754af9/nltk_1592496090529/work 38 | numexpr==2.7.1 39 | numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1596233718326/work 40 | olefile==0.46 41 | packaging==20.4 42 | pandas @ file:///tmp/build/80754af9/pandas_1592841660978/work 43 | parso==0.7.0 44 | pathlib2==2.3.5 45 | pexpect==4.8.0 46 | pickleshare==0.7.5 47 | Pillow @ file:///tmp/build/80754af9/pillow_1594307298074/work 48 | plac @ file:///tmp/build/80754af9/plac_1594261902054/work 49 | preshed==2.0.1 50 | prompt-toolkit==3.0.5 51 | protobuf==3.12.4 52 | ptyprocess==0.6.0 53 | pycosat==0.6.3 54 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work 55 | Pygments==2.6.1 56 | pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1594392929924/work 57 | pyparsing==2.4.7 58 | pyrsistent==0.16.0 59 | PySocks==1.7.1 60 | python-dateutil==2.8.1 61 | pytz==2020.1 62 | PyYAML==5.3.1 63 | pyzmq==19.0.1 64 | regex @ file:///tmp/build/80754af9/regex_1593435572336/work 65 | requests @ file:///tmp/build/80754af9/requests_1592841827918/work 66 | ruamel-yaml==0.15.87 67 | scikit-learn @ file:///tmp/build/80754af9/scikit-learn_1592502866951/work 68 | scipy @ file:///tmp/build/80754af9/scipy_1592930497347/work 69 | six==1.15.0 70 | soupsieve==2.0.1 71 | spacy==2.1.8 72 | srsly==0.1.0 
73 | tensorboardX==2.1 74 | thinc==7.0.8 75 | threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl 76 | torch==1.4.0 77 | torchvision==0.5.0 78 | tornado==6.0.4 79 | tqdm @ file:///tmp/build/80754af9/tqdm_1593446365756/work 80 | traitlets==4.3.3 81 | urllib3==1.25.9 82 | wasabi==0.2.2 83 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work 84 | Werkzeug==1.0.1 85 | zipp==3.1.0 86 | -------------------------------------------------------------------------------- /web_app/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} {% block content %} 2 | 3 |

Image Caption Generator
<!-- the body markup of index.html was stripped when this dump was rendered as HTML; judging from static/js/main.js and main.css it contains an upload form (#upload-file) with a hidden file input (#imageUpload) behind a styled label, an image preview panel (#imagePreview), a predict button (#btn-predict), a CSS spinner (.loader), a result area (#result), and an inline script block, closed by the {% endblock %} below -->
32 | 33 | 88 | 89 | {% endblock %} 90 | -------------------------------------------------------------------------------- /notebook2script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json,fire,re 4 | from pathlib import Path 5 | import io 6 | 7 | def is_export(cell): 8 | if cell['cell_type'] != 'code': return False 9 | src = cell['source'] 10 | if len(src) == 0 or len(src[0]) < 7: return False 11 | #import pdb; pdb.set_trace() 12 | return re.match(r'^\s*#\s*export\s*$', src[0], re.IGNORECASE) is not None 13 | 14 | def getSortedFiles(allFiles, upTo=None): 15 | '''Returns all the notebok files sorted by name. 16 | allFiles = True : returns all files 17 | = '*_*.ipynb' : returns this pattern 18 | upTo = None : no upper limit 19 | = filter : returns all files up to 'filter' included 20 | The sorting optioj is important to ensure that the notebok are executed in correct order. 21 | ''' 22 | import glob 23 | ret = [] 24 | if (allFiles==True): ret = glob.glob('*.ipynb') # Checks both that is bool type and that is True 25 | if (isinstance(allFiles,str)): ret = glob.glob(allFiles) 26 | if 0==len(ret): 27 | print('WARNING: No files found') 28 | return ret 29 | if upTo is not None: ret = [f for f in ret if str(f)<=str(upTo)] 30 | return sorted(ret) 31 | 32 | def notebook2script(fname=None, allFiles=None, upTo=None): 33 | '''Finds cells starting with `#export` and puts them into a new module 34 | + allFiles: convert all files in the folder 35 | + upTo: convert files up to specified one included 36 | 37 | ES: 38 | notebook2script --allFiles=True # Parse all files 39 | notebook2script --allFiles=nb* # Parse all files starting with nb* 40 | notebook2script --upTo=10 # Parse all files with (name<='10') 41 | notebook2script --allFiles=*_*.ipynb --upTo=10 # Parse all files with an '_' and (name<='10') 42 | ''' 43 | # initial checks 44 | if (allFiles is None) and (upTo is not None): allFiles=True # Enable allFiles if upTo is present 45 | if (fname is None) and (not allFiles): print('Should provide a file name') 46 | if not allFiles: notebook2scriptSingle(fname) 47 | else: 48 | print('Begin...') 49 | [notebook2scriptSingle(f) for f in getSortedFiles(allFiles,upTo)] 50 | print('...End') 51 | 52 | 53 | def notebook2scriptSingle(fname): 54 | "Finds cells starting with `#export` and puts them into a new module" 55 | fname = Path(fname) 56 | fname_out = f"nb_{fname.stem.split('_')[0]}.py" 57 | main_dic = json.load(open(fname,'r',encoding="utf-8")) 58 | code_cells = [c for c in main_dic['cells'] if is_export(c)] 59 | module = f''' 60 | ################################################# 61 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! 
### 62 | ################################################# 63 | # file to edit: dev_nb/{fname.name} 64 | 65 | ''' 66 | for cell in code_cells: module += ''.join(cell['source'][1:]) + '\n\n' 67 | # remove trailing spaces 68 | module = re.sub(r' +$', '', module, flags=re.MULTILINE) 69 | if not (fname.parent/'exp').exists(): (fname.parent/'exp').mkdir() 70 | output_path = fname.parent/'exp'/fname_out 71 | with io.open(output_path, "w", encoding="utf-8") as f: 72 | f.write(module[:-2]) 73 | print(f"Converted {fname} to {output_path}") 74 | 75 | if __name__ == '__main__': fire.Fire(notebook2script) 76 | 77 | -------------------------------------------------------------------------------- /modules/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from pdb import set_trace 4 | 5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 6 | 7 | def beam_search(learn, img,vocab = None, beam_size = 5): 8 | with torch.no_grad(): 9 | k = beam_size 10 | 11 | ## imput tensor preparation 12 | img = img.unsqueeze(0) #treating as batch of size 1 13 | 14 | ## model prepartion 15 | mod = learn.model 16 | 17 | # encoder output 18 | encoder_out = mod.encoder(img) 19 | encoder_dim = encoder_out.size(-1) 20 | num_pixels = encoder_out.size(1) 21 | 22 | # expand or repeat 'k' time 23 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim) 24 | 25 | # Tensor to store top k previous words at each step; now they're just 26 | k_prev_words = torch.LongTensor([[vocab['']]] * k).to(device) # (k, 1) 27 | 28 | # Tensor to store top k sequences; now they're just 29 | seqs = k_prev_words # (k, 1) 30 | 31 | # Tensor to store top k sequences' scores; now they're just 0 32 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1) 33 | 34 | # Lists to store completed sequences and scores 35 | complete_seqs = list() 36 | complete_seqs_scores = list() 37 | 38 | # Start decoding 39 | step = 1 40 | h, c = mod.decoder.init_hidden_state(encoder_out) 41 | 42 | references = list() 43 | hypotheses = list() 44 | 45 | # s is a number less than or equal to k, because sequences are removed from this process once they hit 46 | while True: 47 | embeddings = mod.decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim) 48 | awe, _ = mod.decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels) 49 | gate = mod.decoder.sigmoid(mod.decoder.f_beta(h)) 50 | awe = (gate * awe) 51 | 52 | h, c = mod.decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c)) 53 | scores = mod.decoder.fc(h) 54 | scores = F.log_softmax(scores, dim=1) 55 | 56 | 57 | # Add scores to prev scores 58 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size) 59 | 60 | # For the first step, all k points will have the same scores (since same k previous words, h, c) 61 | if step == 1: 62 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s) 63 | else: 64 | # Unroll and find top scores, and their unrolled indices 65 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s) 66 | 67 | # Convert unrolled indices to actual indices of scores 68 | prev_word_inds = top_k_words / len(vocab) # (s) 69 | next_word_inds = top_k_words % len(vocab) # (s) 70 | 71 | # Add new words to sequences 72 | seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1) # (s, step+1) stroes indices of words 73 | 74 | # Which sequences are incomplete (didn't reach )? 
75 | incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if 76 | next_word != vocab['']] 77 | 78 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds)) 79 | 80 | # Set aside complete sequences 81 | if len(complete_inds) > 0: 82 | complete_seqs.extend(seqs[complete_inds].tolist()) 83 | complete_seqs_scores.extend(top_k_scores[complete_inds]) 84 | k -= len(complete_inds) # reduce beam length accordingly 85 | 86 | # Proceed with incomplete sequences 87 | if k == 0: 88 | break 89 | seqs = seqs[incomplete_inds] 90 | h = h[prev_word_inds[incomplete_inds]] 91 | c = c[prev_word_inds[incomplete_inds]] 92 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]] 93 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1) 94 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1) 95 | 96 | 97 | # Break if things have been going on too long 98 | if step > 50: 99 | break 100 | step += 1 101 | 102 | i = complete_seqs_scores.index(max(complete_seqs_scores)) 103 | seq = complete_seqs[i] 104 | 105 | # Hypotheses 106 | hypotheses.append([w for w in seq if w not in {vocab[''], vocab[''], vocab['']}]) 107 | 108 | return hypotheses 109 | -------------------------------------------------------------------------------- /web_app/Beam_search.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from modules.model import Encoder, DecoderWithAttention 4 | from nltk.translate.bleu_score import corpus_bleu 5 | from PIL import Image 6 | import torch.nn.functional as F 7 | from pdb import set_trace 8 | 9 | 10 | def beam_search(checkpoint_path, img_path, beam_size = 5, vocab = None, transforms = None, device = None): 11 | 12 | k = beam_size 13 | ## imput tensor preparation 14 | img = Image.open(img_path) 15 | if transforms is not None: img = transforms(img) 16 | img = img.unsqueeze(0) #treating as batch of size 1 17 | 18 | # Checkpoint loading 19 | checkpoint = torch.load(checkpoint_path, map_location=str(device)) 20 | decoder = checkpoint['decoder'] 21 | decoder = decoder.to(device) 22 | decoder.eval() 23 | encoder = checkpoint['encoder'] 24 | encoder = encoder.to(device) 25 | encoder.eval() 26 | 27 | 28 | # encoder output 29 | encoder_out = encoder(img) 30 | encoder_dim = encoder_out.size(-1) 31 | num_pixels = encoder_out.size(1) 32 | 33 | # expand or repeat 'k' time 34 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim) 35 | 36 | # Tensor to store top k previous words at each step; now they're just 37 | k_prev_words = torch.LongTensor([[vocab['']]] * k).to(device) # (k, 1) 38 | 39 | # Tensor to store top k sequences; now they're just 40 | seqs = k_prev_words # (k, 1) 41 | 42 | # Tensor to store top k sequences' scores; now they're just 0 43 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1) 44 | 45 | # Lists to store completed sequences and scores 46 | complete_seqs = list() 47 | complete_seqs_scores = list() 48 | 49 | # Start decoding 50 | step = 1 51 | h, c = decoder.init_hidden_state(encoder_out) 52 | 53 | hypotheses = list() 54 | 55 | # s is a number less than or equal to k, because sequences are removed from this process once they hit 56 | while True: 57 | 58 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim) 59 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels) 60 | gate = decoder.sigmoid(decoder.f_beta(h)) 61 | awe = (gate * awe) 62 | 63 | h, c = 
decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c)) 64 | scores = decoder.fc(h) 65 | scores = F.log_softmax(scores, dim=1) 66 | 67 | 68 | # Add scores to prev scores 69 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size) 70 | 71 | # For the first step, all k points will have the same scores (since same k previous words, h, c) 72 | if step == 1: 73 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s) 74 | else: 75 | # Unroll and find top scores, and their unrolled indices 76 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s) 77 | 78 | # Convert unrolled indices to actual indices of scores 79 | prev_word_inds = top_k_words / len(vocab) # (s) 80 | next_word_inds = top_k_words % len(vocab) # (s) 81 | 82 | # Add new words to sequences 83 | seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1) # (s, step+1) stroes indices of words 84 | 85 | # Which sequences are incomplete (didn't reach )? 86 | incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if 87 | next_word != vocab['']] 88 | 89 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds)) 90 | 91 | # Set aside complete sequences 92 | if len(complete_inds) > 0: 93 | complete_seqs.extend(seqs[complete_inds].tolist()) 94 | complete_seqs_scores.extend(top_k_scores[complete_inds]) 95 | k -= len(complete_inds) # reduce beam length accordingly 96 | 97 | # Proceed with incomplete sequences 98 | if k == 0: 99 | break 100 | seqs = seqs[incomplete_inds] 101 | h = h[prev_word_inds[incomplete_inds]] 102 | c = c[prev_word_inds[incomplete_inds]] 103 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]] 104 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1) 105 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1) 106 | 107 | 108 | 109 | # Break if things have been going on too long 110 | if step > 50: 111 | break 112 | step += 1 113 | 114 | 115 | i = complete_seqs_scores.index(max(complete_seqs_scores)) 116 | seq = complete_seqs[i] 117 | 118 | # Hypotheses 119 | hypotheses.append([w for w in seq if w not in {vocab[''], vocab[''], vocab['']}]) 120 | hypotheses = hypotheses[0] 121 | return hypotheses -------------------------------------------------------------------------------- /modules/Beam_search.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from nltk.translate.bleu_score import corpus_bleu 4 | from PIL import Image 5 | import torch.nn.functional as F 6 | from pdb import set_trace 7 | 8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 9 | 10 | def beam_search(learn, img_path, beam_size = 5, vocab = None, transforms = None): 11 | 12 | k = beam_size 13 | ## imput tensor preparation 14 | img = Image.open(img_path) 15 | if transforms is not None: img = transforms(img) 16 | img = img.unsqueeze(0) #treating as batch of size 1 17 | img = img.to(device) 18 | 19 | # Checkpoint loading 20 | decoder = learn.model.decoder 21 | decoder = decoder.to(device) 22 | decoder.eval() 23 | encoder = learn.model.encoder 24 | encoder = encoder.to(device) 25 | encoder.eval() 26 | 27 | 28 | # encoder output 29 | encoder_out = encoder(img) 30 | encoder_dim = encoder_out.size(-1) 31 | encoder_out = encoder_out.view(1, -1, encoder_dim) 32 | 33 | # expand or repeat 'k' time 34 | num_pixels = encoder_out.size(1) 35 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim) 36 | 37 | # Tensor to 
store top k previous words at each step; now they're just 38 | k_prev_words = torch.LongTensor([[vocab['']]] * k).to(device) # (k, 1) 39 | 40 | # Tensor to store top k sequences; now they're just 41 | seqs = k_prev_words # (k, 1) 42 | 43 | # Tensor to store top k sequences' scores; now they're just 0 44 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1) 45 | 46 | # Lists to store completed sequences and scores 47 | complete_seqs = list() 48 | complete_seqs_scores = list() 49 | 50 | # Start decoding 51 | step = 1 52 | h, c = decoder.init_hidden_state(encoder_out) 53 | 54 | hypotheses = list() 55 | 56 | # s is a number less than or equal to k, because sequences are removed from this process once they hit 57 | while True: 58 | 59 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim) 60 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels) 61 | gate = decoder.sigmoid(decoder.f_beta(h)) 62 | awe = (gate * awe) 63 | 64 | h, c = decoder.decode_step(torch.cat([embeddings, awe], dim=1), (h, c)) 65 | scores = decoder.fc(h) 66 | scores = F.log_softmax(scores, dim=1) 67 | 68 | 69 | # Add scores to prev scores 70 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size) 71 | 72 | # For the first step, all k points will have the same scores (since same k previous words, h, c) 73 | if step == 1: 74 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s) 75 | else: 76 | # Unroll and find top scores, and their unrolled indices 77 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s) 78 | 79 | # Convert unrolled indices to actual indices of scores 80 | prev_word_inds = top_k_words / len(vocab) # (s) 81 | next_word_inds = top_k_words % len(vocab) # (s) 82 | 83 | # Add new words to sequences 84 | seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1) # (s, step+1) stroes indices of words 85 | 86 | # Which sequences are incomplete (didn't reach )? 
87 | incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if 88 | next_word != vocab['']] 89 | 90 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds)) 91 | 92 | # Set aside complete sequences 93 | if len(complete_inds) > 0: 94 | complete_seqs.extend(seqs[complete_inds].tolist()) 95 | complete_seqs_scores.extend(top_k_scores[complete_inds]) 96 | k -= len(complete_inds) # reduce beam length accordingly 97 | 98 | # Proceed with incomplete sequences 99 | if k == 0: 100 | break 101 | seqs = seqs[incomplete_inds] 102 | h = h[prev_word_inds[incomplete_inds]] 103 | c = c[prev_word_inds[incomplete_inds]] 104 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]] 105 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1) 106 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1) 107 | 108 | 109 | 110 | # Break if things have been going on too long 111 | if step > 50: 112 | break 113 | step += 1 114 | 115 | 116 | i = complete_seqs_scores.index(max(complete_seqs_scores)) 117 | seq = complete_seqs[i] 118 | 119 | # Hypotheses 120 | hypotheses.append([w for w in seq if w not in {vocab[''], vocab[''], vocab['']}]) 121 | hypotheses = hypotheses[0] 122 | return hypotheses -------------------------------------------------------------------------------- /web_app/modules/Beam_search.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from ori_models import Encoder, DecoderWithAttention 4 | from nltk.translate.bleu_score import corpus_bleu 5 | from PIL import Image 6 | import torch.nn.functional as F 7 | from pdb import set_trace 8 | 9 | 10 | def beam_search(checkpoint_path, img_path, beam_size = 5, vocab = None, transforms = None, device = None): 11 | 12 | k = beam_size 13 | ## imput tensor preparation 14 | img = Image.open(img_path) 15 | if transforms is not None: img = transforms(img) 16 | img = img.unsqueeze(0) #treating as batch of size 1 17 | 18 | # Checkpoint loading 19 | checkpoint = torch.load(checkpoint_path, map_location=str(device)) 20 | decoder = checkpoint['decoder'] 21 | decoder = decoder.to(device) 22 | decoder.eval() 23 | encoder = checkpoint['encoder'] 24 | encoder = encoder.to(device) 25 | encoder.eval() 26 | 27 | 28 | # encoder output 29 | encoder_out = encoder(img) 30 | encoder_dim = encoder_out.size(-1) 31 | encoder_out = encoder_out.view(1, -1, encoder_dim) 32 | 33 | # expand or repeat 'k' time 34 | num_pixels = encoder_out.size(1) 35 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim) 36 | 37 | # Tensor to store top k previous words at each step; now they're just 38 | k_prev_words = torch.LongTensor([[vocab['']]] * k).to(device) # (k, 1) 39 | 40 | # Tensor to store top k sequences; now they're just 41 | seqs = k_prev_words # (k, 1) 42 | 43 | # Tensor to store top k sequences' scores; now they're just 0 44 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1) 45 | 46 | # Lists to store completed sequences and scores 47 | complete_seqs = list() 48 | complete_seqs_scores = list() 49 | 50 | # Start decoding 51 | step = 1 52 | h, c = decoder.init_hidden_state(encoder_out) 53 | 54 | hypotheses = list() 55 | 56 | # s is a number less than or equal to k, because sequences are removed from this process once they hit 57 | while True: 58 | 59 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim) 60 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels) 61 | gate = 
decoder.sigmoid(decoder.f_beta(h)) 62 | awe = (gate * awe) 63 | 64 | h, c = decoder.decode_step(torch.cat([embeddings, awe], dim=1), (h, c)) 65 | scores = decoder.fc(h) 66 | scores = F.log_softmax(scores, dim=1) 67 | 68 | 69 | # Add scores to prev scores 70 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size) 71 | 72 | # For the first step, all k points will have the same scores (since same k previous words, h, c) 73 | if step == 1: 74 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s) 75 | else: 76 | # Unroll and find top scores, and their unrolled indices 77 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s) 78 | 79 | # Convert unrolled indices to actual indices of scores 80 | prev_word_inds = top_k_words / len(vocab) # (s) 81 | next_word_inds = top_k_words % len(vocab) # (s) 82 | 83 | # Add new words to sequences 84 | seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1) # (s, step+1) stroes indices of words 85 | 86 | # Which sequences are incomplete (didn't reach )? 87 | incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if 88 | next_word != vocab['']] 89 | 90 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds)) 91 | 92 | # Set aside complete sequences 93 | if len(complete_inds) > 0: 94 | complete_seqs.extend(seqs[complete_inds].tolist()) 95 | complete_seqs_scores.extend(top_k_scores[complete_inds]) 96 | k -= len(complete_inds) # reduce beam length accordingly 97 | 98 | # Proceed with incomplete sequences 99 | if k == 0: 100 | break 101 | seqs = seqs[incomplete_inds] 102 | h = h[prev_word_inds[incomplete_inds]] 103 | c = c[prev_word_inds[incomplete_inds]] 104 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]] 105 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1) 106 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1) 107 | 108 | 109 | 110 | # Break if things have been going on too long 111 | if step > 50: 112 | break 113 | step += 1 114 | 115 | 116 | i = complete_seqs_scores.index(max(complete_seqs_scores)) 117 | seq = complete_seqs[i] 118 | 119 | # Hypotheses 120 | hypotheses.append([w for w in seq if w not in {vocab[''], vocab[''], vocab['']}]) 121 | hypotheses = hypotheses[0] 122 | return hypotheses -------------------------------------------------------------------------------- /modules/model.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F, init 3 | from torchvision import transforms, models 4 | import torch 5 | import random 6 | from pdb import set_trace 7 | 8 | 9 | device =torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | 13 | # create a embedding layer 14 | def create_emb(embedding_array): 15 | emb = nn.Embedding(len(word_map),embedding_dim) 16 | emb.weight.data = torch.from_numpy(embedding_array).float() 17 | return emb 18 | 19 | class Encoder(nn.Module): 20 | def __init__(self,encode_img_size, fine_tune = False): 21 | super(Encoder, self).__init__() 22 | self.enc_imgsize = encode_img_size 23 | resnet = models.resnet101(pretrained=True) 24 | self.encoder = nn.Sequential(*list(resnet.children())[:-2]) # removing final Linear layer 25 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encode_img_size,encode_img_size)) 26 | self.fine_tune = fine_tune 27 | self.fine_tune_h() 28 | 29 | def fine_tune_h(self): 30 | """ 31 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of 
the encoder. 32 | :param fine_tune: Allow? 33 | """ 34 | for p in self.encoder.parameters(): 35 | p.requires_grad = False 36 | 37 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4 38 | for c in list(self.encoder.children())[5:]: 39 | for p in c.parameters(): 40 | p.requires_grad = self.fine_tune 41 | 42 | def forward(self,X): 43 | out = self.encoder(X) # X is tensor of size (batch size, 3 (RGB), input height, width) 44 | out = self.adaptive_pool(out) # output (batch_size, 2048, encoded_image_size, encoded_image_size) 45 | out = out.permute(0, 2, 3, 1) 46 | out = out.view(out.size(0), -1, out.size(3)) 47 | return out 48 | 49 | class Decoder(nn.Module): 50 | def __init__(self,attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5, pretrained_embedding = None,teacher_forcing_ratio = 0): 51 | super(Decoder, self).__init__() 52 | self.attention_dim = attention_dim 53 | self.embed_dim = embed_dim 54 | self.decoder_dim = decoder_dim 55 | self.encoder_dim = encoder_dim 56 | self.vocab_size = vocab_size 57 | self.teacher_forcing_ratio = teacher_forcing_ratio 58 | self.dropout = nn.Dropout(dropout) 59 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) 60 | self.embedding = nn.Embedding(vocab_size,embed_dim) 61 | self.lstm = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) 62 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell 63 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell 64 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # gate 65 | self.pretrained_embedding = pretrained_embedding 66 | self.sigmoid = nn.Sigmoid() 67 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary 68 | self.init_weights() 69 | 70 | def init_weights(self): 71 | """ 72 | Initializes some parameters with values from the uniform distribution 73 | 74 | """ 75 | self.embedding.weight.data.uniform_(-0.1, 0.1) 76 | self.fc.bias.data.fill_(0) 77 | self.fc.weight.data.uniform_(-0.1,0.1) 78 | 79 | # Kaiming initialization 80 | #init.kaiming_normal_(self.init_h.weight, mode='fan_in') 81 | #init.kaiming_normal_(self.init_c.weight, mode='fan_in') 82 | #init.kaiming_normal_(self.f_beta.weight, mode='fan_in') 83 | #init.kaiming_normal_(self.fc.weight, mode='fan_in') 84 | 85 | def pretrained(self): 86 | if self.pretrained_embedding is not None: 87 | self.embedding.weight.data = torch.from_numpy(self.pretrained_embedding) 88 | 89 | def init_hidden_state(self, encoder_out): 90 | 91 | mean_encoder_out = encoder_out.mean(dim=1) 92 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim) 93 | c = self.init_c(mean_encoder_out) 94 | return h, c 95 | 96 | def forward(self,encoder_out, encoded_captions,decode_lengths,inds): 97 | batch_size = encoder_out.size(0) 98 | encoder_dim = encoder_out.size(-1) 99 | vocab_size = self.vocab_size 100 | num_pixels = encoder_out.size(1) 101 | #embeddings = self.embedding(encoded_captions) 102 | 103 | ## initialize hidden state 104 | h, c = self.init_hidden_state(encoder_out) 105 | 106 | #dec_out = torch.zeros(1,batch_size,self.decoder_dim).to(device) #uncomment for teacher forcing 107 | 108 | decode_lengths = decode_lengths - 1 109 | 110 | max_len = max(decode_lengths).item() 111 | 112 | 113 | # Create tensors to hold word prediction scores and alphas (on the model's device, as in ori_models.py) 114 | predictions = torch.zeros(batch_size, max_len, vocab_size).to(device) 115 | alphas = torch.zeros(batch_size, max_len, num_pixels).to(device) 116 |
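# Decode one word per time-step. Captions are sorted by decreasing length
# upstream, so at step t only the first batch_size_t sequences are still
# active. Each step: attention re-weights the encoder's pixel grid against
# the current hidden state, the sigmoid gate f_beta scales the resulting
# context vector, and the LSTMCell consumes [word embedding ; gated context].
# With probability teacher_forcing_ratio the ground-truth token is fed as
# input (always at t == 0); otherwise the model's own previous prediction
# prev_word is fed back.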
117 | for t in range(max_len): 118 | batch_size_t = sum([l.item() > t for l in decode_lengths]) 119 | attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t], 120 | h[:batch_size_t]) 121 | gate = self.sigmoid(self.f_beta(h[:batch_size_t])) # gating scalar, (batch_size_t, encoder_dim) 122 | attention_weighted_encoding = gate * attention_weighted_encoding 123 | 124 | # teacher forcing 125 | use_teacher_forcing = True if random.random() < self.teacher_forcing_ratio else False 126 | 127 | 128 | inp_emb = self.embedding(encoded_captions[:batch_size_t,t]).float() if (use_teacher_forcing or t==0) else self.embedding(prev_word[:batch_size_t]).float() 129 | #self.emb2dec_dim((embeddings[:batch_size_t, t, :]).float()) use syntax for teacher forcing 130 | #inp_emb = inp_emb if (use_teacher_forcing or t==0) else dec_out.squeeze(0)[:batch_size_t] #uncomment to add teacher forcing 131 | 132 | h, c = self.lstm( 133 | torch.cat([inp_emb, attention_weighted_encoding], dim=1), 134 | (h[:batch_size_t], c[:batch_size_t])) # (batch_size_t, decoder_dim) 135 | preds = self.fc(self.dropout(h)) # (batch_size_t, vocab_size) 136 | predictions[:batch_size_t,t , :] = preds 137 | alphas[:batch_size_t, t, :] = alpha 138 | 139 | _,prev_word = preds.max(dim=-1) 140 | return predictions,decode_lengths, alphas, inds 141 | 142 | class Attention(nn.Module): 143 | def __init__(self, encoder_dim, decoder_dim, attention_dim): 144 | super(Attention, self).__init__() 145 | 146 | self.enc_att = nn.Linear(encoder_dim,attention_dim) 147 | self.dec_att = nn.Linear(decoder_dim,attention_dim) 148 | self.att = nn.Linear(attention_dim,1) 149 | self.relu = nn.ReLU() 150 | self.softmax = nn.Softmax(dim=1) 151 | 152 | # kaiming 153 | #init.kaiming_normal_(self.enc_att.weight, mode='fan_in') 154 | #init.kaiming_normal_(self.dec_att.weight, mode='fan_in') 155 | #init.kaiming_normal_(self.att.weight, mode='fan_in') 156 | 157 | def forward(self,encoder_out, decoder_hidden): 158 | encoder_att = self.enc_att(encoder_out) 159 | decoder_att = self.dec_att(decoder_hidden) 160 | att = self.att(self.relu(encoder_att + decoder_att.unsqueeze(1))).squeeze(2) #testing added batchnorm 161 | alpha = self.softmax(att) 162 | attention_weighted_encoding = (encoder_out*alpha.unsqueeze(2)).sum(dim=1) 163 | 164 | return attention_weighted_encoding, alpha -------------------------------------------------------------------------------- /modules/ori_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torchvision 4 | from pdb import set_trace 5 | 6 | 7 | class Encoder(nn.Module): 8 | """ 9 | Encoder. 10 | """ 11 | 12 | def __init__(self, encoded_image_size=14): 13 | super(Encoder, self).__init__() 14 | self.enc_image_size = encoded_image_size 15 | 16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101 17 | 18 | # Remove linear and pool layers (since we're not doing classification) 19 | modules = list(resnet.children())[:-2] 20 | self.resnet = nn.Sequential(*modules) 21 | 22 | # Resize image to fixed size to allow input images of variable size 23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size)) 24 | 25 | self.fine_tune() 26 | 27 | def forward(self, images): 28 | """ 29 | Forward propagation. 
30 | 31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size) 32 | :return: encoded images 33 | """ 34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32) 35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size) 36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048) 37 | return out 38 | 39 | def fine_tune(self, fine_tune=True): 40 | """ 41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder. 42 | 43 | :param fine_tune: Allow? 44 | """ 45 | for p in self.resnet.parameters(): 46 | p.requires_grad = False 47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4 48 | for c in list(self.resnet.children())[5:]: 49 | for p in c.parameters(): 50 | p.requires_grad = fine_tune 51 | 52 | 53 | class Attention(nn.Module): 54 | """ 55 | Attention Network. 56 | """ 57 | 58 | def __init__(self, encoder_dim, decoder_dim, attention_dim): 59 | """ 60 | :param encoder_dim: feature size of encoded images 61 | :param decoder_dim: size of decoder's RNN 62 | :param attention_dim: size of the attention network 63 | """ 64 | super(Attention, self).__init__() 65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image 66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output 67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed 68 | self.relu = nn.ReLU() 69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights 70 | 71 | def forward(self, encoder_out, decoder_hidden): 72 | """ 73 | Forward propagation. 74 | 75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim) 77 | :return: attention weighted encoding, weights 78 | """ 79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim) 80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim) 81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels) 82 | alpha = self.softmax(att) # (batch_size, num_pixels) 83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim) 84 | 85 | return attention_weighted_encoding, alpha 86 | 87 | 88 | class DecoderWithAttention(nn.Module): 89 | """ 90 | Decoder. 
91 | """ 92 | 93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5): 94 | """ 95 | :param attention_dim: size of attention network 96 | :param embed_dim: embedding size 97 | :param decoder_dim: size of decoder's RNN 98 | :param vocab_size: size of vocabulary 99 | :param encoder_dim: feature size of encoded images 100 | :param dropout: dropout 101 | """ 102 | super(DecoderWithAttention, self).__init__() 103 | 104 | self.encoder_dim = encoder_dim 105 | self.attention_dim = attention_dim 106 | self.embed_dim = embed_dim 107 | self.decoder_dim = decoder_dim 108 | self.vocab_size = vocab_size 109 | self.dropout = dropout 110 | 111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network 112 | 113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer 114 | self.dropout = nn.Dropout(p=self.dropout) 115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell 116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell 117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell 118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate 119 | self.sigmoid = nn.Sigmoid() 120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary 121 | self.init_weights() # initialize some layers with the uniform distribution 122 | 123 | def init_weights(self): 124 | """ 125 | Initializes some parameters with values from the uniform distribution, for easier convergence. 126 | """ 127 | self.embedding.weight.data.uniform_(-0.1, 0.1) 128 | self.fc.bias.data.fill_(0) 129 | self.fc.weight.data.uniform_(-0.1, 0.1) 130 | 131 | def load_pretrained_embeddings(self, embeddings): 132 | """ 133 | Loads embedding layer with pre-trained embeddings. 134 | 135 | :param embeddings: pre-trained embeddings 136 | """ 137 | self.embedding.weight = nn.Parameter(embeddings) 138 | 139 | def fine_tune_embeddings(self, fine_tune=True): 140 | """ 141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings). 142 | 143 | :param fine_tune: Allow? 144 | """ 145 | for p in self.embedding.parameters(): 146 | p.requires_grad = fine_tune 147 | 148 | def init_hidden_state(self, encoder_out): 149 | """ 150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images. 151 | 152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 153 | :return: hidden state, cell state 154 | """ 155 | mean_encoder_out = encoder_out.mean(dim=1) 156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim) 157 | c = self.init_c(mean_encoder_out) 158 | return h, c 159 | 160 | def forward(self, encoder_out, encoded_captions, caption_lengths): 161 | """ 162 | Forward propagation. 
163 | 
164 |         :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 |         :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 |         :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 |         :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 |         """
169 | 
170 |         batch_size = encoder_out.size(0)
171 |         encoder_dim = encoder_out.size(-1)
172 |         vocab_size = self.vocab_size
173 | 
174 |         # Flatten image
175 |         encoder_out = encoder_out.view(batch_size, -1, encoder_dim)  # (batch_size, num_pixels, encoder_dim)
176 |         num_pixels = encoder_out.size(1)
177 | 
178 |         # Sort input data by decreasing lengths; why? apparent below
179 |         caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 |         encoder_out = encoder_out[sort_ind]
181 |         encoded_captions = encoded_captions[sort_ind]
182 | 
183 |         # Embedding
184 |         embeddings = self.embedding(encoded_captions)  # (batch_size, max_caption_length, embed_dim)
185 | 
186 |         # Initialize LSTM state
187 |         h, c = self.init_hidden_state(encoder_out)  # (batch_size, decoder_dim)
188 | 
189 |         # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 |         # So, decoding lengths are actual lengths - 1
191 |         decode_lengths = (caption_lengths - 1).tolist()
192 | 
193 |         # Create tensors to hold word prediction scores and alphas
194 |         predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 |         alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 | 
197 |         # At each time-step, decode by
198 |         # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 |         # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 |         for t in range(max(decode_lengths)):
201 |             batch_size_t = sum([l > t for l in decode_lengths])
202 |             attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 |                                                                 h[:batch_size_t])
204 |             gate = self.sigmoid(self.f_beta(h[:batch_size_t]))  # gating scalar, (batch_size_t, encoder_dim)
205 |             attention_weighted_encoding = gate * attention_weighted_encoding
206 |             h, c = self.decode_step(
207 |                 torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 |                 (h[:batch_size_t], c[:batch_size_t]))  # (batch_size_t, decoder_dim)
209 |             preds = self.fc(self.dropout(h))  # (batch_size_t, vocab_size)
210 |             predictions[:batch_size_t, t, :] = preds
211 |             alphas[:batch_size_t, t, :] = alpha
212 | 
213 |         return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 | 
--------------------------------------------------------------------------------
/web_app/modules/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torchvision
4 | from pdb import set_trace
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # device for tensors created in DecoderWithAttention.forward
6 | 
7 | class Encoder(nn.Module):
8 |     """
9 |     Encoder.
10 | """ 11 | 12 | def __init__(self, encoded_image_size=14): 13 | super(Encoder, self).__init__() 14 | self.enc_image_size = encoded_image_size 15 | 16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101 17 | 18 | # Remove linear and pool layers (since we're not doing classification) 19 | modules = list(resnet.children())[:-2] 20 | self.resnet = nn.Sequential(*modules) 21 | 22 | # Resize image to fixed size to allow input images of variable size 23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size)) 24 | 25 | self.fine_tune() 26 | 27 | def forward(self, images): 28 | """ 29 | Forward propagation. 30 | 31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size) 32 | :return: encoded images 33 | """ 34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32) 35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size) 36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048) 37 | return out 38 | 39 | def fine_tune(self, fine_tune=True): 40 | """ 41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder. 42 | 43 | :param fine_tune: Allow? 44 | """ 45 | for p in self.resnet.parameters(): 46 | p.requires_grad = False 47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4 48 | for c in list(self.resnet.children())[5:]: 49 | for p in c.parameters(): 50 | p.requires_grad = fine_tune 51 | 52 | 53 | class Attention(nn.Module): 54 | """ 55 | Attention Network. 56 | """ 57 | 58 | def __init__(self, encoder_dim, decoder_dim, attention_dim): 59 | """ 60 | :param encoder_dim: feature size of encoded images 61 | :param decoder_dim: size of decoder's RNN 62 | :param attention_dim: size of the attention network 63 | """ 64 | super(Attention, self).__init__() 65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image 66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output 67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed 68 | self.relu = nn.ReLU() 69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights 70 | 71 | def forward(self, encoder_out, decoder_hidden): 72 | """ 73 | Forward propagation. 74 | 75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim) 77 | :return: attention weighted encoding, weights 78 | """ 79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim) 80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim) 81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels) 82 | alpha = self.softmax(att) # (batch_size, num_pixels) 83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim) 84 | 85 | return attention_weighted_encoding, alpha 86 | 87 | 88 | class DecoderWithAttention(nn.Module): 89 | """ 90 | Decoder. 
91 | """ 92 | 93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5): 94 | """ 95 | :param attention_dim: size of attention network 96 | :param embed_dim: embedding size 97 | :param decoder_dim: size of decoder's RNN 98 | :param vocab_size: size of vocabulary 99 | :param encoder_dim: feature size of encoded images 100 | :param dropout: dropout 101 | """ 102 | super(DecoderWithAttention, self).__init__() 103 | 104 | self.encoder_dim = encoder_dim 105 | self.attention_dim = attention_dim 106 | self.embed_dim = embed_dim 107 | self.decoder_dim = decoder_dim 108 | self.vocab_size = vocab_size 109 | self.dropout = dropout 110 | 111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network 112 | 113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer 114 | self.dropout = nn.Dropout(p=self.dropout) 115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell 116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell 117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell 118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate 119 | self.sigmoid = nn.Sigmoid() 120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary 121 | self.init_weights() # initialize some layers with the uniform distribution 122 | 123 | def init_weights(self): 124 | """ 125 | Initializes some parameters with values from the uniform distribution, for easier convergence. 126 | """ 127 | self.embedding.weight.data.uniform_(-0.1, 0.1) 128 | self.fc.bias.data.fill_(0) 129 | self.fc.weight.data.uniform_(-0.1, 0.1) 130 | 131 | def load_pretrained_embeddings(self, embeddings): 132 | """ 133 | Loads embedding layer with pre-trained embeddings. 134 | 135 | :param embeddings: pre-trained embeddings 136 | """ 137 | self.embedding.weight = nn.Parameter(embeddings) 138 | 139 | def fine_tune_embeddings(self, fine_tune=True): 140 | """ 141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings). 142 | 143 | :param fine_tune: Allow? 144 | """ 145 | for p in self.embedding.parameters(): 146 | p.requires_grad = fine_tune 147 | 148 | def init_hidden_state(self, encoder_out): 149 | """ 150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images. 151 | 152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 153 | :return: hidden state, cell state 154 | """ 155 | mean_encoder_out = encoder_out.mean(dim=1) 156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim) 157 | c = self.init_c(mean_encoder_out) 158 | return h, c 159 | 160 | def forward(self, encoder_out, encoded_captions, caption_lengths): 161 | """ 162 | Forward propagation. 
163 | 
164 |         :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 |         :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 |         :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 |         :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 |         """
169 | 
170 |         batch_size = encoder_out.size(0)
171 |         encoder_dim = encoder_out.size(-1)
172 |         vocab_size = self.vocab_size
173 | 
174 |         # Flatten image
175 |         encoder_out = encoder_out.view(batch_size, -1, encoder_dim)  # (batch_size, num_pixels, encoder_dim)
176 |         num_pixels = encoder_out.size(1)
177 | 
178 |         # Sort input data by decreasing lengths; why? apparent below
179 |         caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 |         encoder_out = encoder_out[sort_ind]
181 |         encoded_captions = encoded_captions[sort_ind]
182 | 
183 |         # Embedding
184 |         embeddings = self.embedding(encoded_captions)  # (batch_size, max_caption_length, embed_dim)
185 | 
186 |         # Initialize LSTM state
187 |         h, c = self.init_hidden_state(encoder_out)  # (batch_size, decoder_dim)
188 | 
189 |         # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 |         # So, decoding lengths are actual lengths - 1
191 |         decode_lengths = (caption_lengths - 1).tolist()
192 | 
193 |         # Create tensors to hold word prediction scores and alphas
194 |         predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 |         alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 | 
197 |         # At each time-step, decode by
198 |         # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 |         # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 |         for t in range(max(decode_lengths)):
201 |             batch_size_t = sum([l > t for l in decode_lengths])
202 |             attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 |                                                                 h[:batch_size_t])
204 |             gate = self.sigmoid(self.f_beta(h[:batch_size_t]))  # gating scalar, (batch_size_t, encoder_dim)
205 |             attention_weighted_encoding = gate * attention_weighted_encoding
206 |             h, c = self.decode_step(
207 |                 torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 |                 (h[:batch_size_t], c[:batch_size_t]))  # (batch_size_t, decoder_dim)
209 |             preds = self.fc(self.dropout(h))  # (batch_size_t, vocab_size)
210 |             predictions[:batch_size_t, t, :] = preds
211 |             alphas[:batch_size_t, t, :] = alpha
212 | 
213 |         return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 | 
--------------------------------------------------------------------------------
/web_app/ori_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torchvision
4 | from pdb import set_trace
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # device for tensors created in DecoderWithAttention.forward
6 | 
7 | class Encoder(nn.Module):
8 |     """
9 |     Encoder.
10 | """ 11 | 12 | def __init__(self, encoded_image_size=14): 13 | super(Encoder, self).__init__() 14 | self.enc_image_size = encoded_image_size 15 | 16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101 17 | 18 | # Remove linear and pool layers (since we're not doing classification) 19 | modules = list(resnet.children())[:-2] 20 | self.resnet = nn.Sequential(*modules) 21 | 22 | # Resize image to fixed size to allow input images of variable size 23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size)) 24 | 25 | self.fine_tune() 26 | 27 | def forward(self, images): 28 | """ 29 | Forward propagation. 30 | 31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size) 32 | :return: encoded images 33 | """ 34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32) 35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size) 36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048) 37 | return out 38 | 39 | def fine_tune(self, fine_tune=True): 40 | """ 41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder. 42 | 43 | :param fine_tune: Allow? 44 | """ 45 | for p in self.resnet.parameters(): 46 | p.requires_grad = False 47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4 48 | for c in list(self.resnet.children())[5:]: 49 | for p in c.parameters(): 50 | p.requires_grad = fine_tune 51 | 52 | 53 | class Attention(nn.Module): 54 | """ 55 | Attention Network. 56 | """ 57 | 58 | def __init__(self, encoder_dim, decoder_dim, attention_dim): 59 | """ 60 | :param encoder_dim: feature size of encoded images 61 | :param decoder_dim: size of decoder's RNN 62 | :param attention_dim: size of the attention network 63 | """ 64 | super(Attention, self).__init__() 65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image 66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output 67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed 68 | self.relu = nn.ReLU() 69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights 70 | 71 | def forward(self, encoder_out, decoder_hidden): 72 | """ 73 | Forward propagation. 74 | 75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim) 77 | :return: attention weighted encoding, weights 78 | """ 79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim) 80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim) 81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels) 82 | alpha = self.softmax(att) # (batch_size, num_pixels) 83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim) 84 | 85 | return attention_weighted_encoding, alpha 86 | 87 | 88 | class DecoderWithAttention(nn.Module): 89 | """ 90 | Decoder. 
91 | """ 92 | 93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5): 94 | """ 95 | :param attention_dim: size of attention network 96 | :param embed_dim: embedding size 97 | :param decoder_dim: size of decoder's RNN 98 | :param vocab_size: size of vocabulary 99 | :param encoder_dim: feature size of encoded images 100 | :param dropout: dropout 101 | """ 102 | super(DecoderWithAttention, self).__init__() 103 | 104 | self.encoder_dim = encoder_dim 105 | self.attention_dim = attention_dim 106 | self.embed_dim = embed_dim 107 | self.decoder_dim = decoder_dim 108 | self.vocab_size = vocab_size 109 | self.dropout = dropout 110 | 111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network 112 | 113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer 114 | self.dropout = nn.Dropout(p=self.dropout) 115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell 116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell 117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell 118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate 119 | self.sigmoid = nn.Sigmoid() 120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary 121 | self.init_weights() # initialize some layers with the uniform distribution 122 | 123 | def init_weights(self): 124 | """ 125 | Initializes some parameters with values from the uniform distribution, for easier convergence. 126 | """ 127 | self.embedding.weight.data.uniform_(-0.1, 0.1) 128 | self.fc.bias.data.fill_(0) 129 | self.fc.weight.data.uniform_(-0.1, 0.1) 130 | 131 | def load_pretrained_embeddings(self, embeddings): 132 | """ 133 | Loads embedding layer with pre-trained embeddings. 134 | 135 | :param embeddings: pre-trained embeddings 136 | """ 137 | self.embedding.weight = nn.Parameter(embeddings) 138 | 139 | def fine_tune_embeddings(self, fine_tune=True): 140 | """ 141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings). 142 | 143 | :param fine_tune: Allow? 144 | """ 145 | for p in self.embedding.parameters(): 146 | p.requires_grad = fine_tune 147 | 148 | def init_hidden_state(self, encoder_out): 149 | """ 150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images. 151 | 152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim) 153 | :return: hidden state, cell state 154 | """ 155 | mean_encoder_out = encoder_out.mean(dim=1) 156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim) 157 | c = self.init_c(mean_encoder_out) 158 | return h, c 159 | 160 | def forward(self, encoder_out, encoded_captions, caption_lengths): 161 | """ 162 | Forward propagation. 
163 | 
164 |         :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 |         :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 |         :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 |         :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 |         """
169 | 
170 |         batch_size = encoder_out.size(0)
171 |         encoder_dim = encoder_out.size(-1)
172 |         vocab_size = self.vocab_size
173 | 
174 |         # Flatten image
175 |         encoder_out = encoder_out.view(batch_size, -1, encoder_dim)  # (batch_size, num_pixels, encoder_dim)
176 |         num_pixels = encoder_out.size(1)
177 | 
178 |         # Sort input data by decreasing lengths; why? apparent below
179 |         caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 |         encoder_out = encoder_out[sort_ind]
181 |         encoded_captions = encoded_captions[sort_ind]
182 | 
183 |         # Embedding
184 |         embeddings = self.embedding(encoded_captions)  # (batch_size, max_caption_length, embed_dim)
185 | 
186 |         # Initialize LSTM state
187 |         h, c = self.init_hidden_state(encoder_out)  # (batch_size, decoder_dim)
188 | 
189 |         # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 |         # So, decoding lengths are actual lengths - 1
191 |         decode_lengths = (caption_lengths - 1).tolist()
192 | 
193 |         # Create tensors to hold word prediction scores and alphas
194 |         predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 |         alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 | 
197 |         # At each time-step, decode by
198 |         # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 |         # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 |         for t in range(max(decode_lengths)):
201 |             batch_size_t = sum([l > t for l in decode_lengths])
202 |             attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 |                                                                 h[:batch_size_t])
204 |             gate = self.sigmoid(self.f_beta(h[:batch_size_t]))  # gating scalar, (batch_size_t, encoder_dim)
205 |             attention_weighted_encoding = gate * attention_weighted_encoding
206 |             h, c = self.decode_step(
207 |                 torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 |                 (h[:batch_size_t], c[:batch_size_t]))  # (batch_size_t, decoder_dim)
209 |             preds = self.fc(self.dropout(h))  # (batch_size_t, vocab_size)
210 |             predictions[:batch_size_t, t, :] = preds
211 |             alphas[:batch_size_t, t, :] = alpha
212 | 
213 |         return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 | 
--------------------------------------------------------------------------------
/modules/custom_callbacks.py:
--------------------------------------------------------------------------------
1 | from statistics import mean
2 | from fastai.callback import Callback
3 | import copy as cp
4 | from torch import nn
5 | from fastai.vision import *
6 | from pathlib import Path, posixpath
7 | from pdb import set_trace
8 | from nltk.translate.bleu_score import corpus_bleu
9 | from torch.nn.utils.rnn import pack_padded_sequence
10 | 
11 | 
12 | 
13 | 
14 | 
15 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16 | 
17 | 
18 | def beam_search(mod, img, vocab=None, beam_size=5):
19 |     with torch.no_grad():
20 |         k = beam_size
21 | 
22 |         ## input tensor preparation
23 |         img = img.unsqueeze(0)  # treating as batch of size 1
24 | 
25 |         ## model preparation
26 |         #mod = learn.model
27 | 
28 |         # encoder output
29 |         encoder_out = mod.encoder(img)
30 |         encoder_dim = encoder_out.size(-1)
31 |         num_pixels = encoder_out.size(1)
32 | 
33 |         # expand or repeat 'k' times
34 |         encoder_out = encoder_out.expand(k, num_pixels, encoder_dim)  # (k, num_pixels, encoder_dim)
35 | 
36 |         # Tensor to store top k previous words at each step; now they're just <start>
37 |         k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device)  # (k, 1)
38 | 
39 |         # Tensor to store top k sequences; now they're just <start>
40 |         seqs = k_prev_words  # (k, 1)
41 | 
42 |         # Tensor to store top k sequences' scores; now they're just 0
43 |         top_k_scores = torch.zeros(k, 1).to(device)  # (k, 1)
44 | 
45 |         # Lists to store completed sequences and scores
46 |         complete_seqs = list()
47 |         complete_seqs_scores = list()
48 | 
49 |         # Start decoding
50 |         step = 1
51 |         h, c = mod.decoder.init_hidden_state(encoder_out)
52 | 
53 |         references = list()
54 |         hypotheses = list()
55 | 
56 |         # s is a number less than or equal to k, because sequences are removed from this process once they hit <end>
57 |         while True:
58 |             embeddings = mod.decoder.embedding(k_prev_words).squeeze(1).float()  # (s, embed_dim)
59 |             awe, _ = mod.decoder.attention(encoder_out, h)  # (s, encoder_dim), (s, num_pixels)
60 |             gate = mod.decoder.sigmoid(mod.decoder.f_beta(h))
61 |             awe = (gate * awe)
62 | 
63 |             h, c = mod.decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c))
64 |             scores = mod.decoder.fc(h)
65 |             scores = F.log_softmax(scores, dim=1)
66 | 
67 | 
68 |             # Add scores to prev scores
69 |             scores = top_k_scores.expand_as(scores) + scores  # (s, vocab_size)
70 | 
71 |             # For the first step, all k points will have the same scores (since same k previous words, h, c)
72 |             if step == 1:
73 |                 top_k_scores, top_k_words = scores[0].topk(k, 0, True, True)  # (s)
74 |             else:
75 |                 # Unroll and find top scores, and their unrolled indices
76 |                 top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True)  # (s)
77 | 
78 |             # Convert unrolled indices to actual indices of scores
79 |             prev_word_inds = top_k_words // len(vocab)  # (s); integer division recovers the beam index
80 |             next_word_inds = top_k_words % len(vocab)  # (s)
81 | 
82 |             # Add new words to sequences
83 |             seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1) stores indices of words
84 | 
85 |             # Which sequences are incomplete (didn't reach <end>)?
86 |             incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
87 |                                next_word != vocab['<end>']]
88 | 
89 |             complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
90 | 
91 |             # Set aside complete sequences
92 |             if len(complete_inds) > 0:
93 |                 complete_seqs.extend(seqs[complete_inds].tolist())
94 |                 complete_seqs_scores.extend(top_k_scores[complete_inds])
95 |                 k -= len(complete_inds)  # reduce beam length accordingly
96 | 
97 |             # Proceed with incomplete sequences
98 |             if k == 0:
99 |                 break
100 |             seqs = seqs[incomplete_inds]
101 |             h = h[prev_word_inds[incomplete_inds]]
102 |             c = c[prev_word_inds[incomplete_inds]]
103 |             encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
104 |             top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
105 |             k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
106 | 
107 | 
108 |             # Break if things have been going on too long
109 |             if step > 50:
110 |                 break
111 |             step += 1
112 | 
113 |         i = complete_seqs_scores.index(max(complete_seqs_scores))
114 |         seq = complete_seqs[i]
115 | 
116 |         # Hypotheses
117 |         hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
118 | 
119 |         return hypotheses
120 | 
121 | 
122 | # Loss Function
123 | def loss_func(input, targets, lamb=1):
124 |     pred, decode_lengths, alphas, _ = input
125 |     pred = pack_padded_sequence(pred, decode_lengths, batch_first=True).to(device)
126 |     targs = pack_padded_sequence(targets, decode_lengths, batch_first=True).to(device)
127 |     loss = nn.CrossEntropyLoss().to(device)(pred.data, targs.data.long())
128 |     loss += (lamb * ((1. - alphas.sum(dim=1)) ** 2.).mean()).to(device)  # doubly stochastic attention regularization
129 |     return loss
130 | 
131 | 
132 | 
133 | def topK_accuracy(input, targets, k=5):
134 |     """
135 |     Computes top-k accuracy, from predicted and true labels.
136 |     :param input: model output tuple (predictions, decode lengths, alphas, sort indices)
137 |     :param targets: true labels
138 |     :param k: k in top-k accuracy
139 |     :return: top-k accuracy
140 |     """
141 |     pred, decode_lengths, alphas, _ = input
142 |     batch_size = targets.size(0)
143 |     scores = pack_padded_sequence(pred, decode_lengths, batch_first=True).to(device)
144 |     targ = pack_padded_sequence(targets, decode_lengths, batch_first=True).to(device)
145 |     batch_size = targ.data.size(0)
146 |     _, ind = scores.data.topk(k, 1, True, True)
147 |     correct = ind.eq(targ.data.view(-1, 1).expand_as(ind))
148 |     correct_total = correct.view(-1).float().sum()  # 0D tensor
149 |     return correct_total * (100.0 / batch_size)
150 | 
151 | 
152 | class TeacherForcingCallback(Callback):
153 |     def __init__(self, learn:Learner):
154 |         super().__init__()
155 |         self.learn = learn
156 | 
157 |     def on_batch_begin(self, epoch, **kwargs):
158 |         self.learn.model.decoder.teacher_forcing_ratio = (10 - epoch) * 0.1 if epoch < 10 else 0  # decay teacher forcing from 1.0 toward 0 over the first 10 epochs
159 | 
160 |     def on_batch_end(self, **kwargs):
161 |         self.learn.model.decoder.teacher_forcing_ratio = 0.
162 | 
163 | class GradientClipping(LearnerCallback):
164 |     "Gradient clipping during training."
165 |     def __init__(self, learn:Learner, clip:float = 0.3):
166 |         super().__init__(learn)
167 |         self.clip = clip
168 | 
169 |     def on_backward_end(self, **kwargs):
170 |         "Clip the gradient before the optimizer step."
171 |         if self.clip: nn.utils.clip_grad_norm_(self.learn.model.parameters(), self.clip)
172 | 
173 | 
174 | 
175 | class BleuMetric(Callback):
176 |     def __init__(self, metadata=None, vocab=None):
177 |         super().__init__()
178 |         self.vocab = vocab
179 |         self.metadata = metadata
180 | 
181 |     def on_epoch_begin(self, **kwargs):
182 |         self.bleureferences = list()
183 |         self.bleucandidates = list()
184 | 
185 | 
186 |     def on_batch_end(self, last_output, last_target, **kwargs):
187 |         pred, decode_lengths, _, inds = last_output
188 |         references = self.metadata.numericalized_ref.loc[inds.tolist()]
189 |         _, pred_words = pred.max(dim=-1)
190 |         pred_words, decode_lengths, references = list(pred_words), decode_lengths, list(references)
191 |         hypotheses = list()
192 |         for i, cap in enumerate(pred_words): hypotheses.append([x for x in cap.tolist()[:decode_lengths[i]] if x not in {self.vocab['<start>'], self.vocab['<end>'], self.vocab['<pad>']}])
193 |         #for i,cap in enumerate(pred_words): hypotheses.append([x for x in cap.tolist() if x not in {self.vocab['xxunk'], self.vocab['xxpad'], self.vocab['xxbos'], self.vocab['xxeos'],self.vocab['xxfld'],self.vocab['xxmaj'],self.vocab['xxup'],self.vocab['xxrep'],self.vocab['xxwrep']}])
194 |         self.bleureferences.extend(references)
195 |         self.bleucandidates.extend(hypotheses)
196 | 
197 | 
198 | 
199 | 
200 |     def on_epoch_end(self, last_metrics, **kwargs):
201 |         assert len(self.bleureferences) == len(self.bleucandidates)
202 |         # print('\n'+' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[0]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[0][0]]))
203 |         # print(' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[25]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[25][0]]))
204 |         # print(' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[99]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[99][0]])+'\n')
205 | 
206 |         bleu4 = corpus_bleu(self.bleureferences, self.bleucandidates)
207 |         return add_metrics(last_metrics, bleu4)
208 | 
209 | 
210 | class BeamSearchBleu4(LearnerCallback):
211 |     def __init__(self, learn:Learner, metadata=None, vocab=None, beam_fn=beam_search):
212 |         super().__init__(learn)
213 |         self.beam_fn = beam_fn
214 |         self.vocab = vocab
215 |         self.metadata = metadata
216 | 
217 |     def on_epoch_begin(self, **kwargs):
218 |         self.beamreferences = list()
219 |         self.beamcandidates = list()
220 | 
221 |     def on_batch_end(self, last_input, last_target, **kwargs):
222 |         model_copy = cp.deepcopy(self.learn.model)
223 |         imgs, _, _, inds = last_input
224 |         references = self.metadata.numericalized_ref.loc[inds.tolist()]
225 |         references = list(references)
226 |         hypotheses = list()
227 |         for img in imgs: hypotheses.append(self.beam_fn(model_copy, img, self.vocab)[0])
228 |         self.beamreferences.extend(references)
229 |         self.beamcandidates.extend(hypotheses)
230 | 
231 |     def on_epoch_end(self, last_metrics, **kwargs):
232 |         assert len(self.beamreferences) == len(self.beamcandidates)
233 |         print(' '.join([list(self.vocab.keys())[i-1] for i in self.beamcandidates[8]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.beamreferences[8][0]]))
234 |         return add_metrics(last_metrics, corpus_bleu(self.beamreferences, self.beamcandidates))
235 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Image Caption Generation
2 | 
3 | #### Implementation of the ***Show, Attend and Tell*** paper
4 | 
5 | - [Image Caption Generation](#image-caption-generation)
6 |   - [Demo](#demo)
7 |   - [What's in this repo?](#whats-in-this-repo)
8 |   - [Dataset Description](#dataset-description)
9 |   - [Input files preparation](#input-files-preparation)
10 |   - [Model architecture](#model-architecture)
11 |     - [Encoder](#encoder)
12 |     - [Attention Layer](#attention-layer)
13 |     - [Decoder](#decoder)
14 |   - [Training (using Fastai)](#training-using-fastai)
15 |     - [Fastai utilities](#fastai-utilities)
16 |     - [Training in Stages](#training-in-stages)
17 |   - [Model interpretation](#model-interpretation)
18 |   - [Technology used](#technology-used)
19 |   - [Credits](#credits)
20 |   - [Creator](#creator)
21 | 
22 | 
23 | ## Demo
24 | ![](snapshots/caption_gen.gif)
25 | 
26 | ## What's in this repo?
27 | * [main-Finalized.ipynb](main-Finalized.ipynb) - Notebook with all the preprocessing, data preparation, and model building and training steps.
28 | * [modules/model.py](modules/model.py) - PyTorch implementation of the model architecture.
29 | * [modules/custom_callbacks.py](modules/custom_callbacks.py) - Fastai callback utilities such as teacher forcing, gradient clipping, and the loss and validation metric functions.
30 | * [web_app](web_app) - This directory contains the model deployment setup files.
31 | 
32 | ## Dataset Description
33 | 
34 | https://www.kaggle.com/ming666/flicker8k-dataset
35 | 
36 | The **Flickr8k** dataset consists of around 8,000 images, each paired with five different captions that provide clear descriptions of the salient entities and events. The images were chosen from six different Flickr groups and tend not to contain any well-known people or locations; they were manually selected to depict a variety of scenes and situations. 6,000 images are used for training, 1,000 for testing, and 1,000 for development.
37 | 
38 | 
39 | 
40 | ## Input files preparation
41 | 
42 | #### 1. Preparation of the vocabulary dictionary
43 | 
44 | The caption labels need to be converted into numbers, since a network does not accept strings as labels. We therefore build a look-up dictionary that stores the word-to-number mappings.
45 | 
46 | Along with it, caption lengths are also computed; they are used for optimizing training (discussed in detail in the training part).
47 | 
48 | 
49 | #### 2. Create Dataset class
50 | 
51 | In PyTorch, inputs for deep learning tasks are fed in batches because of memory constraints. To facilitate this, we create a **Dataset** class that supports batch creation and loading.
52 | 
53 | The primary function of the Dataset is to store the input paths. This class is used by PyTorch's *DataLoader()* for loading images in batches.
54 | 
55 | #### 3. Create Dataloader object
56 | 
57 | The purpose of the **Dataloader** is to load batches of input-label pairs to be fed into the network.
58 | 
59 | It is always a good idea to sort the samples by caption length for faster computation. On the validation set, the **SortSampler** function from *Fastai*, which is built on top of PyTorch's **Sampler**, is used. On the training set, **SortishSampler**, which sorts the data by length with a bit of randomness, is used. The samplers return iterators of indices.
60 | 
61 | 
62 | #### 4. Create Pad_collate function
63 | Since captions come in different lengths, shorter captions must be padded to bring them to the same length, as PyTorch expects all captions in a batch to be of the same size.
64 | 
65 | The function collects the samples and returns the label tensor with padding; it is passed as the ```collate_fn``` argument when creating the ```DataLoader``` object. A minimal sketch of how these pieces fit together is shown below.
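The names ```CaptionDataset```, ```load_image```, ```sortish_sampler``` and ```pad_collate``` in this sketch are illustrative, not the notebook's exact code:

```py
import torch
from torch.utils.data import Dataset, DataLoader

class CaptionDataset(Dataset):
    "Stores image paths with their numericalized captions and loads one pair at a time."
    def __init__(self, image_paths, captions, transform=None):
        self.image_paths, self.captions, self.transform = image_paths, captions, transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, i):
        img = load_image(self.image_paths[i])  # hypothetical image-loading helper
        if self.transform: img = self.transform(img)
        return img, torch.tensor(self.captions[i])

def pad_collate(batch, pad_idx=0):
    "Collects samples and right-pads every caption to the longest one in the batch."
    imgs, caps = zip(*batch)
    lengths = [len(c) for c in caps]
    padded = torch.full((len(caps), max(lengths)), pad_idx, dtype=torch.long)
    for i, c in enumerate(caps):
        padded[i, :lengths[i]] = c
    return torch.stack(imgs), (padded, torch.tensor(lengths).unsqueeze(1))

# The sampler orders samples (roughly) by caption length, and pad_collate assembles batches:
# dl = DataLoader(train_ds, batch_size=25, sampler=sortish_sampler, collate_fn=pad_collate)
```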
66 | 
67 | ## Model architecture
68 | 
69 | The network architecture consists of three components: an encoder, an attention layer, and a decoder.
70 | 
71 | ### Encoder
72 | 
73 | The encoder is a convolutional neural network that takes raw images as input and outputs extracted features as encoded images. The extractor produces **L** vectors (one per spatial location), each of dimension **D** (the number of feature channels); each vector corresponds to a part of the image, so **L** different features at different locations are captured.
74 | 
75 | For the encoder I used the **ResNet-101** architecture pre-trained on **ImageNet**. Since ResNet is trained to classify different objects, its last linear layer outputs a 1-d probability tensor. Our objective, however, is to obtain feature maps, so we retain only the convolutional layers and drop the final pooling and feed-forward layers.
76 | 
77 | ### Attention Layer
78 | 
79 | The attention model generates attention weights at every step based on the previous step's hidden state vector (**h[t-1]**) that it receives from the decoder. The hidden state carries information about the context of the caption generated so far.
80 | 
81 | ### Decoder
82 | 
83 | The decoder, an LSTM network, generates the caption one word at a time. At step **t** it takes the attention-weighted encoding, computed from the decoder's hidden state at step **t-1**, which tells it which part of the image to focus on when generating the next word.
84 | 
85 | The flow is depicted in the following image:
86 | ![](snapshots/model.png)
87 | 
88 | #### Model architecture dimensions
89 | ```py
90 | embedding input dimension = 300
91 | attention dimension = 512
92 | decoder dimension = 512
93 | decoder dropout = 0.5
94 | encoder output dimension = 2048
95 | ```
96 | 
97 | 
98 | ## Training (using Fastai)
99 | 
100 | We use pre-trained weights for the encoder, trained on the ImageNet dataset of images from a thousand different object classes, which most likely include the objects found in our dataset; the encoder therefore does not require much tuning. The decoder, on the other hand, has to learn a lot, as it starts language modeling from scratch.
101 | 
102 | So, it is better to train just the decoder (with ```fine_tune``` off) for the first few epochs, until both parts reach a comparable level, and then train the entire network for the next few epochs. In this way we save the computational time of the encoder's gradient computation while the decoder receives most of the updates in the initial epochs.
103 | 
104 | Training the decoder entirely from scratch requires a lot of computation and hence more time. Instead, we can use pre-trained word embeddings (each word represented as a numeric vector) to initialize the embedding layer, whose output is passed into the decoder along with the previous hidden state.
105 | 
106 | 
107 | ### Fastai utilities
108 | 
109 | Fastai is a deep learning framework built on top of PyTorch that implements various state-of-the-art methods and provides a smooth API for the most important deep learning applications.
110 | 
111 | * **lr_find** - Does a mock training run over a large range of learning rates, then plots them against the losses. We pick a value a bit before the minimum, where the loss is still improving (see the snippet below).
112 | 
113 | ![](snapshots/lr_find.png)
114 | 
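With fastai v1 this is a two-liner; the ```learn``` object wrapping our encoder-decoder model is assumed here:

```py
learn.lr_find()        # mock training over a sweep of learning rates
learn.recorder.plot()  # plot loss vs. learning rate; pick a value a bit before the minimum
```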
115 | * **fit_one_cycle** - An implementation of the one-cycle policy: the learning rate goes up to a maximum and comes back down over one cycle through all the mini-batches. A fit cycle takes the entire input and divides it into batches of size ```bs```; training starts at lr_min for the first batch, increases gradually for the following batches until, at roughly 30 percent of the batches, it reaches lr_max, and then decreases again to return to lr_min at the last batch.
116 | 
117 | The original 1cycle policy has three steps:
118 | 
119 | 1. We progressively increase our learning rate from lr_max/div_factor to lr_max and, at the same time, progressively decrease our momentum from mom_max to mom_min.
120 | 2. We do the exact opposite: we progressively decrease our learning rate from lr_max to lr_max/div_factor and, at the same time, progressively increase our momentum from mom_min to mom_max.
121 | 3. We further decrease our learning rate from lr_max/div_factor to lr_max/(div_factor x 100) and keep the momentum steady at mom_max.
122 | 
123 | 
124 | **Clipping gradients**:
125 | * Gradients can explode when they are repeatedly multiplied by numbers greater than one; this is the mirror image of the vanishing gradient problem, where repeated multiplication by numbers smaller than one shrinks them away.
126 | 
127 | * Clipping has little effect on ordinary learning, but if a "bad minibatch" would cause the gradients to explode for some reason, it prevents that iteration from messing up your entire model.
128 | 
129 | **Early Stopping**
130 | 
131 | * The authors of the *Show, Attend and Tell* paper observe that the correlation between the loss and the BLEU score breaks down after a point, so they recommend stopping training early once the BLEU score starts degrading or stops improving.
132 | 
133 | ### Training in Stages
134 | 
135 | In the first stage, the model is trained with the encoder frozen, i.e. only the decoder weights are updated, for faster training. The model was run with batches of ```25``` images for 12 epochs using the ```Adam()``` optimizer with a learning rate of ```4e-04```; the two-stage schedule is sketched after the results below.
136 | 
137 | **Results**:
138 | epoch | train_loss | valid_loss | topK_accuracy | bleu_metric | time
139 | ------|------------|------------|---------------|-------------|-----
140 | 0 | 4.649515 | 4.511709 | 58.052895 | 0.106774 | 18:29
141 | 1 | 4.234053 | 4.231682 | 62.291264 | 0.125098 | 17:41
142 | 2 | 4.048578 | 4.089489 | 64.173981 | 0.136820 | 17:13
143 | 3 | 3.918362 | 4.001822 | 65.538071 | 0.142155 | 17:17
144 | 4 | 3.820599 | 3.946904 | 66.606972 | 0.147784 | 16:14
145 | 5 | 3.676066 | 3.904321 | 67.152397 | 0.140314 | 16:08
146 | 6 | 3.632400 | 3.884929 | 67.566093 | 0.145791 | 16:08
147 | 7 | 3.533431 | 3.860997 | 68.075752 | 0.154064 | 16:08
148 | 8 | 3.480697 | 3.852596 | 68.334770 | 0.151733 | 16:08
149 | 9 | 3.406797 | 3.853946 | 68.293274 | 0.150269 | 16:08
150 | 
151 | ![](snapshots/loss_stage1.png)
152 | 
153 | 
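A minimal sketch of how the two stages might be driven with fastai; the ```learn``` object is assumed, while ```fine_tune()``` is the encoder method defined in [modules/model.py](modules/model.py) and the epoch counts and learning rates are the ones reported here:

```py
# Stage 1: encoder frozen, only the decoder gets updated.
learn.model.encoder.fine_tune(False)
learn.fit_one_cycle(12, max_lr=4e-4)

# Stage 2: unfreeze convolutional blocks 2 through 4 and train the whole network at a lower rate.
learn.model.encoder.fine_tune(True)
learn.fit_one_cycle(10, max_lr=1e-4)
```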
154 | In the second stage, the model is trained with the encoder unfrozen. The model was run with batches of ```5``` images for 10 epochs using the ```Adam()``` optimizer with a ```1e-04``` learning rate, adopting the ```one cycle policy```.
155 | 
156 | **Results**:
157 | 
158 | epoch | train_loss | valid_loss | topK_accuracy | bleu_metric | time
159 | ------|------------|------------|---------------|-------------|-----
160 | 0 | 3.547406 | 3.914244 | 67.741348 | 0.134781 | 40:54
161 | 1 | 3.717416 | 3.972998 | 66.951462 | 0.142118 | 42:23
162 | 2 | 3.721014 | 3.950798 | 67.553833 | 0.150034 | 42:25
163 | 3 | 3.566937 | 3.928402 | 68.072418 | 0.155043 | 41:56
164 | 4 | 3.473794 | 3.910442 | 68.245857 | 0.163102 | 40:16
165 | 5 | 3.350647 | 3.915221 | 68.383591 | 0.161378 | 39:18
166 | 
167 | 
168 | ![](snapshots/loss_stage2.png)
169 | 
170 | **Evaluation: Beam search**
171 | 
172 | **Beam search**: Involves selecting the words with the top ```k``` (beam width) scores at each step, rather than only the single best-scoring word. Beam search is useful for any language modeling problem because it explores several candidate sequences in parallel and tends to find a more nearly optimal sequence than greedy decoding.
173 | 
174 | ![](snapshots/beam_search.png)
175 | 
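At inference time, the ```beam_search``` helper from [modules/custom_callbacks.py](modules/custom_callbacks.py) can be called roughly as follows; the preprocessed image tensor ```img``` and the ```idx2word``` reverse mapping are assumptions of this sketch:

```py
from modules.custom_callbacks import beam_search

idx2word = {i: w for w, i in vocab.items()}  # invert the word-to-index vocabulary

hyp = beam_search(learn.model, img, vocab=vocab, beam_size=5)[0]  # best beam, as word indices
print(' '.join(idx2word[i] for i in hyp))
```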
177 | 178 | **Validation results** 179 | 180 | Beam Size | Test BLEU-4 181 | ----------|------------- 182 | 1 | 21.8 183 | 3 | 23.46 184 | 5 | 23.9 185 | 186 | 187 | ### Model intrpretation 188 | 189 | ![](snapshots/eval.jpeg) 190 | 191 | 192 | ## Technology used 193 | 194 | ![](https://forthebadge.com/images/badges/made-with-python.svg) 195 | 196 | [](https://pytorch.org/) 197 | [](https://www.fast.ai/) 198 | [](https://flask.palletsprojects.com/en/1.1.x/) 199 | []() 200 | [](https://jquery.com/) 201 | 202 | 203 |
204 | 
205 | ## Credits
206 | 
207 | 1. [Show, Attend and Tell - paper (arxiv)](https://arxiv.org/abs/1502.03044)
208 | 
209 | 2. [Illustrated Guide to LSTM's and GRU's - Medium](https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21)
210 | 
211 | 3. [a-PyTorch-Tutorial-to-Image-Captioning - GitHub](https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning)
212 | 
213 | 
214 | 215 | ------ 216 | ## Creator 217 | [](https://skumar-djangoblog.herokuapp.com/) 218 | -------------------------------------------------------------------------------- /web_app/static/Vocab_5_cap_per_img_2_min_word_freq.json: -------------------------------------------------------------------------------- 1 | {"a": 1, "black": 2, "dog": 3, "is": 4, "running": 5, "after": 6, "white": 7, "in": 8, "the": 9, "snow": 10, "chasing": 11, "brown": 12, "through": 13, "two": 14, "dogs": 15, "chase": 16, "each": 17, "other": 18, "across": 19, "snowy": 20, "ground": 21, "play": 22, "together": 23, "low": 24, "lying": 25, "body": 26, "of": 27, "water": 28, "little": 29, "baby": 30, "plays": 31, "croquet": 32, "girl": 33, "next": 34, "to": 35, "truck": 36, "child": 37, "playing": 38, "by": 39, "kid": 40, "front": 41, "car": 42, "with": 43, "put": 44, "and": 45, "ball": 46, "boy": 47, "hammer": 48, "beside": 49, "has": 50, "something": 51, "hot": 52, "pink": 53, "its": 54, "mouth": 55, "holding": 56, "hat": 57, "shirt": 58, "carrying": 59, "while": 60, "walking": 61, "looking": 62, "forward": 63, "along": 64, "beach": 65, "wearing": 66, "collar": 67, "walks": 68, "on": 69, "sand": 70, "near": 71, "large": 72, "ocean": 73, "red": 74, "frisbee": 75, "standing": 76, "sandy": 77, "drops": 78, "disc": 79, "flying": 80, "air": 81, "catching": 82, "dropping": 83, "cyclist": 84, "helmet": 85, "riding": 86, "pavement": 87, "bike": 88, "street": 89, "person": 90, "down": 91, "woman": 92, "wears": 93, "blue": 94, "as": 95, "she": 96, "goes": 97, "for": 98, "ride": 99, "shade": 100, "road": 101, "man": 102, "dressed": 103, "purple": 104, "bandanna": 105, "smiles": 106, "at": 107, "people": 108, "watching": 109, "him": 110, "leather": 111, "chaps": 112, "poses": 113, "camera": 114, "stands": 115, "onlookers": 116, "there": 117, "men": 118, "t": 119, "woodland": 120, "runs": 121, "some": 122, "trees": 123, "young": 124, "dancing": 125, "around": 126, "short": 127, "sleeved": 128, "jeans": 129, "stops": 130, "smile": 131, "dress": 132, "back": 133, "smiling": 134, "braids": 135, "looks": 136, "green": 137, "skier": 138, "yellow": 139, "jacket": 140, "airborne": 141, "above": 142, "mountains": 143, "jumps": 144, "high": 145, "view": 146, "skiing": 147, "very": 148, "it": 149, "though": 150, "he": 151, "doing": 152, "ski": 153, "jump": 154, "pants": 155, "appears": 156, "almost": 157, "fly": 158, "into": 159, "sky": 160, "photographer": 161, "over": 162, "hills": 163, "videotaping": 164, "natural": 165, "landscape": 166, "out": 167, "rolling": 168, "tripod": 169, "lady": 170, "her": 171, "set": 172, "up": 173, "field": 174, "record": 175, "bunch": 176, "girls": 177, "cheerleader": 178, "outfits": 179, "group": 180, "cheerleaders": 181, "parade": 182, "perform": 183, "many": 184, "walk": 185, "uniforms": 186, "boat": 187, "canopy": 188, "floating": 189, "calm": 190, "waters": 191, "roof": 192, "middle": 193, "floats": 194, "lake": 195, "catches": 196, "midair": 197, "terrier": 198, "mix": 199, "leaping": 200, "object": 201, "old": 202, "sitting": 203, "an": 204, "advertisement": 205, "asian": 206, "waiting": 207, "underground": 208, "train": 209, "stop": 210, "sits": 211, "transit": 212, "station": 213, "backlit": 214, "subway": 215, "umbrella": 216, "wall": 217, "blond": 218, "trail": 219, "side": 220, "metal": 221, "fence": 222, "coat": 223, "rural": 224, "flute": 225, "parka": 226, "fenced": 227, "past": 228, "enclosed": 229, "area": 230, "family": 231, "nine": 232, "including": 233, "four": 234, "children": 235, 
"pose": 236, "brick": 237, "fireplace": 238, "christmas": 239, "tree": 240, "posing": 241, "happy": 242, "couples": 243, "kids": 244, "picture": 245, "sweater": 246, "pointing": 247, "one": 248, "arms": 249, "outstretched": 250, "finger": 251, "pointed": 252, "another": 253, "stares": 254, "from": 255, "behind": 256, "who": 257, "his": 258, "look": 259, "toward": 260, "points": 261, "hallway": 262, "medium": 263, "sized": 264, "small": 265, "larger": 266, "grassy": 267, "big": 268, "tall": 269, "grass": 270, "three": 271, "flowers": 272, "stuffed": 273, "kitten": 274, "garden": 275, "among": 276, "wildflowers": 277, "toy": 278, "cat": 279, "long": 280, "raises": 281, "stripes": 282, "signs": 283, "african": 284, "american": 285, "building": 286, "handicapped": 287, "space": 288, "orange": 289, "backwards": 290, "wet": 291, "stick": 292, "shore": 293, "bounds": 294, "splashing": 295, "off": 296, "paws": 297, "carries": 298, "are": 299, "racing": 300, "race": 301, "track": 302, "greyhounds": 303, "muzzles": 304, "inside": 305, "curb": 306, "railed": 307, "dirt": 308, "run": 309, "baseball": 310, "pitcher": 311, "throws": 312, "player": 313, "glove": 314, "pitches": 315, "male": 316, "sports": 317, "outfit": 318, "tries": 319, "catch": 320, "wades": 321, "rock": 322, "shallow": 323, "reach": 324, "outcropping": 325, "light": 326, "paw": 327, "about": 328, "ready": 329, "swim": 330, "5": 331, "school": 332, "cross": 333, "takes": 334, "outside": 335, "photograph": 336, "taking": 337, "pictures": 338, "women": 339, "stand": 340, "jean": 341, "hand": 342, "chest": 343, "nose": 344, "writing": 345, "hands": 346, "video": 347, "store": 348, "denim": 349, "full": 350, "house": 351, "sunglasses": 352, "flowered": 353, "sit": 354, "nearby": 355, "left": 356, "adults": 357, "display": 358, "case": 359, "digital": 360, "glass": 361, "sides": 362, "holds": 363, "sideways": 364, "arm": 365, "snowboarder": 366, "ramp": 367, "snowboard": 368, "performing": 369, "trick": 370, "making": 371, "icy": 372, "mountain": 373, "climbers": 374, "line": 375, "their": 376, "way": 377, "covered": 378, "background": 379, "facing": 380, "jumping": 381, "hoop": 382, "ring": 383, "using": 384, "laptop": 385, "cellphone": 386, "against": 387, "use": 388, "electronics": 389, "bench": 390, "engage": 391, "electronic": 392, "devices": 393, "races": 394, "number": 395, "6": 396, "greyhound": 397, "muzzle": 398, "six": 399, "tan": 400, "gold": 401, "edge": 402, "cliff": 403, "scouts": 404, "rest": 405, "bluff": 406, "overlooking": 407, "top": 408, "couple": 409, "wooded": 410, "them": 411, "lined": 412, "friends": 413, "stroll": 414, "forest": 415, "path": 416, "woods": 417, "sponsored": 418, "smoking": 419, "tires": 420, "drives": 421, "rain": 422, "driving": 423, "headlights": 424, "advertising": 425, "rainy": 426, "lights": 427, "raceway": 428, "bearded": 429, "whilst": 430, "bicycle": 431, "bicycles": 432, "hair": 433, "gives": 434, "peace": 435, "sign": 436, "cap": 437, "crowd": 438, "waving": 439, "flags": 440, "passing": 441, "under": 442, "bridge": 443, "or": 444, "banners": 445, "gets": 446, "pull": 447, "chair": 448, "meal": 449, "table": 450, "cafe": 451, "restaurant": 452, "getting": 453, "grabs": 454, "seat": 455, "racket": 456, "round": 457, "handle": 458, "backyard": 459, "lawn": 460, "clothing": 461, "yard": 462, "toys": 463, "enjoy": 464, "themselves": 465, "wind": 466, "blows": 467, "trampoline": 468, "blonde": 469, "bicyclists": 470, "between": 471, "wire": 472, "fences": 473, "cyclists": 474, "country": 475, 
"desert": 476, "bikers": 477, "dry": 478, "land": 479, "climbing": 480, "steep": 481, "hill": 482, "climbs": 483, "great": 484, "dane": 485, "spins": 486, "merry": 487, "go": 488, "watch": 489, "spinning": 490, "playground": 491, "that": 492, "skirt": 493, "golden": 494, "retriever": 495, "towards": 496, "away": 497, "greet": 498, "bus": 499, "onward": 500, "door": 501, "vehicle": 502, "window": 503, "passengers": 504, "peaking": 505, "peering": 506, "hiding": 507, "peeking": 508, "striped": 509, "peeks": 510, "hold": 511, "drinks": 512, "point": 513, "bottles": 514, "several": 515, "this": 516, "photo": 517, "beers": 518, "border": 519, "collie": 520, "bed": 521, "tennis": 522, "climber": 523, "peak": 524, "rocks": 525, "snowcapped": 526, "hikes": 527, "following": 528, "surrounded": 529, "distance": 530, "smoke": 531, "comes": 532, "starts": 533, "broken": 534, "during": 535, "racetrack": 536, "bull": 537, "leaves": 538, "bags": 539, "igloo": 540, "cave": 541, "luggage": 542, "stay": 543, "backpacks": 544, "piled": 545, "ice": 546, "visible": 547, "hole": 548, "starting": 549, "steps": 550, "murky": 551, "putting": 552, "legs": 553, "pond": 554, "busy": 555, "city": 556, "silhouette": 557, "buildings": 558, "crowded": 559, "shot": 560, "turned": 561, "chases": 562, "threw": 563, "interacting": 564, "opposite": 565, "direction": 566, "fetch": 567, "also": 568, "outdoors": 569, "cameras": 570, "third": 571, "river": 572, "skis": 573, "backdrop": 574, "face": 575, "rope": 576, "swimming": 577, "creek": 578, "playfully": 579, "rolls": 580, "int": 581, "chewing": 582, "resting": 583, "game": 584, "elderly": 585, "straw": 586, "alone": 587, "park": 588, "haired": 589, "gray": 590, "dark": 591, "beard": 592, "guitar": 593, "attractive": 594, "head": 595, "ceiling": 596, "wide": 597, "drink": 598, "upward": 599, "reading": 600, "paperback": 601, "book": 602, "elder": 603, "scarf": 604, "older": 605, "reads": 606, "motocross": 607, "motorcycle": 608, "tight": 609, "turn": 610, "motorbike": 611, "bright": 612, "someone": 613, "fall": 614, "rider": 615, "cots": 616, "sleeping": 617, "makeshift": 618, "beds": 619, "hanging": 620, "mountainside": 621, "skateboarder": 622, "skateboard": 623, "hopes": 624, "landing": 625, "teenage": 626, "flipping": 627, "tricks": 628, "colored": 629, "protest": 630, "banner": 631, "staring": 632, "religious": 633, "sidewalk": 634, "poster": 635, "mambo": 636, "image": 637, "teddy": 638, "bear": 639, "showing": 640, "humping": 641, "stool": 642, "trying": 643, "sell": 644, "animals": 645, "selling": 646, "various": 647, "vendor": 648, "sells": 649, "s": 650, "inflatable": 651, "guy": 652, "navy": 653, "shorts": 654, "pool": 655, "football": 656, "trunks": 657, "strange": 658, "diving": 659, "plastic": 660, "slide": 661, "equipment": 662, "shaded": 663, "fire": 664, "baton": 665, "day": 666, "juggling": 667, "flames": 668, "bubbles": 669, "float": 670, "popping": 671, "woven": 672, "toboggan": 673, "right": 674, "shoes": 675, "sandals": 676, "tug": 677, "war": 678, "pulling": 679, "end": 680, "rottweiler": 681, "surf": 682, "wave": 683, "surfers": 684, "surfing": 685, "attempting": 686, "sea": 687, "huge": 688, "sunset": 689, "rides": 690, "biking": 691, "muddy": 692, "slope": 693, "crossing": 694, "buses": 695, "hurrying": 696, "heads": 697, "night": 698, "phone": 699, "talks": 700, "pile": 701, "rail": 702, "stunt": 703, "soccer": 704, "uniform": 705, "kick": 706, "match": 707, "watches": 708, "knee": 709, "female": 710, "break": 711, "driver": 712, "wheel": 713, 
"thumbs": 714, "before": 715, "polka": 716, "dots": 717, "boots": 718, "hip": 719, "kimono": 720, "purse": 721, "followed": 722, "redheaded": 723, "pedestrians": 724, "casually": 725, "barefoot": 726, "cute": 727, "puppy": 728, "fetches": 729, "chew": 730, "fluffy": 731, "rings": 732, "dock": 733, "laughing": 734, "bucket": 735, "splashes": 736, "pier": 737, "beneath": 738, "dances": 739, "room": 740, "balloons": 741, "floor": 742, "socks": 743, "wooden": 744, "strewn": 745, "strings": 746, "confetti": 747, "wood": 748, "mother": 749, "both": 750, "ledge": 751, "scales": 752, "tent": 753, "being": 754, "enter": 755, "fishing": 756, "setting": 757, "hut": 758, "iced": 759, "tarp": 760, "structure": 761, "surface": 762, "few": 763, "foam": 764, "statue": 765, "liberty": 766, "hats": 767, "take": 768, "photos": 769, "new": 770, "york": 771, "get": 772, "taken": 773, "fight": 774, "jersey": 775, "leaps": 776, "dives": 777, "going": 778, "begin": 779, "sheer": 780, "climb": 781, "pretty": 782, "flat": 783, "rocky": 784, "card": 785, "what": 786, "says": 787, "they": 788, "bathing": 789, "suit": 790, "sprinklers": 791, "sprinkler": 792, "sliding": 793, "paddling": 794, "kiddie": 795, "lone": 796, "flies": 797, "leafless": 798, "pulled": 799, "shovel": 800, "distorted": 801, "open": 802, "ends": 803, "bottom": 804, "just": 805, "reaching": 806, "lands": 807, "reaches": 808, "piano": 809, "sings": 810, "preparing": 811, "sing": 812, "singer": 813, "music": 814, "bares": 815, "teeth": 816, "tongue": 817, "expanse": 818, "curly": 819, "violin": 820, "walls": 821, "posters": 822, "rubbing": 823, "chin": 824, "be": 825, "cover": 826, "teenager": 827, "struggles": 828, "carry": 829, "piggy": 830, "paved": 831, "covering": 832, "arab": 833, "turban": 834, "facial": 835, "style": 836, "headscarf": 837, "grey": 838, "bird": 839, "waves": 840, "roll": 841, "crane": 842, "upon": 843, "onto": 844, "pole": 845, "swings": 846, "silver": 847, "swinging": 848, "bounces": 849, "leotard": 850, "split": 851, "hello": 852, "kitty": 853, "does": 854, "leg": 855, "splits": 856, "leans": 857, "board": 858, "platform": 859, "skateboarders": 860, "eating": 861, "seeds": 862, "eats": 863, "clinging": 864, "stone": 865, "friend": 866, "helps": 867, "help": 868, "learning": 869, "how": 870, "helping": 871, "sweatshirt": 872, "not": 873, "far": 874, "racer": 875, "slightly": 876, "competition": 877, "circuit": 878, "cheering": 879, "grabbing": 880, "ankle": 881, "oklahoma": 882, "score": 883, "fans": 884, "cheer": 885, "athlete": 886, "performs": 887, "herself": 888, "bar": 889, "vault": 890, "vaulting": 891, "upside": 892, "attempts": 893, "feet": 894, "do": 895, "gap": 896, "flip": 897, "blocks": 898, "concrete": 899, "carefully": 900, "crosses": 901, "reflection": 902, "caught": 903, "pajamas": 904, "hall": 905, "hardwood": 906, "floors": 907, "floored": 908, "lit": 909, "toddler": 910, "chairs": 911, "kneel": 912, "folding": 913, "row": 914, "talking": 915, "spotted": 916, "fast": 917, "seated": 918, "stadium": 919, "event": 920, "fill": 921, "packed": 922, "indoor": 923, "dig": 924, "duck": 925, "pet": 926, "chased": 927, "parachute": 928, "lifting": 929, "attached": 930, "parasail": 931, "suspended": 932, "silhouetted": 933, "sunny": 934, "gliding": 935, "backed": 936, "sun": 937, "skateboarding": 938, "slides": 939, "railing": 940, "shines": 941, "skating": 942, "stair": 943, "stairs": 944, "have": 945, "pillow": 946, "having": 947, "skates": 948, "neck": 949, "grazes": 950, "flight": 951, "boats": 952, "boxing": 953, 
"boxers": 954, "fighting": 955, "box": 956, "faces": 957, "funny": 958, "makes": 959, "glasses": 960, "rusty": 961, "barks": 962, "barking": 963, "bark": 964, "hose": 965, "pouring": 966, "drinking": 967, "which": 968, "squirted": 969, "wings": 970, "swimsuits": 971, "lacrosse": 972, "players": 973, "struggling": 974, "control": 975, "team": 976, "breaks": 977, "hit": 978, "hockey": 979, "type": 980, "sport": 981, "like": 982, "sticks": 983, "guys": 984, "all": 985, "saying": 986, "free": 987, "dinner": 988, "cement": 989, "skate": 990, "prepares": 991, "hugging": 992, "embracing": 993, "hugs": 994, "dunks": 995, "basketball": 996, "make": 997, "basket": 998, "goal": 999, "dunking": 1000, "grinding": 1001, "snowboarding": 1002, "steel": 1003, "homemade": 1004, "made": 1005, "piece": 1006, "log": 1007, "mid": 1008, "leap": 1009, "crystal": 1010, "clear": 1011, "wrapped": 1012, "tape": 1013, "brother": 1014, "superman": 1015, "cape": 1016, "cast": 1017, "material": 1018, "super": 1019, "airplane": 1020, "plane": 1021, "jet": 1022, "runway": 1023, "seen": 1024, "windows": 1025, "headfirst": 1026, "digging": 1027, "poking": 1028, "brush": 1029, "digs": 1030, "greenhouse": 1031, "tools": 1032, "work": 1033, "unfinished": 1034, "fishes": 1035, "rod": 1036, "jetty": 1037, "boys": 1038, "living": 1039, "shown": 1040, "wear": 1041, "shirts": 1042, "computer": 1043, "monitor": 1044, "eat": 1045, "cream": 1046, "adult": 1047, "sat": 1048, "world": 1049, "college": 1050, "tackled": 1051, "jerseys": 1052, "tackle": 1053, "tackling": 1054, "uniformed": 1055, "try": 1056, "fan": 1057, "painting": 1058, "paint": 1059, "within": 1060, "painted": 1061, "bleachers": 1062, "3": 1063, "dalmatians": 1064, "spots": 1065, "palm": 1066, "route": 1067, "stretch": 1068, "displaying": 1069, "paintings": 1070, "framed": 1071, "artwork": 1072, "sale": 1073, "desolate": 1074, "fallen": 1075, "dead": 1076, "church": 1077, "bicyclist": 1078, "aerial": 1079, "scooter": 1080, "suburban": 1081, "neighborhood": 1082, "center": 1083, "rollerblades": 1084, "rollerblader": 1085, "narrow": 1086, "roller": 1087, "grinds": 1088, "rollerblading": 1089, "participate": 1090, "martial": 1091, "arts": 1092, "tournament": 1093, "mat": 1094, "protective": 1095, "gear": 1096, "spar": 1097, "helmets": 1098, "sparring": 1099, "skateboards": 1100, "skater": 1101, "flag": 1102, "headband": 1103, "vest": 1104, "indoors": 1105, "baring": 1106, "handrail": 1107, "foot": 1108, "indian": 1109, "crossed": 1110, "folded": 1111, "ethnic": 1112, "outdoor": 1113, "market": 1114, "shining": 1115, "graffiti": 1116, "plank": 1117, "no": 1118, "hung": 1119, "gallery": 1120, "naked": 1121, "individuals": 1122, "chinese": 1123, "ladies": 1124, "bank": 1125, "lockers": 1126, "retrieves": 1127, "seaweed": 1128, "coming": 1129, "mossy": 1130, "hiker": 1131, "descends": 1132, "hiking": 1133, "bringing": 1134, "leafy": 1135, "gravel": 1136, "amidst": 1137, "autumn": 1138, "others": 1139, "laugh": 1140, "touching": 1141, "laughs": 1142, "fun": 1143, "splash": 1144, "wading": 1145, "shallows": 1146, "bald": 1147, "drag": 1148, "dresses": 1149, "matching": 1150, "jewelry": 1151, "clothes": 1152, "formally": 1153, "jumped": 1154, "sniff": 1155, "somthing": 1156, "walkway": 1157, "biker": 1158, "moving": 1159, "furry": 1160, "doorway": 1161, "leading": 1162, "plants": 1163, "patio": 1164, "potted": 1165, "hairy": 1166, "step": 1167, "cars": 1168, "leave": 1169, "start": 1170, "twilight": 1171, "beginning": 1172, "see": 1173, "racers": 1174, "sled": 1175, "harness": 
1176, "pack": 1177, "winter": 1178, "soaking": 1179, "wheelie": 1180, "terrain": 1181, "clutching": 1182, "turquoise": 1183, "guard": 1184, "thin": 1185, "markings": 1186, "ears": 1187, "somersault": 1188, "throwing": 1189, "tossing": 1190, "touches": 1191, "challenging": 1192, "teams": 1193, "quickly": 1194, "meadow": 1195, "surfer": 1196, "crashing": 1197, "follows": 1198, "surfboard": 1199, "laying": 1200, "power": 1201, "lines": 1202, "unusual": 1203, "shaped": 1204, "stump": 1205, "teammates": 1206, "referee": 1207, "breaking": 1208, "artist": 1209, "canvas": 1210, "overalls": 1211, "easel": 1212, "shady": 1213, "travels": 1214, "heavily": 1215, "fangs": 1216, "german": 1217, "shepherd": 1218, "leash": 1219, "somebody": 1220, "leashed": 1221, "falling": 1222, "backward": 1223, "urban": 1224, "partially": 1225, "guitars": 1226, "suits": 1227, "electric": 1228, "musicians": 1229, "curtain": 1230, "frolics": 1231, "mini": 1232, "moped": 1233, "heels": 1234, "pumps": 1235, "lap": 1236, "steers": 1237, "suv": 1238, "drivers": 1239, "jeep": 1240, "cords": 1241, "abseiling": 1242, "repelling": 1243, "flock": 1244, "birds": 1245, "mural": 1246, "pigeons": 1247, "bride": 1248, "newly": 1249, "wife": 1250, "held": 1251, "polo": 1252, "hides": 1253, "overgrown": 1254, "bushes": 1255, "grasses": 1256, "cushion": 1257, "spider": 1258, "patterned": 1259, "base": 1260, "close": 1261, "incoming": 1262, "dirty": 1263, "crashes": 1264, "beyond": 1265, "relaxing": 1266, "trench": 1267, "waits": 1268, "sculpture": 1269, "bikini": 1270, "pulls": 1271, "taller": 1272, "skaters": 1273, "rink": 1274, "scenery": 1275, "bounce": 1276, "filled": 1277, "bouncy": 1278, "castle": 1279, "swimsuit": 1280, "tightrope": 1281, "students": 1282, "campus": 1283, "quad": 1284, "but": 1285, "falls": 1286, "practice": 1287, "asking": 1288, "daughter": 1289, "nice": 1290, "pretending": 1291, "biting": 1292, "limb": 1293, "branch": 1294, "gnawing": 1295, "plant": 1296, "uncut": 1297, "barrel": 1298, "buckets": 1299, "poles": 1300, "seven": 1301, "teenagers": 1302, "wait": 1303, "gate": 1304, "puts": 1305, "gesture": 1306, "raised": 1307, "newspaper": 1308, "kitchen": 1309, "owner": 1310, "retrieving": 1311, "dining": 1312, "paper": 1313, "tile": 1314, "sharing": 1315, "kiss": 1316, "cold": 1317, "kissing": 1318, "taxi": 1319, "passes": 1320, "teen": 1321, "coats": 1322, "tunnel": 1323, "halloween": 1324, "eyes": 1325, "crawls": 1326, "beautiful": 1327, "tube": 1328, "leads": 1329, "trucks": 1330, "five": 1331, "obstacle": 1332, "course": 1333, "elephant": 1334, "draped": 1335, "fabric": 1336, "trunk": 1337, "colorful": 1338, "foliage": 1339, "swimmers": 1340, "move": 1341, "mud": 1342, "puddle": 1343, "nearly": 1344, "identical": 1345, "shirtless": 1346, "couch": 1347, "beaded": 1348, "disk": 1349, "show": 1350, "support": 1351, "wheeled": 1352, "tricycle": 1353, "i": 1354, "pedal": 1355, "wheeler": 1356, "2": 1357, "wheels": 1358, "carnival": 1359, "worker": 1360, "animal": 1361, "prizes": 1362, "amongst": 1363, "shelf": 1364, "riverbank": 1365, "camping": 1366, "patches": 1367, "supplies": 1368, "ditch": 1369, "pain": 1370, "dribbles": 1371, "moves": 1372, "tea": 1373, "coffee": 1374, "gather": 1375, "beverages": 1376, "home": 1377, "outstreached": 1378, "begins": 1379, "spreads": 1380, "plaid": 1381, "print": 1382, "touchdown": 1383, "rescue": 1384, "follow": 1385, "lead": 1386, "flips": 1387, "canoe": 1388, "still": 1389, "fat": 1390, "traveling": 1391, "stepping": 1392, "shaggy": 1393, "foreground": 1394, "inline": 1395, 
"public": 1396, "dust": 1397, "sooners": 1398, "lifted": 1399, "same": 1400, "athletes": 1401, "mannequins": 1402, "waterfall": 1403, "massive": 1404, "waterfalls": 1405, "guns": 1406, "shooting": 1407, "beige": 1408, "athletic": 1409, "coach": 1410, "nike": 1411, "whistle": 1412, "beagle": 1413, "basset": 1414, "hound": 1415, "costumes": 1416, "frame": 1417, "unseen": 1418, "multi": 1419, "excited": 1420, "audience": 1421, "parallel": 1422, "kicks": 1423, "opponent": 1424, "compete": 1425, "karate": 1426, "belts": 1427, "clapping": 1428, "ran": 1429, "lift": 1430, "skiers": 1431, "shakes": 1432, "shaking": 1433, "hoodie": 1434, "horse": 1435, "jockey": 1436, "performer": 1437, "stage": 1438, "spectators": 1439, "onstage": 1440, "presentation": 1441, "mixing": 1442, "aged": 1443, "dj": 1444, "deck": 1445, "pretends": 1446, "younger": 1447, "tables": 1448, "meet": 1449, "goggles": 1450, "swimmer": 1451, "competitive": 1452, "apple": 1453, "place": 1454, "bag": 1455, "hips": 1456, "backpack": 1457, "tattoo": 1458, "clown": 1459, "safely": 1460, "parachutes": 1461, "landed": 1462, "para": 1463, "practicing": 1464, "craft": 1465, "exercises": 1466, "roughly": 1467, "clearing": 1468, "rough": 1469, "collars": 1470, "extreme": 1471, "backpacker": 1472, "carring": 1473, "uses": 1474, "camcorder": 1475, "films": 1476, "tosses": 1477, "empty": 1478, "bottle": 1479, "dug": 1480, "closely": 1481, "volleyball": 1482, "talk": 1483, "bikinis": 1484, "marketplace": 1485, "ascending": 1486, "horizontal": 1487, "shephard": 1488, "pair": 1489, "spray": 1490, "fountain": 1491, "splashed": 1492, "sprayed": 1493, "closeup": 1494, "screen": 1495, "life": 1496, "unicycle": 1497, "lining": 1498, "coastline": 1499, "location": 1500, "ferry": 1501, "shoreline": 1502, "docked": 1503, "dolphin": 1504, "pokes": 1505, "shoe": 1506, "horseshoe": 1507, "horseshoes": 1508, "corner": 1509, "shoulders": 1510, "these": 1511, "noses": 1512, "numbers": 1513, "dune": 1514, "watercraft": 1515, "swing": 1516, "strikes": 1517, "gestures": 1518, "closed": 1519, "poodle": 1520, "sticking": 1521, "butt": 1522, "inspects": 1523, "tattooed": 1524, "gentleman": 1525, "half": 1526, "relaxes": 1527, "where": 1528, "propped": 1529, "parked": 1530, "topless": 1531, "snowbank": 1532, "kneeling": 1533, "ridge": 1534, "ancient": 1535, "muscle": 1536, "pass": 1537, "tourist": 1538, "sleeveless": 1539, "floppy": 1540, "spaniel": 1541, "shawl": 1542, "cigarette": 1543, "blanket": 1544, "hooded": 1545, "pipe": 1546, "incense": 1547, "bubble": 1548, "machine": 1549, "bites": 1550, "blowing": 1551, "swims": 1552, "chunk": 1553, "mittens": 1554, "throw": 1555, "snowball": 1556, "grins": 1557, "paddles": 1558, "vertical": 1559, "approaching": 1560, "backs": 1561, "tank": 1562, "official": 1563, "marsh": 1564, "mostly": 1565, "swampy": 1566, "rubber": 1567, "chickens": 1568, "chicken": 1569, "eight": 1570, "balls": 1571, "bottoms": 1572, "pushes": 1573, "stroller": 1574, "brunette": 1575, "pushing": 1576, "carriage": 1577, "females": 1578, "hurdle": 1579, "lay": 1580, "hang": 1581, "snowman": 1582, "flower": 1583, "figure": 1584, "costume": 1585, "party": 1586, "blood": 1587, "branches": 1588, "bends": 1589, "alley": 1590, "pairs": 1591, "alleyway": 1592, "cowboy": 1593, "lasso": 1594, "twirling": 1595, "cart": 1596, "(": 1597, ")": 1598, "carrier": 1599, "rack": 1600, "shopping": 1601, "apart": 1602, "camouflage": 1603, "blow": 1604, "varying": 1605, "breeds": 1606, "sledding": 1607, "snowsuit": 1608, "dogsled": 1609, "town": 1610, "leashes": 1611, 
"strap": 1612, "bite": 1613, "strip": 1614, "steering": 1615, "ship": 1616, "navigating": 1617, "partly": 1618, "cloudy": 1619, "skies": 1620, "sailor": 1621, "sailboat": 1622, "puppet": 1623, "camper": 1624, "van": 1625, "rv": 1626, "peek": 1627, "snake": 1628, "lean": 1629, "different": 1630, "tub": 1631, "bin": 1632, "multicolored": 1633, "pen": 1634, "enclosure": 1635, "pit": 1636, "cut": 1637, "balloon": 1638, "only": 1639, "underwear": 1640, "marker": 1641, "drawing": 1642, "markers": 1643, "safety": 1644, "neon": 1645, "ribbons": 1646, "competing": 1647, "covers": 1648, "vests": 1649, "you": 1650, "hi": 1651, "viz": 1652, "jackets": 1653, "carved": 1654, "returns": 1655, "horses": 1656, "pony": 1657, "stare": 1658, "crumbling": 1659, "structures": 1660, "reflections": 1661, "feild": 1662, "shop": 1663, "mall": 1664, "food": 1665, "court": 1666, "art": 1667, "turns": 1668, "ladder": 1669, "reached": 1670, "mesh": 1671, "headed": 1672, "popsicle": 1673, "frozen": 1674, "treat": 1675, "eyed": 1676, "curious": 1677, "ahead": 1678, "gymnastic": 1679, "ropes": 1680, "harnesses": 1681, "aid": 1682, "balance": 1683, "dangling": 1684, "straps": 1685, "kicking": 1686, "miami": 1687, "university": 1688, "scene": 1689, "classic": 1690, "well": 1691, "kept": 1692, "thrown": 1693, "dancers": 1694, "dance": 1695, "prepare": 1696, "hillside": 1697, "kayaker": 1698, "kayaking": 1699, "kayak": 1700, "spot": 1701, "warmly": 1702, "blurry": 1703, "tag": 1704, "touch": 1705, "riders": 1706, "shoulder": 1707, "motorbikes": 1708, "farm": 1709, "sheep": 1710, "goat": 1711, "places": 1712, "slab": 1713, "twisting": 1714, "graffitied": 1715, "hollywood": 1716, "stars": 1717, "examines": 1718, "bridal": 1719, "wedding": 1720, "ten": 1721, "groom": 1722, "except": 1723, "groomsmen": 1724, "bridesmaids": 1725, "launch": 1726, "cane": 1727, "lays": 1728, "rests": 1729, "stream": 1730, "eye": 1731, "quarter": 1732, "bush": 1733, "brownish": 1734, "photographs": 1735, "canal": 1736, "muzzled": 1737, "#": 1738, "numbered": 1739, "prisoner": 1740, "police": 1741, "officer": 1742, "cop": 1743, "fake": 1744, "mask": 1745, "tie": 1746, "handing": 1747, "papers": 1748, "button": 1749, "masked": 1750, "business": 1751, "attire": 1752, "straight": 1753, "profile": 1754, "blurred": 1755, "barefooted": 1756, "bicycler": 1757, "raced": 1758, "pop": 1759, "weather": 1760, "happily": 1761, "poised": 1762, "candles": 1763, "candle": 1764, "lighting": 1765, "elephants": 1766, "ridden": 1767, "colors": 1768, "turbans": 1769, "eastern": 1770, "give": 1771, "chubby": 1772, "remote": 1773, "peers": 1774, "device": 1775, "pad": 1776, "note": 1777, "writes": 1778, "motorcyclist": 1779, "speed": 1780, "sharp": 1781, "crouches": 1782, "rounds": 1783, "pops": 1784, "bmx": 1785, "without": 1786, "boarding": 1787, "trotting": 1788, "stuck": 1789, "trails": 1790, "pine": 1791, "4": 1792, "atv": 1793, "drive": 1794, "net": 1795, "olympics": 1796, "logo": 1797, "skimpy": 1798, "soaring": 1799, "underneath": 1800, "batting": 1801, "sweaters": 1802, "wade": 1803, "military": 1804, "speak": 1805, "crying": 1806, "bending": 1807, "assist": 1808, "checking": 1809, "dappled": 1810, "walked": 1811, "owners": 1812, "hits": 1813, "bat": 1814, "catcher": 1815, "teal": 1816, "japanese": 1817, "aqua": 1818, "pocket": 1819, "handstand": 1820, "sheets": 1821, "lab": 1822, "below": 1823, "headphones": 1824, "curvy": 1825, "elevator": 1826, "kites": 1827, "capped": 1828, "range": 1829, "doghouse": 1830, "youn": 1831, "camel": 1832, "camels": 1833, "trots": 
1834, "agility": 1835, "lifts": 1836, "bare": 1837, "chested": 1838, "launches": 1839, "asleep": 1840, "machines": 1841, "atm": 1842, "money": 1843, "sleeps": 1844, "maroon": 1845, "dalmatian": 1846, "dalmation": 1847, "chews": 1848, "hind": 1849, "lambs": 1850, "counter": 1851, "buying": 1852, "handbag": 1853, "lunch": 1854, "grocery": 1855, "corn": 1856, "cob": 1857, "nibbling": 1858, "ear": 1859, "listens": 1860, "mobile": 1861, "come": 1862, "sets": 1863, "horizon": 1864, "cricket": 1865, "participating": 1866, "opposing": 1867, "pick": 1868, "watermelon": 1869, "picking": 1870, "watermelons": 1871, "flowery": 1872, "stretches": 1873, "mats": 1874, "circle": 1875, "brightly": 1876, "teaches": 1877, "yoga": 1878, "festival": 1879, "learn": 1880, "stretching": 1881, "upwards": 1882, "legged": 1883, "plain": 1884, "puppies": 1885, "slam": 1886, "interested": 1887, "grab": 1888, "donut": 1889, "intertube": 1890, "ties": 1891, "share": 1892, "toast": 1893, "alongside": 1894, "sister": 1895, "mickey": 1896, "mouse": 1897, "angle": 1898, "non": 1899, "professional": 1900, "plate": 1901, "guarding": 1902, "runner": 1903, "softball": 1904, "block": 1905, "khaki": 1906, "pours": 1907, "wine": 1908, "dim": 1909, "shrubbery": 1910, "column": 1911, "belongings": 1912, "pillar": 1913, "time": 1914, "lot": 1915, "balding": 1916, "choppy": 1917, "wetsuit": 1918, "rapids": 1919, "rafting": 1920, "reacts": 1921, "earth": 1922, "loop": 1923, "leaving": 1924, "single": 1925, "file": 1926, "exhibit": 1927, "class": 1928, "columns": 1929, "barbed": 1930, "porch": 1931, "clad": 1932, "cowgirl": 1933, "scantily": 1934, "special": 1935, "dish": 1936, "towel": 1937, "cloth": 1938, "boxer": 1939, "pitbull": 1940, "uphill": 1941, "watched": 1942, "sneakers": 1943, "highway": 1944, "retreiver": 1945, "item": 1946, "ribbon": 1947, "elegant": 1948, "horns": 1949, "heard": 1950, "goats": 1951, "cluster": 1952, "earphones": 1953, "amused": 1954, "surprised": 1955, "foggy": 1956, "strollers": 1957, "pushed": 1958, "first": 1959, "bikes": 1960, "chat": 1961, "fist": 1962, "gloves": 1963, "giant": 1964, "circular": 1965, "perched": 1966, "opening": 1967, "atop": 1968, "kneels": 1969, "sprays": 1970, "push": 1971, "policeman": 1972, "crosswalk": 1973, "directs": 1974, "traffic": 1975, "pedestrian": 1976, "carves": 1977, "chainsaw": 1978, "carving": 1979, "saw": 1980, "reflective": 1981, "racquet": 1982, "blacktop": 1983, "parking": 1984, "printed": 1985, "attempt": 1986, "goalie": 1987, "dreadlocks": 1988, "bowls": 1989, "liquid": 1990, "mug": 1991, "oxford": 1992, "beer": 1993, "sipping": 1994, "lunges": 1995, "irish": 1996, "setter": 1997, "balcony": 1998, "fleece": 1999, "bouncing": 2000, "gated": 2001, "housing": 2002, "hopping": 2003, "magazines": 2004, "interracial": 2005, "master": 2006, "fruit": 2007, "farmers": 2008, "fish": 2009, "connected": 2010, "bamboo": 2011, "paddle": 2012, "father": 2013, "tossed": 2014, "monkey": 2015, "bars": 2016, "jungle": 2017, "gym": 2018, "messily": 2019, "pasta": 2020, "spaghetti": 2021, "mess": 2022, "skiiers": 2023, "dachshund": 2024, "shine": 2025, "customer": 2026, "cones": 2027, "observe": 2028, "pads": 2029, "really": 2030, "closes": 2031, "wand": 2032, "speaks": 2033, "cup": 2034, "dad": 2035, "birthday": 2036, "web": 2037, "cam": 2038, "celebrate": 2039, "cake": 2040, "wintry": 2041, "seating": 2042, "inflated": 2043, "parents": 2044, "skull": 2045, "motorcycles": 2046, "motorcyclists": 2047, "interesting": 2048, "formations": 2049, "shapes": 2050, "licking": 2051, 
"sniffing": 2052, "desk": 2053, "meeting": 2054, "modern": 2055, "decorated": 2056, "gathered": 2057, "clouds": 2058, "lots": 2059, "raising": 2060, "screaming": 2061, "parachuting": 2062, "ponytail": 2063, "paying": 2064, "lips": 2065, "tool": 2066, "rug": 2067, "carpet": 2068, "checkered": 2069, "tugs": 2070, "tiger": 2071, "picnic": 2072, "hops": 2073, "hike": 2074, "barren": 2075, "bend": 2076, "gymnastics": 2077, "seashore": 2078, "shows": 2079, "fur": 2080, "balances": 2081, "staircase": 2082, "scaling": 2083, "repels": 2084, "spandex": 2085, "studio": 2086, "tops": 2087, "clothed": 2088, "soars": 2089, "slopes": 2090, "surrounding": 2091, "smaller": 2092, "tail": 2093, "sniffs": 2094, "batman": 2095, "makeup": 2096, "streaked": 2097, "giving": 2098, "panel": 2099, "upturned": 2100, "celebrating": 2101, "beret": 2102, "mustache": 2103, "conversation": 2104, "joy": 2105, "name": 2106, "badge": 2107, "chain": 2108, "passenger": 2109, "transportation": 2110, "camo": 2111, "photographed": 2112, "sort": 2113, "darkened": 2114, "sound": 2115, "colander": 2116, "clowns": 2117, "bowl": 2118, "hikers": 2119, "backpacking": 2120, "mountaineers": 2121, "sweat": 2122, "santa": 2123, "sofa": 2124, "claus": 2125, "stony": 2126, "mirror": 2127, "surfs": 2128, "leaning": 2129, "stairway": 2130, "rushing": 2131, "garbage": 2132, "can": 2133, "trash": 2134, "searching": 2135, "cans": 2136, "trashcan": 2137, "cardboard": 2138, "boxes": 2139, "instrument": 2140, "musician": 2141, "&": 2142, "chatting": 2143, "tire": 2144, "rocking": 2145, "deep": 2146, "overhanging": 2147, "overhang": 2148, "phrase": 2149, "springs": 2150, "motion": 2151, "storefront": 2152, "entrance": 2153, "jagged": 2154, "peach": 2155, "enjoys": 2156, "stomach": 2157, "wakeboard": 2158, "read": 2159, "fit": 2160, "miniature": 2161, "pacifier": 2162, "sucking": 2163, "gloved": 2164, "mitt": 2165, "maneuver": 2166, "similarly": 2167, "executes": 2168, "combat": 2169, "facility": 2170, "soldier": 2171, "shaved": 2172, "punch": 2173, "temple": 2174, "spotters": 2175, "boulder": 2176, "approaches": 2177, "showering": 2178, "shower": 2179, "watering": 2180, "source": 2181, "bounding": 2182, "museum": 2183, "zip": 2184, "harnessed": 2185, "cable": 2186, "pulley": 2187, "arena": 2188, "jumpsuit": 2189, "walker": 2190, "bent": 2191, "sloping": 2192, "dunes": 2193, "congregate": 2194, "jockeys": 2195, "keeping": 2196, "wrestle": 2197, "warm": 2198, "intersection": 2199, "beanie": 2200, "aim": 2201, "blocked": 2202, "members": 2203, "fetching": 2204, "teammate": 2205, "quarterback": 2206, "possession": 2207, "spring": 2208, "snows": 2209, "trimmed": 2210, "frog": 2211, "rise": 2212, "summit": 2213, "rails": 2214, "monk": 2215, "wrap": 2216, "robes": 2217, "tattoos": 2218, "bow": 2219, "plushie": 2220, "picks": 2221, "airport": 2222, "overhead": 2223, "casino": 2224, "amusement": 2225, "arcade": 2226, "blindfolds": 2227, "blindfolded": 2228, "o": 2229, "written": 2230, "spanish": 2231, "burning": 2232, "handles": 2233, "wheelbarrow": 2234, "whose": 2235, "been": 2236, "turning": 2237, "tiny": 2238, "cow": 2239, "necklace": 2240, "cliffs": 2241, "boulders": 2242, "band": 2243, "dollar": 2244, "bill": 2245, "bills": 2246, "instruments": 2247, "breath": 2248, "underwater": 2249, "fingers": 2250, "smeared": 2251, "chocolate": 2252, "act": 2253, "silly": 2254, "gas": 2255, "curve": 2256, "placed": 2257, "british": 2258, "union": 2259, "jack": 2260, "novelty": 2261, "backstroke": 2262, "energizer": 2263, "bunny": 2264, "attraction": 2265, "13": 2266, 
"defenders": 2267, "hurdles": 2268, "tri": 2269, "tents": 2270, "barrier": 2271, "fuzzy": 2272, "creature": 2273, "crab": 2274, "cups": 2275, "marathon": 2276, "runners": 2277, "lane": 2278, "rag": 2279, "dried": 2280, "containing": 2281, "cage": 2282, "pig": 2283, "crawling": 2284, "waring": 2285, "pieces": 2286, "gymnast": 2287, "gymnasium": 2288, "workout": 2289, "flooring": 2290, "infant": 2291, "squatting": 2292, "beverage": 2293, "overweight": 2294, "yawning": 2295, "shoeless": 2296, "homeless": 2297, "winnie": 2298, "pooh": 2299, "sheet": 2300, "returning": 2301, "serve": 2302, "clay": 2303, "signing": 2304, "portrait": 2305, "action": 2306, "created": 2307, "boardwalk": 2308, "too": 2309, "village": 2310, "masks": 2311, "embrace": 2312, "acting": 2313, "garb": 2314, "enjoying": 2315, "downhill": 2316, "cats": 2317, "pug": 2318, "mohawk": 2319, "drums": 2320, "feathers": 2321, "drum": 2322, "reindeer": 2323, "tussle": 2324, "deflated": 2325, "chats": 2326, "string": 2327, "traditional": 2328, "kimonos": 2329, "elaborate": 2330, "butterfly": 2331, "order": 2332, "posed": 2333, "fashion": 2334, "stretched": 2335, "higher": 2336, "jog": 2337, "jogging": 2338, "waterfront": 2339, "winding": 2340, "pitch": 2341, "winds": 2342, "more": 2343, "rugby": 2344, "hula": 2345, "hoops": 2346, "puddles": 2347, "maneuvers": 2348, "link": 2349, "nap": 2350, "tutu": 2351, "waterskies": 2352, "waterskiing": 2353, "waterskier": 2354, "colourful": 2355, "snowboards": 2356, "snowmobile": 2357, "blocking": 2358, "assistance": 2359, "grinning": 2360, "army": 2361, "bra": 2362, "afro": 2363, "fairy": 2364, "raft": 2365, "innertube": 2366, "dinghy": 2367, "monument": 2368, "mountaintop": 2369, "pyramid": 2370, "directions": 2371, "mouths": 2372, "lies": 2373, "completely": 2374, "hide": 2375, "coaster": 2376, "flipped": 2377, "multiple": 2378, "crests": 2379, "motorized": 2380, "wheelchair": 2381, "shelves": 2382, "shops": 2383, "distant": 2384, "spiky": 2385, "things": 2386, "crevice": 2387, "floral": 2388, "wagon": 2389, "dresser": 2390, "applying": 2391, "salon": 2392, "done": 2393, "sparse": 2394, "tattered": 2395, "umbrellas": 2396, "beads": 2397, "billboards": 2398, "pigtails": 2399, "heart": 2400, "fancy": 2401, "robe": 2402, "frowning": 2403, "bagpipe": 2404, "medieval": 2405, "similar": 2406, "trumpet": 2407, "marching": 2408, "fog": 2409, "balancing": 2410, "tractor": 2411, "squirrel": 2412, "drift": 2413, "melting": 2414, "mound": 2415, "upset": 2416, "streaming": 2417, "heavy": 2418, "rowboat": 2419, "rowing": 2420, "boards": 2421, "boogie": 2422, "wake": 2423, "surfboards": 2424, "ninja": 2425, "nude": 2426, "security": 2427, "series": 2428, "beam": 2429, "ad": 2430, "snowboarders": 2431, "cheers": 2432, "dancer": 2433, "batter": 2434, "protects": 2435, "dusk": 2436, "wakeboarder": 2437, "emerges": 2438, "dandelion": 2439, "toe": 2440, "suspenders": 2441, "spread": 2442, "tourists": 2443, "injured": 2444, "color": 2445, "fireworks": 2446, "glow": 2447, "necklaces": 2448, "sparklers": 2449, "china": 2450, "kisses": 2451, "ejected": 2452, "scarves": 2453, "cows": 2454, "herding": 2455, "skeleton": 2456, "decoration": 2457, "sail": 2458, "receives": 2459, "thumb": 2460, "ticket": 2461, "dimly": 2462, "singing": 2463, "club": 2464, "guitarist": 2465, "microphone": 2466, "listening": 2467, "chopsticks": 2468, "pot": 2469, "pan": 2470, "overturned": 2471, "items": 2472, "kilt": 2473, "scottish": 2474, "burgundy": 2475, "own": 2476, "soda": 2477, "sunlight": 2478, "afghan": 2479, "patch": 2480, 
"shades": 2481, "shadow": 2482, "post": 2483, "teens": 2484, "prom": 2485, "fellow": 2486, "formal": 2487, "skinned": 2488, "lighter": 2489, "protect": 2490, "scratching": 2491, "filling": 2492, "watery": 2493, "approach": 2494, "canoeing": 2495, "fights": 2496, "pouncing": 2497, "boot": 2498, "casting": 2499, "playful": 2500, "heading": 2501, "labeled": 2502, "musher": 2503, "construction": 2504, "drilling": 2505, "works": 2506, "active": 2507, "drill": 2508, "breed": 2509, "smooth": 2510, "iron": 2511, "hunched": 2512, "mans": 2513, "encouraging": 2514, "handed": 2515, "rundown": 2516, "warehouse": 2517, "so": 2518, "waterway": 2519, "wrinkled": 2520, "sight": 2521, "cookie": 2522, "cheered": 2523, "crowds": 2524, "complete": 2525, "finish": 2526, "benches": 2527, "had": 2528, "bandaged": 2529, "checked": 2530, "descending": 2531, "knees": 2532, "piggyback": 2533, "adorned": 2534, "intently": 2535, "travelling": 2536, "crouching": 2537, "defensive": 2538, "position": 2539, "goalkeeper": 2540, "stripped": 2541, "extends": 2542, "skips": 2543, "avoid": 2544, "located": 2545, "archway": 2546, "midst": 2547, "part": 2548, "overlooks": 2549, "valley": 2550, "forested": 2551, "batsman": 2552, "misses": 2553, "unique": 2554, "ravine": 2555, "crevasse": 2556, "nips": 2557, "bared": 2558, "period": 2559, "european": 2560, "stoop": 2561, "trainer": 2562, "wrestling": 2563, "cotton": 2564, "candy": 2565, "sandal": 2566, "cloud": 2567, "footballers": 2568, "victory": 2569, "baseman": 2570, "league": 2571, "rival": 2572, "second": 2573, "hangs": 2574, "flannel": 2575, "streets": 2576, "opponents": 2577, "progress": 2578, "bit": 2579, "bluejeans": 2580, "cameraman": 2581, "pounces": 2582, "males": 2583, "vintage": 2584, "admire": 2585, "fashioned": 2586, "admiring": 2587, "cowboys": 2588, "rodeo": 2589, "bucking": 2590, "bulls": 2591, "props": 2592, "fisherman": 2593, "mist": 2594, "whitewater": 2595, "kayaks": 2596, "pitching": 2597, "practices": 2598, "punches": 2599, "trains": 2600, "husky": 2601, "emerging": 2602, "opens": 2603, "horseback": 2604, "moon": 2605, "cooking": 2606, "bound": 2607, "colliding": 2608, "dinosaur": 2609, "lime": 2610, "corgi": 2611, "obedience": 2612, "concert": 2613, "parasailing": 2614, "workers": 2615, "cash": 2616, "register": 2617, "tips": 2618, "jar": 2619, "tip": 2620, "knit": 2621, "kart": 2622, "feathered": 2623, "caution": 2624, "feather": 2625, "lolly": 2626, "purses": 2627, "cheek": 2628, "tracks": 2629, "pale": 2630, "casual": 2631, "growls": 2632, "tugging": 2633, "playhouse": 2634, "cabin": 2635, "focus": 2636, "oars": 2637, "shoot": 2638, "diner": 2639, "played": 2640, "8": 2641, "huddle": 2642, "musical": 2643, "juggles": 2644, "manicured": 2645, "growling": 2646, "seem": 2647, "doberman": 2648, "twig": 2649, "guards": 2650, "brings": 2651, "buried": 2652, "stones": 2653, "pointy": 2654, "quietly": 2655, "appear": 2656, "navigates": 2657, "sloped": 2658, "himself": 2659, "greenery": 2660, "cookies": 2661, "socializing": 2662, "smokes": 2663, "whist": 2664, "pickup": 2665, "headset": 2666, "visor": 2667, "tags": 2668, "creating": 2669, "billowing": 2670, "crash": 2671, "clings": 2672, "demonstrates": 2673, "pedals": 2674, "upright": 2675, "rear": 2676, "retrieve": 2677, "summer": 2678, "hitting": 2679, "videotaped": 2680, "canyon": 2681, "ceremony": 2682, "pauses": 2683, "alert": 2684, "skinny": 2685, "corridor": 2686, "jumper": 2687, "completes": 2688, "records": 2689, "bungee": 2690, "cord": 2691, "weeds": 2692, "we": 2693, "because": 2694, "rainbow": 
2695, "kite": 2696, "raincoat": 2697, "nears": 2698, "elevation": 2699, "juice": 2700, "fencing": 2701, "bundled": 2702, "characters": 2703, "billboard": 2704, "itself": 2705, "everywhere": 2706, "wasteland": 2707, "pebble": 2708, "pebbles": 2709, "rounding": 2710, "artists": 2711, "stopped": 2712, "flooded": 2713, "monster": 2714, "admires": 2715, "bandage": 2716, "hairstyle": 2717, "plaza": 2718, "square": 2719, "training": 2720, "asphalt": 2721, "return": 2722, "sprints": 2723, "leaf": 2724, "geyser": 2725, "casts": 2726, "shrubs": 2727, "wipes": 2728, "wiping": 2729, "belly": 2730, "lush": 2731, "countryside": 2732, "treks": 2733, "peaks": 2734, "flailing": 2735, "teaching": 2736, "officers": 2737, "policemen": 2738, "law": 2739, "speaking": 2740, "formation": 2741, "moss": 2742, "bumpy": 2743, "hay": 2744, "pumpkin": 2745, "star": 2746, "sparkler": 2747, "routine": 2748, "skirts": 2749, "auditorium": 2750, "23": 2751, "barriers": 2752, "aisle": 2753, "shoveling": 2754, "shovels": 2755, "praying": 2756, "hear": 2757, "bands": 2758, "windsurfing": 2759, "snack": 2760, "caps": 2761, "speeds": 2762, "early": 2763, "wristbands": 2764, "overpass": 2765, "attention": 2766, "saris": 2767, "tabby": 2768, "sunshade": 2769, "tropical": 2770, "resort": 2771, "dummy": 2772, "environment": 2773, "human": 2774, "doll": 2775, "gesturing": 2776, "driven": 2777, "boarder": 2778, "geese": 2779, "lower": 2780, "carpeted": 2781, "awning": 2782, "licks": 2783, "tuxedo": 2784, "smock": 2785, "extended": 2786, "scary": 2787, "skills": 2788, "fencers": 2789, "fair": 2790, "egret": 2791, "reeds": 2792, "upper": 2793, "driveway": 2794, "amid": 2795, "spilled": 2796, "sparks": 2797, "bearing": 2798, "gazes": 2799, "slalom": 2800, "poodles": 2801, "interact": 2802, "bone": 2803, "artificial": 2804, "hard": 2805, "tethered": 2806, "flowing": 2807, "wrestler": 2808, "wrestlers": 2809, "bedroom": 2810, "bonnets": 2811, "tuxedos": 2812, "arched": 2813, "pathway": 2814, "son": 2815, "motorboat": 2816, "determined": 2817, "expression": 2818, "mountainous": 2819, "tow": 2820, "becomes": 2821, "waterskis": 2822, "brooms": 2823, "forefront": 2824, "mom": 2825, "depicting": 2826, "jesus": 2827, "shrine": 2828, "carried": 2829, "waterski": 2830, "wakeboarding": 2831, "velvet": 2832, "basement": 2833, "song": 2834, "serious": 2835, "dribbling": 2836, "rafts": 2837, "coaching": 2838, "sprinting": 2839, "bread": 2840, "glides": 2841, "babies": 2842, "lagoon": 2843, "fixing": 2844, "swooping": 2845, "beak": 2846, "vehicles": 2847, "newspapers": 2848, "faucet": 2849, "spigot": 2850, "tap": 2851, "community": 2852, "rollerskating": 2853, "mock": 2854, "headdress": 2855, "earrings": 2856, "kayakers": 2857, "canoes": 2858, "sees": 2859, "wires": 2860, "junk": 2861, "rubble": 2862, "bushy": 2863, "courtyard": 2864, "slacks": 2865, "arabian": 2866, "offstage": 2867, "cycle": 2868, "shouting": 2869, "rimmed": 2870, "award": 2871, "baskets": 2872, "podium": 2873, "awaiting": 2874, "houses": 2875, "boston": 2876, "viewer": 2877, "shiny": 2878, "current": 2879, "soft": 2880, "wilderness": 2881, "nature": 2882, "decorative": 2883, "spraying": 2884, "squirting": 2885, "telescope": 2886, "gun": 2887, "office": 2888, "tickets": 2889, "screams": 2890, "bows": 2891, "yawns": 2892, "groceries": 2893, "rows": 2894, "kicker": 2895, "scenic": 2896, "peaceful": 2897, "morning": 2898, "skyline": 2899, "bracelet": 2900, "sleeves": 2901, "stall": 2902, "shelter": 2903, "burlap": 2904, "sack": 2905, "shadows": 2906, "cartwheel": 2907, "ascends": 2908, 
"sleds": 2909, "dragging": 2910, "pirate": 2911, "hug": 2912, "vegetables": 2913, "costumed": 2914, "spout": 2915, "skatepark": 2916, "upraised": 2917, "doors": 2918, "spikes": 2919, "ambulance": 2920, "seattle": 2921, "observes": 2922, "hotel": 2923, "solitary": 2924, "moment": 2925, "flops": 2926, "tied": 2927, "carying": 2928, "hardhat": 2929, "length": 2930, "surfboarder": 2931, "dyed": 2932, "industrial": 2933, "terrace": 2934, "waterside": 2935, "snowmobiles": 2936, "helmeted": 2937, "coverings": 2938, "good": 2939, "reception": 2940, "reddish": 2941, "necked": 2942, "knife": 2943, "supports": 2944, "tails": 2945, "sooner": 2946, "marks": 2947, "lipstick": 2948, "torso": 2949, "afternoon": 2950, "reflecting": 2951, "soldiers": 2952, "ignoring": 2953, "checks": 2954, "operating": 2955, "speaker": 2956, "late": 2957, "secured": 2958, "footprints": 2959, "demonstrating": 2960, "massage": 2961, "scuba": 2962, "diver": 2963, "travel": 2964, "polaris": 2965, "cargo": 2966, "causing": 2967, "spiral": 2968, "sporting": 2969, "bib": 2970, "toss": 2971, "labrador": 2972, "called": 2973, "lounge": 2974, "abandoned": 2975, "lamp": 2976, "working": 2977, "weird": 2978, "snowing": 2979, "stripe": 2980, "glacier": 2981, "contents": 2982, "marked": 2983, "gathering": 2984, "computers": 2985, "teacher": 2986, "filmed": 2987, "lecture": 2988, "dotted": 2989, "dot": 2990, "sledge": 2991, "lobby": 2992, "lens": 2993, "ridding": 2994, "shirted": 2995, "blown": 2996, "acrobatic": 2997, "huddled": 2998, "grin": 2999, "romp": 3000, "tulips": 3001, "adjusting": 3002, "incline": 3003, "crouched": 3004, "squat": 3005, "multicolor": 3006, "lie": 3007, "individual": 3008, "crown": 3009, "photographers": 3010, "when": 3011, "festive": 3012, "lease": 3013, "dumps": 3014, "curved": 3015, "pedaling": 3016, "worn": 3017, "wraps": 3018, "objects": 3019, "headscarfs": 3020, "packages": 3021, "traverses": 3022, "test": 3023, "performance": 3024, "zara": 3025, "strike": 3026, "goofy": 3027, "bigger": 3028, "passed": 3029, "halter": 3030, "rushes": 3031, "handlebars": 3032, "observing": 3033, "paints": 3034, "belt": 3035, "whom": 3036, "kicked": 3037, "curiously": 3038, "chewed": 3039, "vacant": 3040, "retrievers": 3041, "bricks": 3042, "laid": 3043, "fairground": 3044, "darker": 3045, "icicle": 3046, "stripy": 3047, "canon": 3048, "oar": 3049, "wild": 3050, "motor": 3051, "campground": 3052, "campsite": 3053, "blazing": 3054, "cobbled": 3055, "than": 3056, "storm": 3057, "drain": 3058, "torn": 3059, "scruffy": 3060, "love": 3061, "language": 3062, "australian": 3063, "pets": 3064, "apartment": 3065, "fedora": 3066, "spiked": 3067, "cycling": 3068, "wrapping": 3069, "spreading": 3070, "flapping": 3071, "tinkerbell": 3072, "jacked": 3073, "muscular": 3074, "quilt": 3075, "saber": 3076, "sword": 3077, "puffy": 3078, "tired": 3079, "grown": 3080, "logs": 3081, "skipping": 3082, "swords": 3083, "height": 3084, "tray": 3085, "pillows": 3086, "spiderman": 3087, "furniture": 3088, "identically": 3089, "bananas": 3090, "collide": 3091, "tumbling": 3092, "posts": 3093, "main": 3094, "sails": 3095, "sailing": 3096, "removing": 3097, "garter": 3098, "piercing": 3099, "earring": 3100, "jogs": 3101, "everyone": 3102, "sad": 3103, "calf": 3104, "labs": 3105, "size": 3106, "sling": 3107, "either": 3108, "nighttime": 3109, "laptops": 3110, "sundress": 3111, "wrestles": 3112, "battling": 3113, "examining": 3114, "soaked": 3115, "keeps": 3116, "rollerskates": 3117, "overlook": 3118, "livestock": 3119, "accompanied": 3120, "telephone": 
3121, "booths": 3122, "booth": 3123, "brushes": 3124, "fingerpaints": 3125, "apron": 3126, "decorations": 3127, "squirts": 3128, "strapped": 3129, "dragged": 3130, "droplets": 3131, "oxen": 3132, "performers": 3133, "dive": 3134, "ollie": 3135, "dodges": 3136, "gowns": 3137, "terriers": 3138, "sandbox": 3139, "parasails": 3140, "windsurfer": 3141, "was": 3142, "fell": 3143, "labradoodle": 3144, "rollerskater": 3145, "fort": 3146, "built": 3147, "mouthed": 3148, "bay": 3149, "facepaint": 3150, "parent": 3151, "suitcase": 3152, "themed": 3153, "scratches": 3154, "kind": 3155, "cleaning": 3156, "container": 3157, "pail": 3158, "slip": 3159, "golf": 3160, "india": 3161, "bedspread": 3162, "petting": 3163, "strapless": 3164, "toddlers": 3165, "playpen": 3166, "padded": 3167, "placing": 3168, "jogger": 3169, "louis": 3170, "vuitton": 3171, "bath": 3172, "bathtub": 3173, "bathroom": 3174, "mechanical": 3175, "rabbit": 3176, "goatee": 3177, "floatation": 3178, "florida": 3179, "rally": 3180, "conference": 3181, "battle": 3182, "submerges": 3183, "missing": 3184, "tooth": 3185, "powder": 3186, "mowed": 3187, "call": 3188, "tackles": 3189, "paraglider": 3190, "residential": 3191, "fountains": 3192, "mexican": 3193, "aims": 3194, "engaged": 3195, "sox": 3196, "railroad": 3197, "firefighter": 3198, "hood": 3199, "fireman": 3200, "engine": 3201, "zigzag": 3202, "atvs": 3203, "descent": 3204, "western": 3205, "youth": 3206, "leggings": 3207, "revealing": 3208, "camp": 3209, "thick": 3210, "feeding": 3211, "eyebrows": 3212, "moustache": 3213, "cigars": 3214, "marx": 3215, "equestrian": 3216, "tee": 3217, "theme": 3218, "raise": 3219, "phones": 3220, "cellphones": 3221, "crack": 3222, "symbol": 3223, "change": 3224, "led": 3225, "goose": 3226, "cuts": 3227, "fresh": 3228, "youngsters": 3229, "exercise": 3230, "hugged": 3231, "cooks": 3232, "nightclub": 3233, "form": 3234, "stacks": 3235, "washed": 3236, "apples": 3237, "retaining": 3238, "gondola": 3239, "rowers": 3240, "skyscraper": 3241, "stunts": 3242, "occupied": 3243, "nipple": 3244, "piercings": 3245, "bulldog": 3246, "movie": 3247, "squats": 3248, "member": 3249, "frisbees": 3250, "aquarium": 3251, "seal": 3252, "squeezing": 3253, "surround": 3254, "seems": 3255, "self": 3256, "cheeks": 3257, "seats": 3258, "safari": 3259, "cracked": 3260, "clears": 3261, "ramps": 3262, "arch": 3263, "sweatshirts": 3264, "squirt": 3265, "pasture": 3266, "diaper": 3267, "rods": 3268, "begging": 3269, "whispering": 3270, "gathers": 3271, "firetruck": 3272, "firefighters": 3273, "strips": 3274, "fields": 3275, "hilly": 3276, "attacking": 3277, "greyish": 3278, "mark": 3279, "check": 3280, "kissed": 3281, "blues": 3282, "brothers": 3283, "striking": 3284, "haircut": 3285, "coated": 3286, "happening": 3287, "tv": 3288, "balck": 3289, "relax": 3290, "converse": 3291, "pursued": 3292, "obstacles": 3293, "stride": 3294, "crocodile": 3295, "obama": 3296, "keep": 3297, "misty": 3298, "chalk": 3299, "shocked": 3300, "messenger": 3301, "ink": 3302, "trailing": 3303, "treads": 3304, "images": 3305, "alligator": 3306, "mean": 3307, "bears": 3308, "passerby": 3309, "expressions": 3310, "concerned": 3311, "squinting": 3312, "stuff": 3313, "shoots": 3314, "trip": 3315, "grasps": 3316, "bowler": 3317, "marble": 3318, "attack": 3319, "cook": 3320, "stove": 3321, "intense": 3322, "speeding": 3323, "double": 3324, "captured": 3325, "domino": 3326, "pizza": 3327, "tower": 3328, "licked": 3329, "galloping": 3330, "paddled": 3331, "fighters": 3332, "recently": 3333, "instructor": 3334, 
"drenched": 3335, "electrical": 3336, "games": 3337, "pattern": 3338, "bales": 3339, "cobblestone": 3340, "island": 3341, "speedo": 3342, "panting": 3343, "sacks": 3344, "u": 3345, "crouch": 3346, "wakeboards": 3347, "saddle": 3348, "trailer": 3349, "shed": 3350, "dramatically": 3351, "vending": 3352, "purchasing": 3353, "lack": 3354, "hundreds": 3355, "defends": 3356, "portable": 3357, "toilets": 3358, "port": 3359, "potties": 3360, "blossoms": 3361, "flowering": 3362, "bodies": 3363, "much": 3364, "exiting": 3365, "tights": 3366, "books": 3367, "blossoming": 3368, "topped": 3369, "loaded": 3370, "footballer": 3371, "wolf": 3372, "if": 3373, "spoon": 3374, "vast": 3375, "find": 3376, "mets": 3377, "oriental": 3378, "skin": 3379, "thatched": 3380, "penske": 3381, "downtown": 3382, "story": 3383, "pinata": 3384, "collared": 3385, "contest": 3386, "awkwardly": 3387, "cardigan": 3388, "standard": 3389, "restaraunt": 3390, "stores": 3391, "brindle": 3392, "forehead": 3393, "paperwork": 3394, "n": 3395, "pro": 3396, "america": 3397, "explosion": 3398, "dusty": 3399, "glider": 3400, "pelican": 3401, "toilet": 3402, "drummer": 3403, "saxophones": 3404, "saxophone": 3405, "married": 3406, "gown": 3407, "7": 3408, "gift": 3409, "refrigerator": 3410, "firing": 3411, "puck": 3412, "site": 3413, "jug": 3414, "swan": 3415, "coloring": 3416, "menus": 3417, "waiter": 3418, "menu": 3419, "unhappy": 3420, "elevated": 3421, "minivan": 3422, "evening": 3423, "signal": 3424, "floaties": 3425, "say": 3426, "cards": 3427, "vine": 3428, "harbor": 3429, "tugboat": 3430, "bagpipes": 3431, "most": 3432, "plates": 3433, "needle": 3434, "alike": 3435, "letter": 3436, "ducks": 3437, "produce": 3438, "starring": 3439, "russell": 3440, "corndogs": 3441, "frames": 3442, "winks": 3443, "winking": 3444, "touched": 3445, "drapped": 3446, "soap": 3447, "tether": 3448, "displayed": 3449, "mime": 3450, "suited": 3451, "oversized": 3452, "presents": 3453, "scared": 3454, "apparatus": 3455, "ornate": 3456, "barricade": 3457, "colorfully": 3458, "tiled": 3459, "carousel": 3460, "model": 3461, "biplane": 3462, "helicopter": 3463, "sizes": 3464, "purchase": 3465, "product": 3466, "pinned": 3467, "displays": 3468, "awards": 3469, "ragged": 3470, "obscured": 3471, "wielding": 3472, "sleigh": 3473, "feature": 3474, "otherwise": 3475, "collection": 3476, "garage": 3477, "howling": 3478, "royal": 3479, "basketballs": 3480, "frolicking": 3481, "loose": 3482, "groups": 3483, "huskies": 3484, "greenish": 3485, "prancing": 3486, "finished": 3487, "rises": 3488, "fiery": 3489, "windy": 3490, "foothills": 3491, "piles": 3492, "prairie": 3493, "numerous": 3494, "suds": 3495, "clean": 3496, "embraces": 3497, "draft": 3498, "waist": 3499, "diners": 3500, "aiming": 3501, "defending": 3502, "crossbones": 3503, "pirates": 3504, "curled": 3505, "local": 3506, "tracksuit": 3507, "swans": 3508, "messy": 3509, "else": 3510, "trim": 3511, "huts": 3512, "boa": 3513, "grilling": 3514, "buy": 3515, "roadway": 3516, "cries": 3517, "sumo": 3518, "tandem": 3519, "yorkie": 3520, "trophy": 3521, "winner": 3522, "fatigues": 3523, "assisting": 3524, "boundary": 3525, "inground": 3526, "devil": 3527, "bug": 3528, "insect": 3529, "washing": 3530, "americans": 3531, "confused": 3532, "scooters": 3533, "tricycles": 3534, "cheerleading": 3535, "sari": 3536, "involving": 3537, "theater": 3538, "dome": 3539, "speckled": 3540, "pins": 3541, "native": 3542, "prize": 3543, "embankment": 3544, "cartwheels": 3545, "were": 3546, "establishment": 3547, "pre": 3548, 
"cliffside": 3549, "dragon": 3550, "god": 3551, "diapers": 3552, "capes": 3553, "freshly": 3554, "cone": 3555, "roadside": 3556, "clap": 3557, "deer": 3558, "ollies": 3559, "badminton": 3560, "dove": 3561, "applies": 3562, "tiara": 3563, "wig": 3564, "hooping": 3565, "engulfed": 3566, "12": 3567, "offers": 3568, "firemen": 3569, "laps": 3570, "tangled": 3571, "railings": 3572, "ultimate": 3573, "straddles": 3574, "balanced": 3575, "zoo": 3576, "sprint": 3577, "dye": 3578, "laundry": 3579, "chains": 3580, "struggle": 3581, "entering": 3582, "mop": 3583, "newborn": 3584, "television": 3585, "litter": 3586, "prevent": 3587, "plush": 3588, "presses": 3589, "strolls": 3590, "chops": 3591, "handgun": 3592, "pistol": 3593, "used": 3594, "protesters": 3595, "rush": 3596, "gull": 3597, "seagull": 3598, "sorts": 3599, "shadowed": 3600, "will": 3601, "wheat": 3602, "flaming": 3603, "ballet": 3604, "draw": 3605, "crib": 3606, "accordion": 3607, "priest": 3608, "chili": 3609, "escape": 3610, "tiles": 3611, "steam": 3612, "keyboard": 3613, "feeds": 3614, "lollipop": 3615, "eggs": 3616, "easter": 3617, "noodles": 3618, "march": 3619, "turkeys": 3620, "hawaiian": 3621, "carts": 3622, "twin": 3623, "michael": 3624, "jackson": 3625, "trekking": 3626, "hooking": 3627, "railway": 3628, "banks": 3629, "browses": 3630, "ornamental": 3631, "washes": 3632, "nets": 3633, "pierced": 3634, "penguins": 3635, "zombie": 3636, "bouquet": 3637, "cutout": 3638, "barn": 3639, "words": 3640, "28": 3641, "sidelines": 3642, "marking": 3643, "clover": 3644, "rocket": 3645, "armbands": 3646, "thorugh": 3647, "squeeze": 3648, "downward": 3649, "lounging": 3650, "rubs": 3651, "cutting": 3652, "views": 3653, "department": 3654, "st": 3655, "french": 3656, "pigeon": 3657, "crocs": 3658, "activity": 3659, "film": 3660, "uno": 3661, "bass": 3662, "wetsuits": 3663, "oppose": 3664, "clause": 3665, "odd": 3666, "limo": 3667, "limousine": 3668, "figures": 3669, "punk": 3670, "potato": 3671, "chip": 3672, "unknown": 3673, "overcoat": 3674, "campfire": 3675, "sequined": 3676, "serving": 3677, "hummer": 3678, "towed": 3679, "beat": 3680, "broom": 3681, "conversations": 3682, "yelling": 3683, "ornament": 3684, "us": 3685, "vegetation": 3686, "shots": 3687, "crank": 3688, "target": 3689, "protection": 3690, "fires": 3691, "marches": 3692, "hovers": 3693, "hovering": 3694, "siting": 3695, "nursing": 3696, "milk": 3697, "political": 3698, "plains": 3699, "sunrise": 3700, "fries": 3701, "hearts": 3702, "tutus": 3703, "ballerinas": 3704, "wicker": 3705, "robot": 3706, "emitting": 3707, "conversing": 3708, "guiding": 3709, "astride": 3710, "guided": 3711, "wrists": 3712, "smartly": 3713, "statues": 3714, "smelling": 3715, "examine": 3716, "littered": 3717, "sponge": 3718, "bats": 3719, "ages": 3720, "ponchos": 3721, "sparkling": 3722, "lighthouse": 3723, "tussling": 3724, "missed": 3725, "wheelers": 3726, "ok": 3727, "scrubby": 3728, "rappelling": 3729, "participates": 3730, "sunshine": 3731, "sheltie": 3732, "sippy": 3733, "marina": 3734, "mountaineer": 3735, "gazing": 3736, "scuffle": 3737, "swung": 3738, "pride": 3739, "ipod": 3740, "speech": 3741, "armenian": 3742, "genocide": 3743, "priests": 3744, "mixed": 3745, "submerged": 3746, "unison": 3747, "united": 3748, "states": 3749, "spin": 3750, "seesaw": 3751, "inspecting": 3752, "faded": 3753, "legos": 3754, "memorial": 3755, "flinging": 3756, "notes": 3757, "copper": 3758, "leaped": 3759, "effort": 3760, "leaned": 3761, "exhaust": 3762, "merchandise": 3763, "claps": 3764, "blankets": 3765, 
"mardi": 3766, "gras": 3767, "waking": 3768, "lassie": 3769, "squares": 3770, "leafs": 3771, "shell": 3772, "cooling": 3773, "pretend": 3774, "bale": 3775, "pumpkins": 3776, "stack": 3777, "magazine": 3778, "kangaroo": 3779, "mingling": 3780, "stained": 3781, "tongues": 3782, "rooftop": 3783, "paintball": 3784, "shack": 3785, "whit": 3786, "bookstore": 3787, "investigate": 3788, "streaks": 3789, "company": 3790, "contraption": 3791, "herd": 3792, "chained": 3793, "map": 3794, "burn": 3795, "dragsters": 3796, "lion": 3797, "scrambling": 3798, "chess": 3799, "hawk": 3800, "lip": 3801, "gaze": 3802, "flung": 3803, "streamers": 3804, "sucks": 3805, "dew": 3806, "meter": 3807, "locking": 3808, "cutouts": 3809, "region": 3810, "heron": 3811, "countertop": 3812, "snap": 3813, "laden": 3814, "bodyboard": 3815, "evil": 3816, "punching": 3817, "interviews": 3818, "tide": 3819, "syrup": 3820, "interviewed": 3821, "mr": 3822, "letters": 3823, "p": 3824, "fully": 3825, "crate": 3826, "picket": 3827, "rifle": 3828, "library": 3829, "segway": 3830, "substance": 3831, "backgroud": 3832, "arrow": 3833, "patiently": 3834, "extremely": 3835, "snarling": 3836, "drawn": 3837, "cricketer": 3838, "search": 3839, "condoms": 3840, "twirls": 3841, "extravagant": 3842, "bmw": 3843, "eagle": 3844, "join": 3845, "frolic": 3846, "25": 3847, "rafters": 3848, "bee": 3849, "mascot": 3850, "build": 3851, "bathe": 3852, "wigs": 3853, "roses": 3854, "feed": 3855, "tilted": 3856, "sidecar": 3857, "corners": 3858, "renaissance": 3859, "directly": 3860, "washington": 3861, "melted": 3862, "persons": 3863, "tackler": 3864, "radio": 3865, "flyer": 3866, "favorite": 3867, "hamburgers": 3868, "19": 3869, "swoops": 3870, "hydrant": 3871, "umpire": 3872, "bowling": 3873, "barely": 3874, "observed": 3875, "real": 3876, "shoppers": 3877, "products": 3878, "guide": 3879, "frying": 3880, "lobster": 3881, "present": 3882, "seagulls": 3883, "cannon": 3884, "centipede": 3885, "rugged": 3886, "bitten": 3887, "redbull": 3888, "shake": 3889, "spotlight": 3890, "binoculars": 3891, "hospital": 3892, "perspective": 3893, "horn": 3894, "nt": 3895, "judge": 3896, "rover": 3897, "earpiece": 3898, "photographing": 3899, "serves": 3900, "opened": 3901, "ruins": 3902, "goth": 3903, "buggy": 3904, "microphones": 3905, "rolled": 3906, "llama": 3907, "beachgoers": 3908, "flop": 3909, "completing": 3910, "derby": 3911, "advertisements": 3912, "medals": 3913, "classroom": 3914, "mounds": 3915, "dandelions": 3916, "steer": 3917, "care": 3918, "amish": 3919, "mattress": 3920, "sleeve": 3921, "veil": 3922, "rocker": 3923, "sash": 3924, "lanterns": 3925, "peanut": 3926, "butter": 3927, "slipper": 3928, "acoustic": 3929, "gigolo": 3930, "parrot": 3931, "bounced": 3932, "fives": 3933, "tour": 3934, "angels": 3935, "lodge": 3936, "barber": 3937, "straining": 3938, "lavender": 3939, "nurses": 3940, "loading": 3941, "lemonade": 3942, "brides": 3943, "sundown": 3944, "ringing": 3945, "hopper": 3946, "nails": 3947, "trade": 3948, "poem": 3949, "offering": 3950, "toothbrush": 3951, "jeeps": 3952, "groucho": 3953, "bout": 3954, "served": 3955, "cigarettes": 3956, "carton": 3957, "dumbbell": 3958, "wharf": 3959, "recorder": 3960, "breeze": 3961, "tagged": 3962, "desks": 3963, "hopscotch": 3964, "cry": 3965, "dc": 3966, "bathrobe": 3967, "vw": 3968, "fighter": 3969, "parasailer": 3970, "controller": 3971, "hell": 3972, "spell": 3973, "pajama": 3974, "those": 3975, "windsurfs": 3976, "escalator": 3977, "officials": 3978, "comic": 3979, "henna": 3980, "tubing": 3981, 
"bernard": 3982, "muslim": 3983, "jukebox": 3984, "cracker": 3985, "potty": 3986, "sewing": 3987, "impeach": 3988, "flings": 3989, "bunk": 3990, "chalkboard": 3991, "tunic": 3992, "demonstration": 3993, "highland": 3994, "africans": 3995, "graduation": 3996, "leapfrog": 3997, "looked": 3998, "dolphins": 3999, "shipping": 4000, "cigar": 4001, "chairlift": 4002, "sink": 4003, "barrels": 4004, "lizards": 4005, "donkeys": 4006, "beating": 4007, "dunk": 4008, "kennel": 4009, "donkey": 4010, "whales": 4011, "einstein": 4012, "rollerbladers": 4013, "bases": 4014, "noodle": 4015, "pharmacy": 4016, "sunbathe": 4017, "milkshake": 4018, "turkey": 4019, "dumpster": 4020, "stools": 4021, "": 4022, "": 4023, "": 4024, "": 0} --------------------------------------------------------------------------------