├── modules
│   ├── __init__.py
│   ├── metrics.py
│   ├── Beam_search.py
│   ├── model.py
│   ├── ori_models.py
│   └── custom_callbacks.py
├── web_app
│   ├── modules
│   │   ├── __init__.py
│   │   ├── Beam_search.py
│   │   └── model.py
│   ├── tmp
│   │   ├── dog.jpeg
│   │   ├── fair.jpeg
│   │   ├── trans.jpg
│   │   ├── Iron-Man.jpg
│   │   ├── download.jpeg
│   │   ├── profpic.jpeg
│   │   ├── download (1).jpeg
│   │   ├── maxresdefault.jpg
│   │   ├── cc_seedling_250.jpg
│   │   ├── running_cover_1.jpg
│   │   ├── modi-2014-i-day_file-photo.jpg
│   │   ├── 578211-gettyimages-542930526.jpg
│   │   ├── 1200px-Kaldari_Stellaria_media_01.jpg
│   │   ├── 1800x1200_cat_relaxing_on_patio_other.jpg
│   │   ├── 01-guina-nationalgeographic_2750749.adapt.1900.1.jpg
│   │   └── Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg
│   ├── templates
│   │   ├── base.html
│   │   └── index.html
│   ├── static
│   │   ├── css
│   │   │   └── main.css
│   │   ├── js
│   │   │   └── main.js
│   │   └── Vocab_5_cap_per_img_2_min_word_freq.json
│   ├── application.py
│   ├── Beam_search.py
│   └── ori_models.py
├── snapshots
│   ├── Z_t.png
│   ├── eval.jpeg
│   ├── model.png
│   ├── Z_t copy.png
│   ├── lr_find.png
│   ├── beam_search.png
│   ├── caption_gen.gif
│   ├── loss_stage1.png
│   ├── loss_stage2.png
│   ├── encoded_image_vec.png
│   ├── encoded_image_vec copy.png
│   ├── attention_and_alpha_scores.png
│   └── attention_and_alpha_scores copy.png
├── .gitignore
├── fastext_wordembedding.py
├── requirements.txt
├── notebook2script.py
└── readme.md
/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/web_app/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/snapshots/Z_t.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/Z_t.png
--------------------------------------------------------------------------------
/snapshots/eval.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/eval.jpeg
--------------------------------------------------------------------------------
/snapshots/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/model.png
--------------------------------------------------------------------------------
/web_app/tmp/dog.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/dog.jpeg
--------------------------------------------------------------------------------
/snapshots/Z_t copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/Z_t copy.png
--------------------------------------------------------------------------------
/snapshots/lr_find.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/lr_find.png
--------------------------------------------------------------------------------
/web_app/tmp/fair.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/fair.jpeg
--------------------------------------------------------------------------------
/web_app/tmp/trans.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/trans.jpg
--------------------------------------------------------------------------------
/snapshots/beam_search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/beam_search.png
--------------------------------------------------------------------------------
/snapshots/caption_gen.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/caption_gen.gif
--------------------------------------------------------------------------------
/snapshots/loss_stage1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/loss_stage1.png
--------------------------------------------------------------------------------
/snapshots/loss_stage2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/loss_stage2.png
--------------------------------------------------------------------------------
/web_app/tmp/Iron-Man.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/Iron-Man.jpg
--------------------------------------------------------------------------------
/web_app/tmp/download.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/download.jpeg
--------------------------------------------------------------------------------
/web_app/tmp/profpic.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/profpic.jpeg
--------------------------------------------------------------------------------
/web_app/tmp/download (1).jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/download (1).jpeg
--------------------------------------------------------------------------------
/web_app/tmp/maxresdefault.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/maxresdefault.jpg
--------------------------------------------------------------------------------
/snapshots/encoded_image_vec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/encoded_image_vec.png
--------------------------------------------------------------------------------
/web_app/tmp/cc_seedling_250.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/cc_seedling_250.jpg
--------------------------------------------------------------------------------
/web_app/tmp/running_cover_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/running_cover_1.jpg
--------------------------------------------------------------------------------
/snapshots/encoded_image_vec copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/encoded_image_vec copy.png
--------------------------------------------------------------------------------
/snapshots/attention_and_alpha_scores.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/attention_and_alpha_scores.png
--------------------------------------------------------------------------------
/web_app/tmp/modi-2014-i-day_file-photo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/modi-2014-i-day_file-photo.jpg
--------------------------------------------------------------------------------
/snapshots/attention_and_alpha_scores copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/snapshots/attention_and_alpha_scores copy.png
--------------------------------------------------------------------------------
/web_app/tmp/578211-gettyimages-542930526.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/578211-gettyimages-542930526.jpg
--------------------------------------------------------------------------------
/web_app/tmp/1200px-Kaldari_Stellaria_media_01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/1200px-Kaldari_Stellaria_media_01.jpg
--------------------------------------------------------------------------------
/web_app/tmp/1800x1200_cat_relaxing_on_patio_other.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/1800x1200_cat_relaxing_on_patio_other.jpg
--------------------------------------------------------------------------------
/web_app/tmp/01-guina-nationalgeographic_2750749.adapt.1900.1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/01-guina-nationalgeographic_2750749.adapt.1900.1.jpg
--------------------------------------------------------------------------------
/web_app/tmp/Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skumarr53/Image-Caption-Generation-using-Fastai/HEAD/web_app/tmp/Charlock_2010_05_12_Chesterfield_CuckooWAy_KivertonPark_061p2.jpg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .Coo_dataset
3 | .ipynb_checkpoints
4 | .vscode
5 | caption_datasets
6 | exp
7 | flicker8k-dataset
8 | models
9 | modules_old
10 | runs
11 | web_app/models
12 | BEST_checkpoint_Vocab_5_cap_per_img_2_min_word_freq_withoutFineTuning.pth.tar
13 | checkpoint_Vocab_5_cap_per_img_2_min_word_freq_yesturday.pth.tar
14 | checkpoint_Vocab_5_cap_per_img_2_min_word_freq.pth.tar
15 | Main-checkpoint.ipynb
16 | main-Finalized_LanguageModel.ipynb
17 | nb_prepare_WordEmb.py
18 | BEST_checkpoint_Vocab_5_cap_per_img_2_min_word_freq.pth.tar
19 | Article.md
20 | Untitled.ipynb
21 |
--------------------------------------------------------------------------------
/fastext_wordembedding.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 | import numpy as np
3 | import fasttext as ft
4 | import json
5 | import pickle
6 | from pdb import set_trace
7 |
8 | emb_path = '/home/skumar/.nlp_wordembeddings/cc.en.300.bin'
9 | word_map_path = '/home/skumar/DataScience/Projects_Section/Projects_Working/Image_Captioning_Pytorch/flicker8k-dataset/Vocab_5_cap_per_img_2_min_word_freq.json' # word map index dict path
10 | embExport_pkl_path = '/home/skumar/DataScience/Projects_Section/Projects_Working/Image_Captioning_Pytorch/flicker8k-dataset/Fastext_embedd_wordMap.pkl'
11 | emb_dim = 300
12 |
13 | # get words in the wordmap with index
14 |
15 | with open(word_map_path,'r') as j:
16 | word_map = json.load(j)
17 |
18 | # create an array of word vectors, rows aligned with the word-map indices
19 | word_emb = np.zeros((len(word_map),emb_dim))
20 | missing = ['NA']*len(word_map)
21 |
22 | # load fastText word vectors
23 | en_vecs = ft.load_model(str(emb_path))
24 | vocab_words = set(en_vecs.get_words())  # build the lookup once; calling get_words() inside the loop rescans the vocabulary every iteration
25 | for i,k in tqdm(enumerate(word_map)):
26 |     if k in vocab_words:
27 |         word_emb[i] = en_vecs.get_word_vector(k)
28 |     else:
29 |         missing[i] = k
30 | 
31 | print(missing)
32 | 
33 | with open(embExport_pkl_path,'wb') as f:
34 |     pickle.dump(word_emb, f, 2)
--------------------------------------------------------------------------------
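
The script above exports a (vocab_size, 300) NumPy matrix whose rows line up with the word-map indices. A minimal sketch of loading that pickle back into an `nn.Embedding` layer, mirroring `create_emb` in /modules/model.py (the path below is illustrative):

import pickle
import torch
from torch import nn

pkl_path = 'Fastext_embedd_wordMap.pkl'  # illustrative; the script writes it under flicker8k-dataset/
with open(pkl_path, 'rb') as f:
    word_emb = pickle.load(f)  # (vocab_size, 300) numpy array aligned with the word map

emb_layer = nn.Embedding(*word_emb.shape)
emb_layer.weight.data = torch.from_numpy(word_emb).float()  # copy the pretrained vectors into the layer
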
/web_app/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Caption Generator Demo
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
23 |
24 |
{% block content %}{% endblock %}
25 |
26 |
27 |
--------------------------------------------------------------------------------
/web_app/static/css/main.css:
--------------------------------------------------------------------------------
1 | .img-preview {
2 | margin-left: auto;
3 | margin-right: auto;
4 | width: 256px;
5 | height: 256px;
6 | position: relative;
7 | border: 5px solid #F8F8F8;
8 | box-shadow: 0px 2px 4px 0px rgba(0, 0, 0, 0.1);
9 | margin-top: 1em;
10 | margin-bottom: 1em;
11 | }
12 |
13 | .img-preview>div {
14 | margin-left: auto;
15 | margin-right: auto;
16 | width: 100%;
17 | height: 100%;
18 | background-size: 256px 256px;
19 | background-repeat: no-repeat;
20 | background-position: center;
21 | }
22 |
23 | input[type="file"] {
24 | display: none;
25 | }
26 |
27 | .upload-label{
28 | display: inline-block;
29 | padding: 12px 30px;
30 | border-radius: .4rem;
31 | background: cadetblue;
32 | color: #fff;
33 | font-size: 1em;
34 | transition: all .4s;
35 | cursor: pointer;
36 | }
37 |
38 | .upload-label:hover{
39 | background: #34495E;
40 | color: darkgray;
41 | font-weight: bold;
42 | }
43 |
44 | .loader {
45 | border: 8px solid #f3f3f3; /* Light grey */
46 |     border-top: 8px solid maroon; /* Maroon */
47 | border-radius: 50%;
48 | width: 50px;
49 | height: 50px;
50 | animation: spin 1s linear infinite;
51 | }
52 |
53 | @keyframes spin {
54 | 0% { transform: rotate(0deg); }
55 | 100% { transform: rotate(360deg); }
56 | }
--------------------------------------------------------------------------------
/web_app/static/js/main.js:
--------------------------------------------------------------------------------
1 | $(document).ready(function () {
2 | // Init
3 | $('.image-section').hide();
4 | $('.loader').hide();
5 | $('#result').hide();
6 |
7 | // Upload Preview
8 | function readURL(input) {
9 | if (input.files && input.files[0]) {
10 | var reader = new FileReader();
11 | reader.onload = function (e) {
12 | $('#imagePreview').css('background-image', 'url(' + e.target.result + ')');
13 | $('#imagePreview').hide();
14 | $('#imagePreview').fadeIn(650);
15 | }
16 | reader.readAsDataURL(input.files[0]);
17 | }
18 | }
19 | $("#imageUpload").change(function () {
20 | $('.image-section').show();
21 | $('#btn-predict').show();
22 | $('#result').text('');
23 | $('#result').hide();
24 | readURL(this);
25 | });
26 |
27 | // Predict
28 | $('#btn-predict').click(function () {
29 | var form_data = new FormData($('#upload-file')[0]);
30 |
31 | // Show loading animation
32 | $(this).hide();
33 | $('.loader').show();
34 |
35 | // Make prediction by calling api /predict
36 | $.ajax({
37 | type: 'POST',
38 | url: '/predict',
39 | data: form_data,
40 | contentType: false,
41 | cache: false,
42 | processData: false,
43 | async: true,
44 |             success: function (data) {
45 |                 // Get and display the result
46 |                 $('.loader').hide();
47 |                 $('#result').fadeIn(600);
48 |                 $('#result').text(data);
49 |                 console.log('Success!');
50 |             },
51 |             error: function () {
52 |                 // Hide the spinner so the UI does not hang when /predict fails
53 |                 $('.loader').hide();
54 |                 $('#result').fadeIn(600);
55 |                 $('#result').text('Prediction failed. Please try again.');
56 |             },
57 |         });
58 |     });
59 | 
60 | });
61 | 
--------------------------------------------------------------------------------
/web_app/application.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, render_template, flash, request, jsonify, Markup
2 | import os
3 | import torch
4 | from pathlib2 import Path
5 | import logging
6 | from modules.Beam_search import *
7 | from PIL import Image
8 | import json
9 | from torchvision import transforms
10 | #import settings
11 |
12 | # set device
13 | device = 'cpu'
14 | #defaults.device = torch.device('cpu')
15 |
16 | # Load model architecture and parameters
17 | path = Path()
18 |
19 | checkpoint_path = "models/Best_model.pth"
20 |
21 | # Vocab dict loading
22 | vocab = json.load((path/"static/Vocab_5_cap_per_img_2_min_word_freq.json").open('rb'))
23 | ind_str = dict(zip(vocab.values(),vocab.keys()))
24 |
25 | # transformations
26 | transformations = transforms.Compose([
27 | transforms.Resize((224,224)),
28 | transforms.ToTensor(),
29 | transforms.Normalize([0.5238, 0.5003, 0.4718], [0.3159, 0.3091, 0.3216])])
30 |
31 |
32 | # set flask params
33 | app = Flask(__name__)
34 |
35 |
36 | app.config["IMAGE_UPLOADS"] = 'tmp'
37 |
38 | @app.errorhandler(500)
39 | def server_error(e):
40 |     logging.exception('internal server error')
41 |     return """
42 |     An internal error occurred: {}
43 |     """.format(e), 500
44 |
45 | @app.route("/", methods=['GET'])
46 | def startup():
47 | return render_template('index.html') # pred_class
48 |
49 | @app.route('/predict', methods=["GET",'POST'])
50 | def predict():
51 | if request.method == "POST":
52 | # if not request.files:
53 | image = request.files['file']
54 | upload_path = os.path.join(app.config["IMAGE_UPLOADS"], image.filename)
55 | image.save(upload_path)
56 | caps = beam_search(checkpoint_path,img_path = upload_path, beam_size = 5, vocab = vocab, transforms = transformations,device=device)
57 | caps = [ind_str[x] for x in caps]
58 | return ' '.join(caps)#jsonify(predict=str(pred_class))
59 | #app.logger.info("Image %s classified as %s" % (url, pred_class))
60 | return None
61 |
62 |
63 |
64 | if __name__ == '__main__':
65 | app.run(debug=True)
--------------------------------------------------------------------------------
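
For reference, a minimal client for the /predict route above, assuming the app is running locally on Flask's default port 5000. The form field name `file` matches `request.files['file']` in predict(); the image path is illustrative:

import requests

with open('dog.jpeg', 'rb') as f:  # any local JPEG/PNG
    resp = requests.post('http://localhost:5000/predict', files={'file': f})

print(resp.text)  # the space-joined caption tokens returned by predict()
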
/requirements.txt:
--------------------------------------------------------------------------------
1 | attrs==19.3.0
2 | backcall==0.2.0
3 | beautifulsoup4==4.9.1
4 | blis==0.2.4
5 | Bottleneck==1.3.2
6 | brotlipy==0.7.0
7 | certifi==2020.6.20
8 | cffi==1.14.0
9 | chardet==3.0.4
10 | click==7.1.2
11 | conda==4.8.3
12 | conda-package-handling==1.7.0
13 | cryptography==3.2
14 | cycler==0.10.0
15 | cymem==2.0.2
16 | dataclasses==0.6
17 | decorator==4.4.2
18 | fastai==1.0.61
19 | fastprogress==0.2.2
20 | Flask==1.1.2
21 | idna @ file:///tmp/build/80754af9/idna_1593446292537/work
22 | importlib-metadata @ file:///tmp/build/80754af9/importlib-metadata_1593446433964/work
23 | ipykernel @ file:///tmp/build/80754af9/ipykernel_1596206602906/work/dist/ipykernel-5.3.4-py3-none-any.whl
24 | ipython @ file:///tmp/build/80754af9/ipython_1593447367857/work
25 | ipython-genutils==0.2.0
26 | itsdangerous==1.1.0
27 | jedi @ file:///tmp/build/80754af9/jedi_1592841914522/work
28 | Jinja2==2.11.2
29 | joblib @ file:///tmp/build/80754af9/joblib_1594236160679/work
30 | jsonschema==3.2.0
31 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1594826976318/work
32 | jupyter-core==4.6.3
33 | kiwisolver==1.2.0
34 | MarkupSafe==1.1.1
35 | matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-base_1595353103837/work
36 | murmurhash==1.0.2
37 | nltk @ file:///tmp/build/80754af9/nltk_1592496090529/work
38 | numexpr==2.7.1
39 | numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1596233718326/work
40 | olefile==0.46
41 | packaging==20.4
42 | pandas @ file:///tmp/build/80754af9/pandas_1592841660978/work
43 | parso==0.7.0
44 | pathlib2==2.3.5
45 | pexpect==4.8.0
46 | pickleshare==0.7.5
47 | Pillow @ file:///tmp/build/80754af9/pillow_1594307298074/work
48 | plac @ file:///tmp/build/80754af9/plac_1594261902054/work
49 | preshed==2.0.1
50 | prompt-toolkit==3.0.5
51 | protobuf==3.12.4
52 | ptyprocess==0.6.0
53 | pycosat==0.6.3
54 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
55 | Pygments==2.6.1
56 | pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1594392929924/work
57 | pyparsing==2.4.7
58 | pyrsistent==0.16.0
59 | PySocks==1.7.1
60 | python-dateutil==2.8.1
61 | pytz==2020.1
62 | PyYAML==5.3.1
63 | pyzmq==19.0.1
64 | regex @ file:///tmp/build/80754af9/regex_1593435572336/work
65 | requests @ file:///tmp/build/80754af9/requests_1592841827918/work
66 | ruamel-yaml==0.15.87
67 | scikit-learn @ file:///tmp/build/80754af9/scikit-learn_1592502866951/work
68 | scipy @ file:///tmp/build/80754af9/scipy_1592930497347/work
69 | six==1.15.0
70 | soupsieve==2.0.1
71 | spacy==2.1.8
72 | srsly==0.1.0
73 | tensorboardX==2.1
74 | thinc==7.0.8
75 | threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl
76 | torch==1.4.0
77 | torchvision==0.5.0
78 | tornado==6.0.4
79 | tqdm @ file:///tmp/build/80754af9/tqdm_1593446365756/work
80 | traitlets==4.3.3
81 | urllib3==1.25.9
82 | wasabi==0.2.2
83 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work
84 | Werkzeug==1.0.1
85 | zipp==3.1.0
86 |
--------------------------------------------------------------------------------
/web_app/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %} {% block content %}
2 |
3 | Image Caption Generator
4 |
5 |
6 |
7 |
14 |
15 |
16 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
88 |
89 | {% endblock %}
90 |
--------------------------------------------------------------------------------
/notebook2script.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import json,fire,re
4 | from pathlib import Path
5 | import io
6 |
7 | def is_export(cell):
8 | if cell['cell_type'] != 'code': return False
9 | src = cell['source']
10 | if len(src) == 0 or len(src[0]) < 7: return False
11 | #import pdb; pdb.set_trace()
12 | return re.match(r'^\s*#\s*export\s*$', src[0], re.IGNORECASE) is not None
13 |
14 | def getSortedFiles(allFiles, upTo=None):
15 |     '''Returns all the notebook files sorted by name.
16 | allFiles = True : returns all files
17 | = '*_*.ipynb' : returns this pattern
18 | upTo = None : no upper limit
19 | = filter : returns all files up to 'filter' included
20 |     The sorting option is important to ensure that the notebooks are executed in the correct order.
21 | '''
22 | import glob
23 | ret = []
24 |     if (allFiles==True): ret = glob.glob('*.ipynb') # checks both that it is a bool and that it is True
25 | if (isinstance(allFiles,str)): ret = glob.glob(allFiles)
26 | if 0==len(ret):
27 | print('WARNING: No files found')
28 | return ret
29 | if upTo is not None: ret = [f for f in ret if str(f)<=str(upTo)]
30 | return sorted(ret)
31 |
32 | def notebook2script(fname=None, allFiles=None, upTo=None):
33 | '''Finds cells starting with `#export` and puts them into a new module
34 | + allFiles: convert all files in the folder
35 | + upTo: convert files up to specified one included
36 |
37 |     Examples:
38 | notebook2script --allFiles=True # Parse all files
39 | notebook2script --allFiles=nb* # Parse all files starting with nb*
40 | notebook2script --upTo=10 # Parse all files with (name<='10')
41 | notebook2script --allFiles=*_*.ipynb --upTo=10 # Parse all files with an '_' and (name<='10')
42 | '''
43 | # initial checks
44 | if (allFiles is None) and (upTo is not None): allFiles=True # Enable allFiles if upTo is present
45 | if (fname is None) and (not allFiles): print('Should provide a file name')
46 | if not allFiles: notebook2scriptSingle(fname)
47 | else:
48 | print('Begin...')
49 | [notebook2scriptSingle(f) for f in getSortedFiles(allFiles,upTo)]
50 | print('...End')
51 |
52 |
53 | def notebook2scriptSingle(fname):
54 | "Finds cells starting with `#export` and puts them into a new module"
55 | fname = Path(fname)
56 | fname_out = f"nb_{fname.stem.split('_')[0]}.py"
57 | main_dic = json.load(open(fname,'r',encoding="utf-8"))
58 | code_cells = [c for c in main_dic['cells'] if is_export(c)]
59 | module = f'''
60 | #################################################
61 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
62 | #################################################
63 | # file to edit: dev_nb/{fname.name}
64 |
65 | '''
66 | for cell in code_cells: module += ''.join(cell['source'][1:]) + '\n\n'
67 | # remove trailing spaces
68 | module = re.sub(r' +$', '', module, flags=re.MULTILINE)
69 | if not (fname.parent/'exp').exists(): (fname.parent/'exp').mkdir()
70 | output_path = fname.parent/'exp'/fname_out
71 | with io.open(output_path, "w", encoding="utf-8") as f:
72 | f.write(module[:-2])
73 | print(f"Converted {fname} to {output_path}")
74 |
75 | if __name__ == '__main__': fire.Fire(notebook2script)
76 |
77 |
--------------------------------------------------------------------------------
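
As a usage sketch, a code cell whose first line matches the r'^\s*#\s*export\s*$' pattern above is copied into the generated module. For a hypothetical notebook 01_model.ipynb, a cell such as:

#export
def greet(name):
    return f'hello {name}'

is picked up, and running `python notebook2script.py 01_model.ipynb` writes the cell body (minus the `#export` marker line) to exp/nb_01.py, following the fname_out naming rule above.
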
/modules/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from pdb import set_trace
4 |
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
6 |
7 | def beam_search(learn, img,vocab = None, beam_size = 5):
8 | with torch.no_grad():
9 | k = beam_size
10 |
11 |         ## input tensor preparation
12 | img = img.unsqueeze(0) #treating as batch of size 1
13 |
14 |         ## model preparation
15 | mod = learn.model
16 |
17 | # encoder output
18 | encoder_out = mod.encoder(img)
19 | encoder_dim = encoder_out.size(-1)
20 | num_pixels = encoder_out.size(1)
21 |
22 | # expand or repeat 'k' time
23 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim)
24 |
25 |         # Tensor to store top k previous words at each step; now they're just '<start>'
26 |         k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device)  # (k, 1)
27 | 
28 |         # Tensor to store top k sequences; now they're just '<start>'
29 |         seqs = k_prev_words  # (k, 1)
30 |
31 | # Tensor to store top k sequences' scores; now they're just 0
32 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1)
33 |
34 | # Lists to store completed sequences and scores
35 | complete_seqs = list()
36 | complete_seqs_scores = list()
37 |
38 | # Start decoding
39 | step = 1
40 | h, c = mod.decoder.init_hidden_state(encoder_out)
41 |
42 | references = list()
43 | hypotheses = list()
44 |
45 |         # s is a number less than or equal to k, because sequences are removed from this process once they hit '<end>'
46 | while True:
47 | embeddings = mod.decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim)
48 | awe, _ = mod.decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels)
49 | gate = mod.decoder.sigmoid(mod.decoder.f_beta(h))
50 | awe = (gate * awe)
51 |
52 | h, c = mod.decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c))
53 | scores = mod.decoder.fc(h)
54 | scores = F.log_softmax(scores, dim=1)
55 |
56 |
57 | # Add scores to prev scores
58 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size)
59 |
60 | # For the first step, all k points will have the same scores (since same k previous words, h, c)
61 | if step == 1:
62 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s)
63 | else:
64 | # Unroll and find top scores, and their unrolled indices
65 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s)
66 |
67 |             # Convert unrolled indices to actual indices of scores
68 |             prev_word_inds = top_k_words // len(vocab)  # (s); integer division ('/' yields floats on PyTorch >= 1.5)
69 |             next_word_inds = top_k_words % len(vocab)  # (s)
70 |
71 | # Add new words to sequences
72 |             seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1) stores indices of words
73 |
74 |             # Which sequences are incomplete (didn't reach '<end>')?
75 |             incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
76 |                                next_word != vocab['<end>']]
77 |
78 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
79 |
80 | # Set aside complete sequences
81 | if len(complete_inds) > 0:
82 | complete_seqs.extend(seqs[complete_inds].tolist())
83 | complete_seqs_scores.extend(top_k_scores[complete_inds])
84 | k -= len(complete_inds) # reduce beam length accordingly
85 |
86 | # Proceed with incomplete sequences
87 | if k == 0:
88 | break
89 | seqs = seqs[incomplete_inds]
90 | h = h[prev_word_inds[incomplete_inds]]
91 | c = c[prev_word_inds[incomplete_inds]]
92 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
93 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
94 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
95 |
96 |
97 | # Break if things have been going on too long
98 | if step > 50:
99 | break
100 | step += 1
101 |
102 | i = complete_seqs_scores.index(max(complete_seqs_scores))
103 | seq = complete_seqs[i]
104 |
105 | # Hypotheses
106 |         hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
107 |
108 | return hypotheses
109 |
--------------------------------------------------------------------------------
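
A hedged usage sketch for the beam search above: `learn` is assumed to be a trained fastai Learner wrapping the encoder/decoder model from /modules/model.py, and `img` a normalized (3, 224, 224) tensor on the same device; the word-map path is illustrative:

import json
from modules.metrics import beam_search

with open('Vocab_5_cap_per_img_2_min_word_freq.json') as j:
    vocab = json.load(j)
ind2word = {v: k for k, v in vocab.items()}  # invert word -> index for decoding

hyps = beam_search(learn, img, vocab=vocab, beam_size=5)  # returns a list holding one index sequence
print(' '.join(ind2word[i] for i in hyps[0]))
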
/web_app/Beam_search.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from modules.model import Encoder, DecoderWithAttention
4 | from nltk.translate.bleu_score import corpus_bleu
5 | from PIL import Image
6 | import torch.nn.functional as F
7 | from pdb import set_trace
8 |
9 |
10 | def beam_search(checkpoint_path, img_path, beam_size = 5, vocab = None, transforms = None, device = None):
11 |
12 | k = beam_size
13 |     ## input tensor preparation
14 | img = Image.open(img_path)
15 | if transforms is not None: img = transforms(img)
16 | img = img.unsqueeze(0) #treating as batch of size 1
17 |
18 | # Checkpoint loading
19 | checkpoint = torch.load(checkpoint_path, map_location=str(device))
20 | decoder = checkpoint['decoder']
21 | decoder = decoder.to(device)
22 | decoder.eval()
23 | encoder = checkpoint['encoder']
24 | encoder = encoder.to(device)
25 | encoder.eval()
26 |
27 |
28 | # encoder output
29 | encoder_out = encoder(img)
30 | encoder_dim = encoder_out.size(-1)
31 | num_pixels = encoder_out.size(1)
32 |
33 | # expand or repeat 'k' time
34 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim)
35 |
36 |     # Tensor to store top k previous words at each step; now they're just '<start>'
37 |     k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device)  # (k, 1)
38 | 
39 |     # Tensor to store top k sequences; now they're just '<start>'
40 |     seqs = k_prev_words  # (k, 1)
41 |
42 | # Tensor to store top k sequences' scores; now they're just 0
43 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1)
44 |
45 | # Lists to store completed sequences and scores
46 | complete_seqs = list()
47 | complete_seqs_scores = list()
48 |
49 | # Start decoding
50 | step = 1
51 | h, c = decoder.init_hidden_state(encoder_out)
52 |
53 | hypotheses = list()
54 |
55 |     # s is a number less than or equal to k, because sequences are removed from this process once they hit '<end>'
56 | while True:
57 |
58 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim)
59 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels)
60 | gate = decoder.sigmoid(decoder.f_beta(h))
61 | awe = (gate * awe)
62 |
63 | h, c = decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c))
64 | scores = decoder.fc(h)
65 | scores = F.log_softmax(scores, dim=1)
66 |
67 |
68 | # Add scores to prev scores
69 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size)
70 |
71 | # For the first step, all k points will have the same scores (since same k previous words, h, c)
72 | if step == 1:
73 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s)
74 | else:
75 | # Unroll and find top scores, and their unrolled indices
76 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s)
77 |
78 |         # Convert unrolled indices to actual indices of scores
79 |         prev_word_inds = top_k_words // len(vocab)  # (s); integer division ('/' yields floats on PyTorch >= 1.5)
80 |         next_word_inds = top_k_words % len(vocab)  # (s)
81 |
82 | # Add new words to sequences
83 |         seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1) stores indices of words
84 |
85 |         # Which sequences are incomplete (didn't reach '<end>')?
86 |         incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
87 |                            next_word != vocab['<end>']]
88 |
89 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
90 |
91 | # Set aside complete sequences
92 | if len(complete_inds) > 0:
93 | complete_seqs.extend(seqs[complete_inds].tolist())
94 | complete_seqs_scores.extend(top_k_scores[complete_inds])
95 | k -= len(complete_inds) # reduce beam length accordingly
96 |
97 | # Proceed with incomplete sequences
98 | if k == 0:
99 | break
100 | seqs = seqs[incomplete_inds]
101 | h = h[prev_word_inds[incomplete_inds]]
102 | c = c[prev_word_inds[incomplete_inds]]
103 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
104 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
105 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
106 |
107 |
108 |
109 | # Break if things have been going on too long
110 | if step > 50:
111 | break
112 | step += 1
113 |
114 |
115 | i = complete_seqs_scores.index(max(complete_seqs_scores))
116 | seq = complete_seqs[i]
117 |
118 | # Hypotheses
119 |     hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
120 | hypotheses = hypotheses[0]
121 | return hypotheses
--------------------------------------------------------------------------------
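
Note that this variant expects the checkpoint to contain the pickled encoder/decoder modules themselves rather than state_dicts (see checkpoint['decoder'] above). A sketch of producing such a checkpoint, with untrained stand-ins for what would be trained instances (hyperparameters are illustrative):

import torch
from modules.model import Encoder, DecoderWithAttention  # the classes pickled inside the checkpoint

encoder = Encoder(encoded_image_size=14)  # stand-in; in practice a trained encoder
decoder = DecoderWithAttention(attention_dim=512, embed_dim=300, decoder_dim=512, vocab_size=100)
torch.save({'encoder': encoder, 'decoder': decoder}, 'models/Best_model.pth')  # the path application.py loads
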
/modules/Beam_search.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from nltk.translate.bleu_score import corpus_bleu
4 | from PIL import Image
5 | import torch.nn.functional as F
6 | from pdb import set_trace
7 |
8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9 |
10 | def beam_search(learn, img_path, beam_size = 5, vocab = None, transforms = None):
11 |
12 | k = beam_size
13 |     ## input tensor preparation
14 | img = Image.open(img_path)
15 | if transforms is not None: img = transforms(img)
16 | img = img.unsqueeze(0) #treating as batch of size 1
17 | img = img.to(device)
18 |
19 | # Checkpoint loading
20 | decoder = learn.model.decoder
21 | decoder = decoder.to(device)
22 | decoder.eval()
23 | encoder = learn.model.encoder
24 | encoder = encoder.to(device)
25 | encoder.eval()
26 |
27 |
28 | # encoder output
29 | encoder_out = encoder(img)
30 | encoder_dim = encoder_out.size(-1)
31 | encoder_out = encoder_out.view(1, -1, encoder_dim)
32 |
33 | # expand or repeat 'k' time
34 | num_pixels = encoder_out.size(1)
35 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim)
36 |
37 |     # Tensor to store top k previous words at each step; now they're just '<start>'
38 |     k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device)  # (k, 1)
39 | 
40 |     # Tensor to store top k sequences; now they're just '<start>'
41 |     seqs = k_prev_words  # (k, 1)
42 |
43 | # Tensor to store top k sequences' scores; now they're just 0
44 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1)
45 |
46 | # Lists to store completed sequences and scores
47 | complete_seqs = list()
48 | complete_seqs_scores = list()
49 |
50 | # Start decoding
51 | step = 1
52 | h, c = decoder.init_hidden_state(encoder_out)
53 |
54 | hypotheses = list()
55 |
56 |     # s is a number less than or equal to k, because sequences are removed from this process once they hit '<end>'
57 | while True:
58 |
59 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim)
60 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels)
61 | gate = decoder.sigmoid(decoder.f_beta(h))
62 | awe = (gate * awe)
63 |
64 | h, c = decoder.decode_step(torch.cat([embeddings, awe], dim=1), (h, c))
65 | scores = decoder.fc(h)
66 | scores = F.log_softmax(scores, dim=1)
67 |
68 |
69 | # Add scores to prev scores
70 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size)
71 |
72 | # For the first step, all k points will have the same scores (since same k previous words, h, c)
73 | if step == 1:
74 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s)
75 | else:
76 | # Unroll and find top scores, and their unrolled indices
77 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s)
78 |
79 |         # Convert unrolled indices to actual indices of scores
80 |         prev_word_inds = top_k_words // len(vocab)  # (s); integer division ('/' yields floats on PyTorch >= 1.5)
81 |         next_word_inds = top_k_words % len(vocab)  # (s)
82 |
83 | # Add new words to sequences
84 |         seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1) stores indices of words
85 |
86 |         # Which sequences are incomplete (didn't reach '<end>')?
87 |         incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
88 |                            next_word != vocab['<end>']]
89 |
90 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
91 |
92 | # Set aside complete sequences
93 | if len(complete_inds) > 0:
94 | complete_seqs.extend(seqs[complete_inds].tolist())
95 | complete_seqs_scores.extend(top_k_scores[complete_inds])
96 | k -= len(complete_inds) # reduce beam length accordingly
97 |
98 | # Proceed with incomplete sequences
99 | if k == 0:
100 | break
101 | seqs = seqs[incomplete_inds]
102 | h = h[prev_word_inds[incomplete_inds]]
103 | c = c[prev_word_inds[incomplete_inds]]
104 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
105 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
106 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
107 |
108 |
109 |
110 | # Break if things have been going on too long
111 | if step > 50:
112 | break
113 | step += 1
114 |
115 |
116 | i = complete_seqs_scores.index(max(complete_seqs_scores))
117 | seq = complete_seqs[i]
118 |
119 | # Hypotheses
120 |     hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
121 | hypotheses = hypotheses[0]
122 | return hypotheses
--------------------------------------------------------------------------------
/web_app/modules/Beam_search.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from ori_models import Encoder, DecoderWithAttention
4 | from nltk.translate.bleu_score import corpus_bleu
5 | from PIL import Image
6 | import torch.nn.functional as F
7 | from pdb import set_trace
8 |
9 |
10 | def beam_search(checkpoint_path, img_path, beam_size = 5, vocab = None, transforms = None, device = None):
11 |
12 | k = beam_size
13 |     ## input tensor preparation
14 | img = Image.open(img_path)
15 | if transforms is not None: img = transforms(img)
16 | img = img.unsqueeze(0) #treating as batch of size 1
17 |
18 | # Checkpoint loading
19 | checkpoint = torch.load(checkpoint_path, map_location=str(device))
20 | decoder = checkpoint['decoder']
21 | decoder = decoder.to(device)
22 | decoder.eval()
23 | encoder = checkpoint['encoder']
24 | encoder = encoder.to(device)
25 | encoder.eval()
26 |
27 |
28 | # encoder output
29 | encoder_out = encoder(img)
30 | encoder_dim = encoder_out.size(-1)
31 | encoder_out = encoder_out.view(1, -1, encoder_dim)
32 |
33 | # expand or repeat 'k' time
34 | num_pixels = encoder_out.size(1)
35 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim)
36 |
37 |     # Tensor to store top k previous words at each step; now they're just '<start>'
38 |     k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device)  # (k, 1)
39 | 
40 |     # Tensor to store top k sequences; now they're just '<start>'
41 |     seqs = k_prev_words  # (k, 1)
42 |
43 | # Tensor to store top k sequences' scores; now they're just 0
44 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1)
45 |
46 | # Lists to store completed sequences and scores
47 | complete_seqs = list()
48 | complete_seqs_scores = list()
49 |
50 | # Start decoding
51 | step = 1
52 | h, c = decoder.init_hidden_state(encoder_out)
53 |
54 | hypotheses = list()
55 |
56 |     # s is a number less than or equal to k, because sequences are removed from this process once they hit '<end>'
57 | while True:
58 |
59 | embeddings = decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim)
60 | awe, _ = decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels)
61 | gate = decoder.sigmoid(decoder.f_beta(h))
62 | awe = (gate * awe)
63 |
64 | h, c = decoder.decode_step(torch.cat([embeddings, awe], dim=1), (h, c))
65 | scores = decoder.fc(h)
66 | scores = F.log_softmax(scores, dim=1)
67 |
68 |
69 | # Add scores to prev scores
70 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size)
71 |
72 | # For the first step, all k points will have the same scores (since same k previous words, h, c)
73 | if step == 1:
74 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s)
75 | else:
76 | # Unroll and find top scores, and their unrolled indices
77 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s)
78 |
79 |         # Convert unrolled indices to actual indices of scores
80 |         prev_word_inds = top_k_words // len(vocab)  # (s); integer division ('/' yields floats on PyTorch >= 1.5)
81 |         next_word_inds = top_k_words % len(vocab)  # (s)
82 |
83 | # Add new words to sequences
84 |         seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1) stores indices of words
85 |
86 |         # Which sequences are incomplete (didn't reach '<end>')?
87 |         incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
88 |                            next_word != vocab['<end>']]
89 |
90 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
91 |
92 | # Set aside complete sequences
93 | if len(complete_inds) > 0:
94 | complete_seqs.extend(seqs[complete_inds].tolist())
95 | complete_seqs_scores.extend(top_k_scores[complete_inds])
96 | k -= len(complete_inds) # reduce beam length accordingly
97 |
98 | # Proceed with incomplete sequences
99 | if k == 0:
100 | break
101 | seqs = seqs[incomplete_inds]
102 | h = h[prev_word_inds[incomplete_inds]]
103 | c = c[prev_word_inds[incomplete_inds]]
104 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
105 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
106 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
107 |
108 |
109 |
110 | # Break if things have been going on too long
111 | if step > 50:
112 | break
113 | step += 1
114 |
115 |
116 | i = complete_seqs_scores.index(max(complete_seqs_scores))
117 | seq = complete_seqs[i]
118 |
119 | # Hypotheses
120 |     hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
121 | hypotheses = hypotheses[0]
122 | return hypotheses
--------------------------------------------------------------------------------
/modules/model.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F, init
3 | from torchvision import transforms, models
4 | import torch
5 | import random
6 | from pdb import set_trace
7 |
8 |
9 | device =torch.device("cuda" if torch.cuda.is_available() else "cpu")
10 |
11 |
12 |
13 | # create an embedding layer from a (vocab_size, emb_dim) numpy array
14 | def create_emb(embedding_array):
15 |     emb = nn.Embedding(*embedding_array.shape)  # infer sizes from the array; word_map/embedding_dim were undefined here
16 |     emb.weight.data = torch.from_numpy(embedding_array).float()
17 |     return emb
18 |
19 | class Encoder(nn.Module):
20 | def __init__(self,encode_img_size, fine_tune = False):
21 | super(Encoder, self).__init__()
22 | self.enc_imgsize = encode_img_size
23 | resnet = models.resnet101(pretrained=True)
24 | self.encoder = nn.Sequential(*list(resnet.children())[:-2]) # removing final Linear layer
25 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encode_img_size,encode_img_size))
26 | self.fine_tune = fine_tune
27 | self.fine_tune_h()
28 |
29 |     def fine_tune_h(self):
30 |         """
31 |         Allow or prevent the computation of gradients for convolutional blocks 2 through 4
32 |         of the encoder, depending on self.fine_tune.
33 |         """
34 | for p in self.encoder.parameters():
35 | p.requires_grad = False
36 |
37 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4
38 | for c in list(self.encoder.children())[5:]:
39 | for p in c.parameters():
40 | p.requires_grad = self.fine_tune
41 |
42 | def forward(self,X):
43 | out = self.encoder(X) # X is tensor of size (batch size, 3 (RGB), input height, width)
44 | out = self.adaptive_pool(out) # output (batch_size, 2048, encoded_image_size, encoded_image_size)
45 | out = out.permute(0, 2, 3, 1)
46 | out = out.view(out.size(0), -1, out.size(3))
47 | return out
48 |
49 | class Decoder(nn.Module):
50 | def __init__(self,attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5, pretrained_embedding = None,teacher_forcing_ratio = 0):
51 | super(Decoder, self).__init__()
52 | self.attention_dim = attention_dim
53 | self.embed_dim = embed_dim
54 | self.decoder_dim = decoder_dim
55 | self.encoder_dim = encoder_dim
56 | self.vocab_size = vocab_size
57 | self.teacher_forcing_ratio = teacher_forcing_ratio
58 | self.dropout = nn.Dropout(dropout)
59 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim)
60 | self.embedding = nn.Embedding(vocab_size,embed_dim)
61 | self.lstm = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) #use
62 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell
63 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell
64 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # gate
65 | self.pretrained_embedding = pretrained_embedding
66 | self.sigmoid = nn.Sigmoid()
67 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary
68 | self.init_weights()
69 |
70 |     def init_weights(self):
71 |         """
72 |         Initializes some parameters with values from the uniform distribution
73 | 
74 |         """
75 |         self.embedding.weight.data.uniform_(-0.1, 0.1)
76 |         self.fc.bias.data.fill_(0)
77 |         self.fc.weight.data.uniform_(-0.1,0.1)
78 |
79 | # Kaiming initialization
80 | #init.kaiming_normal_(self.init_h.weight, mode='fan_in')
81 | #init.kaiming_normal_(self.init_c.weight, mode='fan_in')
82 | #init.kaiming_normal_(self.f_beta.weight, mode='fan_in')
83 | #init.kaiming_normal_(self.fc.weight, mode='fan_in')
84 |
85 | def pretrained(self):
86 | if self.pretrained_embedding is not None:
87 | self.embedding.weight.data = torch.from_numpy(self.pretrained_embedding)
88 |
89 | def init_hidden_state(self, encoder_out):
90 |
91 | mean_encoder_out = encoder_out.mean(dim=1)
92 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim)
93 | c = self.init_c(mean_encoder_out)
94 | return h, c
95 |
96 | def forward(self,encoder_out, encoded_captions,decode_lengths,inds):
97 | batch_size = encoder_out.size(0)
98 | encoder_dim = encoder_out.size(-1)
99 | vocab_size = self.vocab_size
100 | num_pixels = encoder_out.size(1)
101 | #embeddings = self.embedding(encoded_captions)
102 |
103 |         ## initialize hidden state
104 | h, c = self.init_hidden_state(encoder_out)
105 |
106 | #dec_out = torch.zeros(1,batch_size,self.decoder_dim).to(device) #uncomment for teacher forcing
107 |
108 | decode_lengths = decode_lengths - 1
109 |
110 | max_len = max(decode_lengths).item()
111 |
112 |
113 |         # Create tensors to hold word prediction scores and alphas
114 |         predictions = torch.zeros(batch_size, max_len, vocab_size).to(device)  # keep on the same device as the model
115 |         alphas = torch.zeros(batch_size, max_len, num_pixels).to(device)
116 |
117 | for t in range(max_len):
118 | batch_size_t = sum([l.item() > t for l in decode_lengths])
119 | attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
120 | h[:batch_size_t])
121 | gate = self.sigmoid(self.f_beta(h[:batch_size_t])) # gating scalar, (batch_size_t, encoder_dim)
122 | attention_weighted_encoding = gate * attention_weighted_encoding
123 |
124 | # teacher forcing
125 | use_teacher_forcing = True if random.random() < self.teacher_forcing_ratio else False
126 |
127 |
128 | inp_emb = self.embedding(encoded_captions[:batch_size_t,t]).float() if (use_teacher_forcing or t==0) else self.embedding(prev_word[:batch_size_t]).float()
129 | #self.emb2dec_dim((embeddings[:batch_size_t, t, :]).float()) use syntax for teacher forcing
130 | #inp_emb = inp_emb if (use_teacher_forcing or t==0) else dec_out.squeeze(0)[:batch_size_t] #uncomment to add teacher forcing
131 |
132 | h, c = self.lstm(
133 | torch.cat([inp_emb, attention_weighted_encoding], dim=1),
134 | (h[:batch_size_t], c[:batch_size_t])) # (batch_size_t, decoder_dim)
135 | preds = self.fc(self.dropout(h)) # (batch_size_t, vocab_size)
136 | predictions[:batch_size_t,t , :] = preds
137 | alphas[:batch_size_t, t, :] = alpha
138 |
139 | _,prev_word = preds.max(dim=-1)
140 | return predictions,decode_lengths, alphas, inds
141 |
142 | class Attention(nn.Module):
143 | def __init__(self, encoder_dim, decoder_dim, attention_dim):
144 | super(Attention, self).__init__()
145 |
146 | self.enc_att = nn.Linear(encoder_dim,attention_dim)
147 | self.dec_att = nn.Linear(decoder_dim,attention_dim)
148 | self.att = nn.Linear(attention_dim,1)
149 | self.relu = nn.ReLU()
150 | self.softmax = nn.Softmax(dim=1)
151 |
152 | # kaiming
153 | #init.kaiming_normal_(self.enc_att.weight, mode='fan_in')
154 | #init.kaiming_normal_(self.dec_att.weight, mode='fan_in')
155 | #init.kaiming_normal_(self.att.weight, mode='fan_in')
156 |
157 | def forward(self,encoder_out, decoder_hidden):
158 | encoder_att = self.enc_att(encoder_out)
159 | decoder_att = self.dec_att(decoder_hidden)
160 |         att = self.att(self.relu(encoder_att + decoder_att.unsqueeze(1))).squeeze(2)  # (batch_size, num_pixels)
161 | alpha = self.softmax(att)
162 | attention_weighted_encoding = (encoder_out*alpha.unsqueeze(2)).sum(dim=1)
163 |
164 | return attention_weighted_encoding, alpha
--------------------------------------------------------------------------------
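
A small shape sanity check for the encoder/decoder pair above; the hyperparameters are illustrative and vocab_size=100 is arbitrary:

import torch
from modules.model import Encoder, Decoder

enc = Encoder(encode_img_size=14)
dec = Decoder(attention_dim=512, embed_dim=300, decoder_dim=512, vocab_size=100)

imgs = torch.randn(2, 3, 224, 224)     # dummy batch of 2 RGB images
enc_out = enc(imgs)                    # (2, 196, 2048): num_pixels x encoder_dim
caps = torch.randint(0, 100, (2, 20))  # dummy encoded captions
lens = torch.tensor([20, 18])          # caption lengths
preds, dec_lens, alphas, _ = dec(enc_out, caps, lens, inds=None)
print(preds.shape, alphas.shape)       # (2, 19, 100) and (2, 19, 196)
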
/modules/ori_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torchvision
4 | from pdb import set_trace
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # used by DecoderWithAttention.forward below
6 | 
7 | class Encoder(nn.Module):
8 | """
9 | Encoder.
10 | """
11 |
12 | def __init__(self, encoded_image_size=14):
13 | super(Encoder, self).__init__()
14 | self.enc_image_size = encoded_image_size
15 |
16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101
17 |
18 | # Remove linear and pool layers (since we're not doing classification)
19 | modules = list(resnet.children())[:-2]
20 | self.resnet = nn.Sequential(*modules)
21 |
22 | # Resize image to fixed size to allow input images of variable size
23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))
24 |
25 | self.fine_tune()
26 |
27 | def forward(self, images):
28 | """
29 | Forward propagation.
30 |
31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size)
32 | :return: encoded images
33 | """
34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32)
35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size)
36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048)
37 | return out
38 |
39 | def fine_tune(self, fine_tune=True):
40 | """
41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder.
42 |
43 | :param fine_tune: Allow?
44 | """
45 | for p in self.resnet.parameters():
46 | p.requires_grad = False
47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4
48 | for c in list(self.resnet.children())[5:]:
49 | for p in c.parameters():
50 | p.requires_grad = fine_tune
51 |
52 |
53 | class Attention(nn.Module):
54 | """
55 | Attention Network.
56 | """
57 |
58 | def __init__(self, encoder_dim, decoder_dim, attention_dim):
59 | """
60 | :param encoder_dim: feature size of encoded images
61 | :param decoder_dim: size of decoder's RNN
62 | :param attention_dim: size of the attention network
63 | """
64 | super(Attention, self).__init__()
65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image
66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output
67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed
68 | self.relu = nn.ReLU()
69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights
70 |
71 | def forward(self, encoder_out, decoder_hidden):
72 | """
73 | Forward propagation.
74 |
75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim)
77 | :return: attention weighted encoding, weights
78 | """
79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim)
80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim)
81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels)
82 | alpha = self.softmax(att) # (batch_size, num_pixels)
83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim)
84 |
85 | return attention_weighted_encoding, alpha
86 |
87 |
88 | class DecoderWithAttention(nn.Module):
89 | """
90 | Decoder.
91 | """
92 |
93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5):
94 | """
95 | :param attention_dim: size of attention network
96 | :param embed_dim: embedding size
97 | :param decoder_dim: size of decoder's RNN
98 | :param vocab_size: size of vocabulary
99 | :param encoder_dim: feature size of encoded images
100 | :param dropout: dropout
101 | """
102 | super(DecoderWithAttention, self).__init__()
103 |
104 | self.encoder_dim = encoder_dim
105 | self.attention_dim = attention_dim
106 | self.embed_dim = embed_dim
107 | self.decoder_dim = decoder_dim
108 | self.vocab_size = vocab_size
109 | self.dropout = dropout
110 |
111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network
112 |
113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer
114 | self.dropout = nn.Dropout(p=self.dropout)
115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell
116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell
117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell
118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate
119 | self.sigmoid = nn.Sigmoid()
120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary
121 | self.init_weights() # initialize some layers with the uniform distribution
122 |
123 | def init_weights(self):
124 | """
125 | Initializes some parameters with values from the uniform distribution, for easier convergence.
126 | """
127 | self.embedding.weight.data.uniform_(-0.1, 0.1)
128 | self.fc.bias.data.fill_(0)
129 | self.fc.weight.data.uniform_(-0.1, 0.1)
130 |
131 | def load_pretrained_embeddings(self, embeddings):
132 | """
133 | Loads embedding layer with pre-trained embeddings.
134 |
135 | :param embeddings: pre-trained embeddings
136 | """
137 | self.embedding.weight = nn.Parameter(embeddings)
138 |
139 | def fine_tune_embeddings(self, fine_tune=True):
140 | """
141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings).
142 |
143 | :param fine_tune: Allow?
144 | """
145 | for p in self.embedding.parameters():
146 | p.requires_grad = fine_tune
147 |
148 | def init_hidden_state(self, encoder_out):
149 | """
150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images.
151 |
152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
153 | :return: hidden state, cell state
154 | """
155 | mean_encoder_out = encoder_out.mean(dim=1)
156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim)
157 | c = self.init_c(mean_encoder_out)
158 | return h, c
159 |
160 | def forward(self, encoder_out, encoded_captions, caption_lengths):
161 | """
162 | Forward propagation.
163 |
164 | :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 | :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 | :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 | :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 | """
169 |
170 | batch_size = encoder_out.size(0)
171 | encoder_dim = encoder_out.size(-1)
172 | vocab_size = self.vocab_size
173 |
174 | # Flatten image
175 | encoder_out = encoder_out.view(batch_size, -1, encoder_dim) # (batch_size, num_pixels, encoder_dim)
176 | num_pixels = encoder_out.size(1)
177 |
178 | # Sort input data by decreasing lengths; the reason becomes apparent below
179 | caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 | encoder_out = encoder_out[sort_ind]
181 | encoded_captions = encoded_captions[sort_ind]
182 |
183 | # Embedding
184 | embeddings = self.embedding(encoded_captions) # (batch_size, max_caption_length, embed_dim)
185 |
186 | # Initialize LSTM state
187 | h, c = self.init_hidden_state(encoder_out) # (batch_size, decoder_dim)
188 |
189 | # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 | # So, decoding lengths are actual lengths - 1
191 | decode_lengths = (caption_lengths - 1).tolist()
192 |
193 | # Create tensors to hold word prediction scores and alphas
194 | predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 | alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 |
197 | # At each time-step, decode by
198 | # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 | # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 | for t in range(max(decode_lengths)):
201 | batch_size_t = sum([l > t for l in decode_lengths])
202 | attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 | h[:batch_size_t])
204 | gate = self.sigmoid(self.f_beta(h[:batch_size_t])) # gating scalar, (batch_size_t, encoder_dim)
205 | attention_weighted_encoding = gate * attention_weighted_encoding
206 | h, c = self.decode_step(
207 | torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 | (h[:batch_size_t], c[:batch_size_t])) # (batch_size_t, decoder_dim)
209 | preds = self.fc(self.dropout(h)) # (batch_size_t, vocab_size)
210 | predictions[:batch_size_t, t, :] = preds
211 | alphas[:batch_size_t, t, :] = alpha
212 |
213 | return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 |
--------------------------------------------------------------------------------
/web_app/modules/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torchvision
4 | from pdb import set_trace
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # needed by DecoderWithAttention.forward
6 |
7 | class Encoder(nn.Module):
8 | """
9 | Encoder.
10 | """
11 |
12 | def __init__(self, encoded_image_size=14):
13 | super(Encoder, self).__init__()
14 | self.enc_image_size = encoded_image_size
15 |
16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101
17 |
18 | # Remove linear and pool layers (since we're not doing classification)
19 | modules = list(resnet.children())[:-2]
20 | self.resnet = nn.Sequential(*modules)
21 |
22 | # Resize image to fixed size to allow input images of variable size
23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))
24 |
25 | self.fine_tune()
26 |
27 | def forward(self, images):
28 | """
29 | Forward propagation.
30 |
31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size)
32 | :return: encoded images
33 | """
34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32)
35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size)
36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048)
37 | return out
38 |
39 | def fine_tune(self, fine_tune=True):
40 | """
41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder.
42 |
43 | :param fine_tune: Allow?
44 | """
45 | for p in self.resnet.parameters():
46 | p.requires_grad = False
47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4
48 | for c in list(self.resnet.children())[5:]:
49 | for p in c.parameters():
50 | p.requires_grad = fine_tune
51 |
52 |
53 | class Attention(nn.Module):
54 | """
55 | Attention Network.
56 | """
57 |
58 | def __init__(self, encoder_dim, decoder_dim, attention_dim):
59 | """
60 | :param encoder_dim: feature size of encoded images
61 | :param decoder_dim: size of decoder's RNN
62 | :param attention_dim: size of the attention network
63 | """
64 | super(Attention, self).__init__()
65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image
66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output
67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed
68 | self.relu = nn.ReLU()
69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights
70 |
71 | def forward(self, encoder_out, decoder_hidden):
72 | """
73 | Forward propagation.
74 |
75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim)
77 | :return: attention weighted encoding, weights
78 | """
79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim)
80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim)
81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels)
82 | alpha = self.softmax(att) # (batch_size, num_pixels)
83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim)
84 |
85 | return attention_weighted_encoding, alpha
86 |
87 |
88 | class DecoderWithAttention(nn.Module):
89 | """
90 | Decoder.
91 | """
92 |
93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5):
94 | """
95 | :param attention_dim: size of attention network
96 | :param embed_dim: embedding size
97 | :param decoder_dim: size of decoder's RNN
98 | :param vocab_size: size of vocabulary
99 | :param encoder_dim: feature size of encoded images
100 | :param dropout: dropout
101 | """
102 | super(DecoderWithAttention, self).__init__()
103 |
104 | self.encoder_dim = encoder_dim
105 | self.attention_dim = attention_dim
106 | self.embed_dim = embed_dim
107 | self.decoder_dim = decoder_dim
108 | self.vocab_size = vocab_size
109 | self.dropout = dropout
110 |
111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network
112 |
113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer
114 | self.dropout = nn.Dropout(p=self.dropout)
115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell
116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell
117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell
118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate
119 | self.sigmoid = nn.Sigmoid()
120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary
121 | self.init_weights() # initialize some layers with the uniform distribution
122 |
123 | def init_weights(self):
124 | """
125 | Initializes some parameters with values from the uniform distribution, for easier convergence.
126 | """
127 | self.embedding.weight.data.uniform_(-0.1, 0.1)
128 | self.fc.bias.data.fill_(0)
129 | self.fc.weight.data.uniform_(-0.1, 0.1)
130 |
131 | def load_pretrained_embeddings(self, embeddings):
132 | """
133 | Loads embedding layer with pre-trained embeddings.
134 |
135 | :param embeddings: pre-trained embeddings
136 | """
137 | self.embedding.weight = nn.Parameter(embeddings)
138 |
139 | def fine_tune_embeddings(self, fine_tune=True):
140 | """
141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings).
142 |
143 | :param fine_tune: Allow?
144 | """
145 | for p in self.embedding.parameters():
146 | p.requires_grad = fine_tune
147 |
148 | def init_hidden_state(self, encoder_out):
149 | """
150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images.
151 |
152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
153 | :return: hidden state, cell state
154 | """
155 | mean_encoder_out = encoder_out.mean(dim=1)
156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim)
157 | c = self.init_c(mean_encoder_out)
158 | return h, c
159 |
160 | def forward(self, encoder_out, encoded_captions, caption_lengths):
161 | """
162 | Forward propagation.
163 |
164 | :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 | :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 | :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 | :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 | """
169 |
170 | batch_size = encoder_out.size(0)
171 | encoder_dim = encoder_out.size(-1)
172 | vocab_size = self.vocab_size
173 |
174 | # Flatten image
175 | encoder_out = encoder_out.view(batch_size, -1, encoder_dim) # (batch_size, num_pixels, encoder_dim)
176 | num_pixels = encoder_out.size(1)
177 |
178 | # Sort input data by decreasing lengths; the reason becomes apparent below
179 | caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 | encoder_out = encoder_out[sort_ind]
181 | encoded_captions = encoded_captions[sort_ind]
182 |
183 | # Embedding
184 | embeddings = self.embedding(encoded_captions) # (batch_size, max_caption_length, embed_dim)
185 |
186 | # Initialize LSTM state
187 | h, c = self.init_hidden_state(encoder_out) # (batch_size, decoder_dim)
188 |
189 | # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 | # So, decoding lengths are actual lengths - 1
191 | decode_lengths = (caption_lengths - 1).tolist()
192 |
193 | # Create tensors to hold word prediction scores and alphas
194 | predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 | alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 |
197 | # At each time-step, decode by
198 | # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 | # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 | for t in range(max(decode_lengths)):
201 | batch_size_t = sum([l > t for l in decode_lengths])
202 | attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 | h[:batch_size_t])
204 | gate = self.sigmoid(self.f_beta(h[:batch_size_t])) # gating scalar, (batch_size_t, encoder_dim)
205 | attention_weighted_encoding = gate * attention_weighted_encoding
206 | h, c = self.decode_step(
207 | torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 | (h[:batch_size_t], c[:batch_size_t])) # (batch_size_t, decoder_dim)
209 | preds = self.fc(self.dropout(h)) # (batch_size_t, vocab_size)
210 | predictions[:batch_size_t, t, :] = preds
211 | alphas[:batch_size_t, t, :] = alpha
212 |
213 | return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 |
--------------------------------------------------------------------------------
/web_app/ori_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torchvision
4 | from pdb import set_trace
5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # needed by DecoderWithAttention.forward
6 |
7 | class Encoder(nn.Module):
8 | """
9 | Encoder.
10 | """
11 |
12 | def __init__(self, encoded_image_size=14):
13 | super(Encoder, self).__init__()
14 | self.enc_image_size = encoded_image_size
15 |
16 | resnet = torchvision.models.resnet101(pretrained=True) # pretrained ImageNet ResNet-101
17 |
18 | # Remove linear and pool layers (since we're not doing classification)
19 | modules = list(resnet.children())[:-2]
20 | self.resnet = nn.Sequential(*modules)
21 |
22 | # Resize image to fixed size to allow input images of variable size
23 | self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))
24 |
25 | self.fine_tune()
26 |
27 | def forward(self, images):
28 | """
29 | Forward propagation.
30 |
31 | :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size)
32 | :return: encoded images
33 | """
34 | out = self.resnet(images) # (batch_size, 2048, image_size/32, image_size/32)
35 | out = self.adaptive_pool(out) # (batch_size, 2048, encoded_image_size, encoded_image_size)
36 | out = out.permute(0, 2, 3, 1) # (batch_size, encoded_image_size, encoded_image_size, 2048)
37 | return out
38 |
39 | def fine_tune(self, fine_tune=True):
40 | """
41 | Allow or prevent the computation of gradients for convolutional blocks 2 through 4 of the encoder.
42 |
43 | :param fine_tune: Allow?
44 | """
45 | for p in self.resnet.parameters():
46 | p.requires_grad = False
47 | # If fine-tuning, only fine-tune convolutional blocks 2 through 4
48 | for c in list(self.resnet.children())[5:]:
49 | for p in c.parameters():
50 | p.requires_grad = fine_tune
51 |
52 |
53 | class Attention(nn.Module):
54 | """
55 | Attention Network.
56 | """
57 |
58 | def __init__(self, encoder_dim, decoder_dim, attention_dim):
59 | """
60 | :param encoder_dim: feature size of encoded images
61 | :param decoder_dim: size of decoder's RNN
62 | :param attention_dim: size of the attention network
63 | """
64 | super(Attention, self).__init__()
65 | self.encoder_att = nn.Linear(encoder_dim, attention_dim) # linear layer to transform encoded image
66 | self.decoder_att = nn.Linear(decoder_dim, attention_dim) # linear layer to transform decoder's output
67 | self.full_att = nn.Linear(attention_dim, 1) # linear layer to calculate values to be softmax-ed
68 | self.relu = nn.ReLU()
69 | self.softmax = nn.Softmax(dim=1) # softmax layer to calculate weights
70 |
71 | def forward(self, encoder_out, decoder_hidden):
72 | """
73 | Forward propagation.
74 |
75 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
76 | :param decoder_hidden: previous decoder output, a tensor of dimension (batch_size, decoder_dim)
77 | :return: attention weighted encoding, weights
78 | """
79 | att1 = self.encoder_att(encoder_out) # (batch_size, num_pixels, attention_dim)
80 | att2 = self.decoder_att(decoder_hidden) # (batch_size, attention_dim)
81 | att = self.full_att(self.relu(att1 + att2.unsqueeze(1))).squeeze(2) # (batch_size, num_pixels)
82 | alpha = self.softmax(att) # (batch_size, num_pixels)
83 | attention_weighted_encoding = (encoder_out * alpha.unsqueeze(2)).sum(dim=1) # (batch_size, encoder_dim)
84 |
85 | return attention_weighted_encoding, alpha
86 |
87 |
88 | class DecoderWithAttention(nn.Module):
89 | """
90 | Decoder.
91 | """
92 |
93 | def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5):
94 | """
95 | :param attention_dim: size of attention network
96 | :param embed_dim: embedding size
97 | :param decoder_dim: size of decoder's RNN
98 | :param vocab_size: size of vocabulary
99 | :param encoder_dim: feature size of encoded images
100 | :param dropout: dropout
101 | """
102 | super(DecoderWithAttention, self).__init__()
103 |
104 | self.encoder_dim = encoder_dim
105 | self.attention_dim = attention_dim
106 | self.embed_dim = embed_dim
107 | self.decoder_dim = decoder_dim
108 | self.vocab_size = vocab_size
109 | self.dropout = dropout
110 |
111 | self.attention = Attention(encoder_dim, decoder_dim, attention_dim) # attention network
112 |
113 | self.embedding = nn.Embedding(vocab_size, embed_dim) # embedding layer
114 | self.dropout = nn.Dropout(p=self.dropout)
115 | self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True) # decoding LSTMCell
116 | self.init_h = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial hidden state of LSTMCell
117 | self.init_c = nn.Linear(encoder_dim, decoder_dim) # linear layer to find initial cell state of LSTMCell
118 | self.f_beta = nn.Linear(decoder_dim, encoder_dim) # linear layer to create a sigmoid-activated gate
119 | self.sigmoid = nn.Sigmoid()
120 | self.fc = nn.Linear(decoder_dim, vocab_size) # linear layer to find scores over vocabulary
121 | self.init_weights() # initialize some layers with the uniform distribution
122 |
123 | def init_weights(self):
124 | """
125 | Initializes some parameters with values from the uniform distribution, for easier convergence.
126 | """
127 | self.embedding.weight.data.uniform_(-0.1, 0.1)
128 | self.fc.bias.data.fill_(0)
129 | self.fc.weight.data.uniform_(-0.1, 0.1)
130 |
131 | def load_pretrained_embeddings(self, embeddings):
132 | """
133 | Loads embedding layer with pre-trained embeddings.
134 |
135 | :param embeddings: pre-trained embeddings
136 | """
137 | self.embedding.weight = nn.Parameter(embeddings)
138 |
139 | def fine_tune_embeddings(self, fine_tune=True):
140 | """
141 | Allow fine-tuning of embedding layer? (Only makes sense to not-allow if using pre-trained embeddings).
142 |
143 | :param fine_tune: Allow?
144 | """
145 | for p in self.embedding.parameters():
146 | p.requires_grad = fine_tune
147 |
148 | def init_hidden_state(self, encoder_out):
149 | """
150 | Creates the initial hidden and cell states for the decoder's LSTM based on the encoded images.
151 |
152 | :param encoder_out: encoded images, a tensor of dimension (batch_size, num_pixels, encoder_dim)
153 | :return: hidden state, cell state
154 | """
155 | mean_encoder_out = encoder_out.mean(dim=1)
156 | h = self.init_h(mean_encoder_out) # (batch_size, decoder_dim)
157 | c = self.init_c(mean_encoder_out)
158 | return h, c
159 |
160 | def forward(self, encoder_out, encoded_captions, caption_lengths):
161 | """
162 | Forward propagation.
163 |
164 | :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
165 | :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
166 | :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
167 | :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
168 | """
169 |
170 | batch_size = encoder_out.size(0)
171 | encoder_dim = encoder_out.size(-1)
172 | vocab_size = self.vocab_size
173 |
174 | # Flatten image
175 | encoder_out = encoder_out.view(batch_size, -1, encoder_dim) # (batch_size, num_pixels, encoder_dim)
176 | num_pixels = encoder_out.size(1)
177 |
178 | # Sort input data by decreasing lengths; the reason becomes apparent below
179 | caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
180 | encoder_out = encoder_out[sort_ind]
181 | encoded_captions = encoded_captions[sort_ind]
182 |
183 | # Embedding
184 | embeddings = self.embedding(encoded_captions) # (batch_size, max_caption_length, embed_dim)
185 |
186 | # Initialize LSTM state
187 | h, c = self.init_hidden_state(encoder_out) # (batch_size, decoder_dim)
188 |
189 | # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
190 | # So, decoding lengths are actual lengths - 1
191 | decode_lengths = (caption_lengths - 1).tolist()
192 |
193 | # Create tensors to hold word prediction scores and alphas
194 | predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
195 | alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
196 |
197 | # At each time-step, decode by
198 | # attention-weighing the encoder's output based on the decoder's previous hidden state output
199 | # then generate a new word in the decoder with the previous word and the attention weighted encoding
200 | for t in range(max(decode_lengths)):
201 | batch_size_t = sum([l > t for l in decode_lengths])
202 | attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
203 | h[:batch_size_t])
204 | gate = self.sigmoid(self.f_beta(h[:batch_size_t])) # gating scalar, (batch_size_t, encoder_dim)
205 | attention_weighted_encoding = gate * attention_weighted_encoding
206 | h, c = self.decode_step(
207 | torch.cat([embeddings[:batch_size_t, t, :].float(), attention_weighted_encoding], dim=1),
208 | (h[:batch_size_t], c[:batch_size_t])) # (batch_size_t, decoder_dim)
209 | preds = self.fc(self.dropout(h)) # (batch_size_t, vocab_size)
210 | predictions[:batch_size_t, t, :] = preds
211 | alphas[:batch_size_t, t, :] = alpha
212 |
213 | return predictions, encoded_captions, decode_lengths, alphas, sort_ind
214 |
--------------------------------------------------------------------------------
/modules/custom_callbacks.py:
--------------------------------------------------------------------------------
1 | from statistics import mean
2 | from fastai.callback import Callback
3 | import copy as cp
4 | from torch import nn
5 | from fastai.vision import *
6 | from pathlib import Path
7 | from pdb import set_trace
8 | from nltk.translate.bleu_score import corpus_bleu
9 | from torch.nn.utils.rnn import pack_padded_sequence
10 |
11 |
12 |
13 |
14 |
15 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16 |
17 |
18 | def beam_search(mod, img,vocab = None, beam_size = 5):
19 | with torch.no_grad():
20 | k = beam_size
21 |
22 | ## input tensor preparation
23 | img = img.unsqueeze(0) #treating as batch of size 1
24 |
25 | ## model preparation
26 | #mod = learn.model
27 |
28 | # encoder output
29 | encoder_out = mod.encoder(img)
30 | encoder_dim = encoder_out.size(-1)
31 | encoder_out = encoder_out.view(1, -1, encoder_dim) # flatten to (1, num_pixels, encoder_dim)
32 | num_pixels = encoder_out.size(1)
33 | # expand or repeat 'k' times
34 | encoder_out = encoder_out.expand(k, num_pixels, encoder_dim) # (k, num_pixels, encoder_dim)
35 |
36 | # Tensor to store top k previous words at each step; now they're just <start>
37 | k_prev_words = torch.LongTensor([[vocab['<start>']]] * k).to(device) # (k, 1)
38 |
39 | # Tensor to store top k sequences; now they're just <start>
40 | seqs = k_prev_words # (k, 1)
41 |
42 | # Tensor to store top k sequences' scores; now they're just 0
43 | top_k_scores = torch.zeros(k, 1).to(device) # (k, 1)
44 |
45 | # Lists to store completed sequences and scores
46 | complete_seqs = list()
47 | complete_seqs_scores = list()
48 |
49 | # Start decoding
50 | step = 1
51 | h, c = mod.decoder.init_hidden_state(encoder_out)
52 |
53 | references = list()
54 | hypotheses = list()
55 |
56 | # s is a number less than or equal to k, because sequences are removed from this process once they hit <end>
57 | while True:
58 | embeddings = mod.decoder.embedding(k_prev_words).squeeze(1).float() # (s, embed_dim)
59 | awe, _ = mod.decoder.attention(encoder_out, h) # (s, encoder_dim), (s, num_pixels)
60 | gate = mod.decoder.sigmoid(mod.decoder.f_beta(h))
61 | awe = (gate * awe)
62 |
63 | h, c = mod.decoder.lstm(torch.cat([embeddings, awe], dim=1), (h, c))
64 | scores = mod.decoder.fc(h)
65 | scores = F.log_softmax(scores, dim=1)
66 |
67 |
68 | # Add scores to prev scores
69 | scores = top_k_scores.expand_as(scores) + scores # (s, vocab_size)
70 |
71 | # For the first step, all k points will have the same scores (since same k previous words, h, c)
72 | if step == 1:
73 | top_k_scores, top_k_words = scores[0].topk(k, 0, True, True) # (s)
74 | else:
75 | # Unroll and find top scores, and their unrolled indices
76 | top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True) # (s)
77 |
78 | # Convert unrolled indices to actual indices of scores
79 | prev_word_inds = top_k_words // len(vocab) # (s); integer division recovers the beam index
80 | next_word_inds = top_k_words % len(vocab) # (s)
81 |
82 | # Add new words to sequences
83 | seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1) # (s, step+1); stores word indices
84 |
85 | # Which sequences are incomplete (didn't reach <end>)?
86 | incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
87 | next_word != vocab['<end>']]
88 |
89 | complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))
90 |
91 | # Set aside complete sequences
92 | if len(complete_inds) > 0:
93 | complete_seqs.extend(seqs[complete_inds].tolist())
94 | complete_seqs_scores.extend(top_k_scores[complete_inds])
95 | k -= len(complete_inds) # reduce beam length accordingly
96 |
97 | # Proceed with incomplete sequences
98 | if k == 0:
99 | break
100 | seqs = seqs[incomplete_inds]
101 | h = h[prev_word_inds[incomplete_inds]]
102 | c = c[prev_word_inds[incomplete_inds]]
103 | encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
104 | top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
105 | k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
106 |
107 |
108 | # Break if things have been going on too long
109 | if step > 50:
110 | break
111 | step += 1
112 |
113 | i = complete_seqs_scores.index(max(complete_seqs_scores))
114 | seq = complete_seqs[i]
115 |
116 | # Hypotheses
117 | hypotheses.append([w for w in seq if w not in {vocab['<start>'], vocab['<end>'], vocab['<pad>']}])
118 |
119 | return hypotheses
120 |
121 |
122 | # Loss Function
123 | def loss_func(input, targets, lamb=1):
124 | pred, decode_lengths, alphas, _ = input
125 | pred = pack_padded_sequence(pred, decode_lengths, batch_first=True).to(device)
126 | targs = pack_padded_sequence(targets, decode_lengths, batch_first=True).to(device)
127 | loss = nn.CrossEntropyLoss().to(device)(pred.data, targs.data.long())
128 | loss += (lamb*((1. - alphas.sum(dim=1)) ** 2.).mean()).to(device) # doubly stochastic attention regularization
129 | return loss
130 |
131 |
132 |
133 | def topK_accuracy(input, targets, k=5):
134 | """
135 | Computes top-k accuracy, from predicted and true labels.
137 | :param input: tuple of (predictions, decode_lengths, alphas, sort_ind) from the model
137 | :param targets: true labels
138 | :param k: k in top-k accuracy
139 | :return: top-k accuracy
140 | """
141 | pred, decode_lengths, alphas,_ = input
142 | batch_size = targets.size(0)
143 | scores = pack_padded_sequence(pred, decode_lengths, batch_first=True).to(device)
144 | targ = pack_padded_sequence(targets, decode_lengths, batch_first=True).to(device)
145 | batch_size = targ.data.size(0)
146 | _, ind = scores.data.topk(k, 1, True, True)
147 | correct = ind.eq(targ.data.view(-1, 1).expand_as(ind))
148 | correct_total = correct.view(-1).float().sum() # 0D tensor
149 | return correct_total * (100.0 / batch_size)
150 |
151 |
152 | class TeacherForcingCallback(Callback):
153 | def __init__(self, learn:Learner):
154 | super().__init__()
155 | self.learn = learn
156 |
157 | def on_batch_begin(self, epoch,**kwargs):
158 | self.learn.model.decoder.teacher_forcing_ratio = (10 - epoch) * 0.1 if epoch < 10 else 0
159 |
160 | def on_batch_end(self,**kwargs):
161 | self.learn.model.decoder.teacher_forcing_ratio = 0.
162 |
163 | class GradientClipping(LearnerCallback):
164 | "Gradient clipping during training."
165 | def __init__(self, learn:Learner, clip:float = 0.3):
166 | super().__init__(learn)
167 | self.clip = clip
168 |
169 | def on_backward_end(self, **kwargs):
170 | "Clip the gradient before the optimizer step."
171 | if self.clip: nn.utils.clip_grad_norm_(self.learn.model.parameters(), self.clip)
172 |
173 |
174 |
175 | class BleuMetric(Callback):
176 | def __init__(self,metadata = None, vocab = None):
177 | super().__init__()
178 | self.vocab = vocab
179 | self.metadata = metadata
180 |
181 | def on_epoch_begin(self, **kwargs):
182 | self.bleureferences = list()
183 | self.bleucandidates = list()
184 |
185 |
186 | def on_batch_end(self, last_output, last_target, **kwargs):
187 | pred, decode_lengths,_,inds = last_output
188 | references = self.metadata.numericalized_ref.loc[inds.tolist()]
189 | _,pred_words = pred.max(dim=-1)
190 | pred_words, decode_lengths,references = list(pred_words), decode_lengths, list(references)
191 | hypotheses = list()
192 | for i,cap in enumerate(pred_words): hypotheses.append([x for x in cap.tolist()[:decode_lengths[i]] if x not in {self.vocab['<start>'], self.vocab['<end>'], self.vocab['<pad>']}])
193 | #for i,cap in enumerate(pred_words): hypotheses.append([x for x in cap.tolist() if x not in {self.vocab['xxunk'], self.vocab['xxpad'], self.vocab['xxbos'], self.vocab['xxeos'],self.vocab['xxfld'],self.vocab['xxmaj'],self.vocab['xxup'],self.vocab['xxrep'],self.vocab['xxwrep']}])
194 | self.bleureferences.extend(references)
195 | self.bleucandidates.extend(hypotheses)
196 |
197 |
198 |
199 |
200 | def on_epoch_end(self, last_metrics, **kwargs):
201 | assert len(self.bleureferences) == len(self.bleucandidates)
202 | # print('\n'+' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[0]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[0][0]]))
203 | # print(' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[25]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[25][0]]))
204 | # print(' '.join([list(self.vocab.keys())[i-1] for i in self.bleucandidates[99]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.bleureferences[99][0]])+'\n')
205 |
206 | bleu4 = corpus_bleu(self.bleureferences, self.bleucandidates)
207 | return add_metrics(last_metrics,bleu4)
208 |
209 |
210 | class BeamSearchBleu4(LearnerCallback):
211 | def __init__(self,learn:Learner,metadata = None, vocab = None, beam_fn = beam_search):
212 | super().__init__(learn)
213 | self.beam_fn = beam_fn
214 | self.vocab = vocab
215 | self.metadata = metadata
216 |
217 | def on_epoch_begin(self, **kwargs):
218 | self.beamreferences = list()
219 | self.beamcandidates = list()
220 |
221 | def on_batch_end(self,last_input, last_target, **kwargs):
222 | model_copy = cp.deepcopy(self.learn.model)
223 | imgs,_,_,inds = last_input
224 | references = self.metadata.numericalized_ref.loc[inds.tolist()]
225 | references = list(references)
226 | hypotheses = list()
227 | for img in imgs: hypotheses.append(self.beam_fn(model_copy,img,self.vocab)[0])
228 | self.beamreferences.extend(references)
229 | self.beamcandidates.extend(hypotheses)
230 |
231 | def on_epoch_end(self, last_metrics, **kwargs):
232 | assert len(self.beamreferences) == len(self.beamcandidates)
233 | print(' '.join([list(self.vocab.keys())[i-1] for i in self.beamcandidates[8]])+' | '+' '.join([list(self.vocab.keys())[i-1] for i in self.beamreferences[8][0]]))
234 | return add_metrics(last_metrics,corpus_bleu(self.beamreferences, self.beamcandidates))
235 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Image Caption Generation
2 |
3 | #### Implementation of the ***Show, Attend and Tell*** paper
4 |
5 | - [Image Caption Generation](#image-caption-generation)
6 | - [Demo](#demo)
7 | - [What's in this repo?](#whats-in-this-repo)
8 | - [Dataset Description](#dataset-description)
9 | - [Input files preparation](#input-files-preparation)
10 | - [Model architecture](#model-architecture)
11 | - [Encoder](#encoder)
12 | - [Attention Layer](#attention-layer)
13 | - [Decoder](#decoder)
14 | - [Training (using Fastai)](#training-using-fastai)
15 | - [Fastai utilities](#fastai-utilities)
16 | - [Training in Stages](#training-in-stages)
17 | - [Model interpretation](#model-interpretation)
18 | - [Technology used](#technology-used)
19 | - [Credits](#credits)
20 | - [Creator](#creator)
21 |
22 |
23 | ## Demo
24 | 
25 |
26 | ## What's in this repo?
27 | * [main-Finalized.ipynb](main-Finalized.ipynb) - Notebook with all the preprocessing, data preparation, model building, and training steps.
28 | * [modules/model.py](modules/model.py) - PyTorch implementation of the model architecture.
29 | * [modules/custom_callbacks.py](modules/custom_callbacks.py) - Fastai callback utilities such as teacher forcing and gradient clipping, plus the loss and validation metric functions.
30 | * [web_app](web_app) - This directory contains model deployment setup files.
31 |
32 | ## Dataset Description
33 |
34 | https://www.kaggle.com/ming666/flicker8k-dataset
35 |
36 | The **Flickr8k** dataset consists of around 8,000 images, each paired with five different captions that provide clear descriptions of the salient entities and events. The images were chosen from six different Flickr groups and tend not to contain any well-known people or locations, but were manually selected to depict a variety of scenes and situations. 6,000 images are used for training, 1,000 for testing, and 1,000 for development.
37 |
38 |
39 |
40 | ## Input files preparation
41 |
42 | #### 1. Preparation of the vocabulary dictionary
43 |
44 | The caption labels need to be converted into numbers, since a network does not accept strings as labels. We need a look-up dictionary that stores word-to-number mappings.
45 |
46 | Along with it, caption lengths are also computed. Caption lengths are used to optimize training (discussed in detail in the training section).
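
A minimal sketch of how such a dictionary could be built (the ```captions``` token lists and the exact special-token ids are assumptions, not the repo's exact code):

```py
from collections import Counter

def build_vocab(captions, min_word_freq=2):
    """captions: list of token lists. Returns (word->id map, caption lengths)."""
    freq = Counter(w for cap in captions for w in cap)
    vocab = {w: i + 1 for i, w in enumerate(w for w in freq if freq[w] >= min_word_freq)}
    for tok in ('<unk>', '<start>', '<end>'):   # special tokens get the next free ids
        vocab[tok] = len(vocab) + 1
    vocab['<pad>'] = 0                          # id 0 reserved for padding
    caplens = [len(cap) + 2 for cap in captions]  # +2 for <start> and <end>
    return vocab, caplens
```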
47 |
48 |
49 | #### 2. Create Dataset class
50 |
51 | In PyTorch, inputs are fed to the network in batches because of memory constraints. To facilitate this, we create a **Dataset** class that supports batch creation and loading.
52 |
53 | The primary job of the Dataset is to store the input paths. This class is used by PyTorch's *DataLoader()* to load images in batches.
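
A hedged sketch of such a Dataset (the path and caption container names are assumptions):

```py
import torch
from PIL import Image
from torch.utils.data import Dataset

class CaptionDataset(Dataset):
    """Stores image paths plus numericalized captions and their lengths."""
    def __init__(self, image_paths, captions, caplens, transform=None):
        self.image_paths, self.captions, self.caplens = image_paths, captions, caplens
        self.transform = transform

    def __getitem__(self, i):
        img = Image.open(self.image_paths[i]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, (torch.tensor(self.captions[i]), self.caplens[i])

    def __len__(self):
        return len(self.image_paths)
```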
54 |
55 | #### 3. Create Dataloader object
56 |
57 | The purpose of the **DataLoader** is to load batches of input-label pairs to be fed into the network.
58 |
59 | It is always a good idea to sort by caption length for faster computation. On the validation set, the **SortSampler** function from *Fastai*, which is built on top of PyTorch's **Sampler**, is used. On the training set, **SortishSampler**, which sorts data by length with a bit of randomness, is used. Each sampler returns an iterator of indices.
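
Wiring the samplers up might look like this (the dataset and length-lookup names are assumptions):

```py
from fastai.text import SortSampler, SortishSampler

# key maps a sample index to its caption length (caplens computed in step 1)
trn_sampler = SortishSampler(train_ds, key=lambda i: trn_caplens[i], bs=25)  # sorted, with randomness
val_sampler = SortSampler(valid_ds, key=lambda i: val_caplens[i])            # strictly sorted
```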
60 |
61 |
62 | #### 4. Create Pad_collate function
63 | Since captions have different lengths, shorter captions must be padded to bring them to the length of the longest caption in the batch, as PyTorch expects all tensors in a batch to be the same size.
64 |
65 | The function collects samples and returns a label tensor with padding. It is passed as the ```collate_fn``` argument while creating the ```DataLoader``` object.
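
A sketch of such a collate function and the ```DataLoader``` wiring, assuming each Dataset item is ```(image, (caption, caplen))``` as in the Dataset sketch above:

```py
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def pad_collate(batch, pad_idx=0):
    imgs, targets = zip(*batch)
    caps, caplens = zip(*targets)
    imgs = torch.stack(imgs)
    caps = pad_sequence(list(caps), batch_first=True, padding_value=pad_idx)  # pad to batch max
    caplens = torch.tensor(caplens).unsqueeze(1)  # (batch_size, 1), the shape the decoder expects
    return imgs, (caps, caplens)

train_dl = DataLoader(train_ds, batch_size=25, sampler=trn_sampler, collate_fn=pad_collate)
```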
66 |
67 | ## Model architecture
68 |
69 | The network architecture consists of three components: an encoder, an attention layer, and a decoder.
70 |
71 | ### Encoder
72 |
73 | The encoder is a convolutional neural network that takes raw images as input and outputs extracted features as encoded images. The extractor produces **L** vectors (one per spatial location), each of dimension **D** (the number of feature channels); each vector corresponds to a part of the image, so **L** different features at **L** different locations are identified.
74 |
75 | For the encoder part, I have used the **Resnet-101** architecture pre-trained on **Imagenet**. Since Resnet is trained to classify different objects, its last linear layer outputs a 1-d probability tensor. Our objective, however, is to get feature maps, so we retain only the convolutional layers and drop the final pooling and feed-forward layers.
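
This is exactly what the ```Encoder``` class in [modules/model.py](modules/model.py) does; its core is:

```py
import torchvision
from torch import nn

resnet = torchvision.models.resnet101(pretrained=True)  # pretrained ImageNet ResNet-101
modules = list(resnet.children())[:-2]                   # drop the final avgpool and fc layers
cnn = nn.Sequential(*modules)                            # outputs (batch, 2048, H/32, W/32) feature maps
pool = nn.AdaptiveAvgPool2d((14, 14))                    # fixed spatial size for variable input sizes
```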
76 |
77 | ### Attention Layer
78 |
79 | The attention model generates attention weights at every step based on the previous step's hidden state vector (**h[t-1]**) it receives from the decoder. The hidden state carries contextual information about the caption generated so far.
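
The computation, mirroring the repo's ```Attention``` module in a toy, self-contained form (batch and dimension sizes here are illustrative):

```py
import torch
from torch import nn

b, num_pixels, enc_dim, dec_dim, att_dim = 2, 196, 2048, 512, 512
encoder_out = torch.randn(b, num_pixels, enc_dim)   # encoded image regions
h_prev = torch.randn(b, dec_dim)                    # decoder hidden state h[t-1]

enc_att, dec_att = nn.Linear(enc_dim, att_dim), nn.Linear(dec_dim, att_dim)
full_att = nn.Linear(att_dim, 1)

att = full_att(torch.relu(enc_att(encoder_out) + dec_att(h_prev).unsqueeze(1))).squeeze(2)
alpha = att.softmax(dim=1)                           # attention weights over the L regions
z_t = (encoder_out * alpha.unsqueeze(2)).sum(dim=1)  # attention-weighted encoding
```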
80 |
81 | ### Decoder
82 |
83 | The decoder, an LSTM network, generates the caption one word at a time. At each step it takes the attention-weighted encoding, computed from the decoder's hidden state at step **t-1**, which tells it which part of the image to focus on when generating the next word.
84 |
85 | The flow is depicted in the following image:
86 | 
87 |
88 | #### Model architecture dimensions
89 | ```py
90 | embed_dim = 300        # embedding input dimension
91 | attention_dim = 512
92 | decoder_dim = 512
93 | dropout = 0.5          # decoder dropout
94 | encoder_dim = 2048     # encoder output dimension
95 | ```
96 |
97 |
98 | ## Training (using Fastai)
99 |
100 | We use pre-trained weights for the encoder, which was trained on the Imagenet dataset of images of 1000 different object classes, most likely including the objects found in our dataset. Therefore, the encoder does not require much tuning. The decoder, on the other hand, has to learn a lot, as it starts language modeling from scratch.
101 |
102 | So, it is better to train just the decoder (fine_tune off) for the first few epochs until both parts are at the same level, and then train the entire network for the next few epochs. This way, we save the computational time of the encoder's gradient computation while the decoder receives most of the updates in the initial epochs.
103 |
104 | Training the decoder from scratch requires a lot of computation and hence more time. Instead, we can use pre-trained word embeddings (words represented as numeric vectors) to initialize the embedding layer, whose output is passed into the decoder along with the previous hidden state.
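
For example, loading a pre-built fastText embedding matrix into the decoder (the matrix is prepared in [fastext_wordembedding.py](fastext_wordembedding.py); the sizes and the random stand-in below are illustrative only):

```py
import torch
from modules.model import DecoderWithAttention

decoder = DecoderWithAttention(attention_dim=512, embed_dim=300,
                               decoder_dim=512, vocab_size=2574)  # vocab size is illustrative
emb_matrix = torch.randn(2574, 300)  # stand-in for the real fastText matrix
decoder.load_pretrained_embeddings(emb_matrix)
decoder.fine_tune_embeddings(fine_tune=True)  # still let the embeddings adapt during training
```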
105 |
106 |
107 | ### Fastai utilities
108 |
109 | Fastai is a deep learning framework built on top of PyTorch that implements various state-of-the-art methods. It provides a smooth API that makes the most important deep learning applications easier.
110 |
111 | * **lr_find** - Does a mock training run over a large range of learning rates, then plots them against the losses. We pick a value a bit before the minimum, where the loss is still improving.
112 |
113 | 
114 |
115 | * **fit_one_cycle** - An implementation of the one cycle policy: the learning rate goes up to a maximum and comes back down over one cycle through all the mini-batches. A fit cycle takes the entire input and divides it into batches of size ```bs```; training starts at lr_min for the first batch, increases gradually over the next batches until, at about 30 percent of the total batches, it reaches lr_max, and then decreases again, reaching lr_min at the last batch (see the sketch at the end of this section).
116 |
117 | The original 1cycle policy has three steps:
118 |
119 | 1. We progressively increase our learning rate from lr_max/div_factor to lr_max and at the same time, we progressively decrease our momentum from mom_max to mom_min.
120 | 2. We do the exact opposite: we progressively decrease our learning rate from lr_max to lr_max/div_factor and at the same time, we progressively increase our momentum from mom_min to mom_max.
121 | 3. We further decrease our learning rate from lr_max/div_factor to lr_max/(div_factor x 100) and we keep momentum steady at mom_max.
122 |
123 |
124 | **Clipping gradients**:
125 | * Gradients can vanish when they are repeatedly multiplied by numbers less than one (the vanishing gradient problem); conversely, repeated multiplication by numbers greater than one can make them explode.
126 |
127 | * It has little effect on learning, but if you have a "bad minibatch" that would cause gradients to explode for some reason, the clipping prevents that iteration from messing up your entire model.
128 |
129 | **Early Stopping**
130 |
131 | * The authors of the *Show, Attend and Tell* paper observe that the correlation between the loss and the BLEU score breaks down after a point, so they recommend stopping training early when the BLEU score starts degrading or stops improving.
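
A hedged sketch of how these utilities fit together (the ```data``` bunch and ```model``` objects are assumed to exist; ```GradientClipping```, ```loss_func``` and ```topK_accuracy``` come from [modules/custom_callbacks.py](modules/custom_callbacks.py), and the hyper-parameters are illustrative):

```py
from functools import partial
from fastai.vision import Learner
from modules.custom_callbacks import loss_func, topK_accuracy, GradientClipping

learn = Learner(data, model, loss_func=loss_func, metrics=[topK_accuracy],
                callback_fns=[partial(GradientClipping, clip=0.3)])

learn.lr_find()                       # mock training over a range of learning rates
learn.recorder.plot()                 # pick a value a bit before the loss minimum
learn.fit_one_cycle(12, max_lr=4e-4)  # one cycle policy with the chosen max lr
```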
132 |
133 | ### Training in Stages
134 |
135 | In the first stage, the model is trained with the encoder frozen, i.e. only the decoder weights are updated, for faster training. The model was run with batches of ```25``` images for 12 epochs using the ```Adam()``` optimizer with a learning rate of ```4e-04```.
136 |
137 | **Results**:
138 | epoch | train_loss | valid_loss | topK_accuracy | bleu_metric | time
139 | ------|------------|------------|---------------|-------------|-----
140 | 0 | 4.649515 | 4.511709 | 58.052895 | 0.106774 | 18:29
141 | 1 | 4.234053 | 4.231682 | 62.291264 | 0.125098 | 17:41
142 | 2 | 4.048578 | 4.089489 | 64.173981 | 0.136820 | 17:13
143 | 3 | 3.918362 | 4.001822 | 65.538071 | 0.142155 | 17:17
144 | 4 | 3.820599 | 3.946904 | 66.606972 | 0.147784 | 16:14
145 | 5 | 3.676066 | 3.904321 | 67.152397 | 0.140314 | 16:08
146 | 6 | 3.632400 | 3.884929 | 67.566093 | 0.145791 | 16:08
147 | 7 | 3.533431 | 3.860997 | 68.075752 | 0.154064 | 16:08
148 | 8 | 3.480697 | 3.852596 | 68.334770 | 0.151733 | 16:08
149 | 9 | 3.406797 | 3.853946 | 68.293274 | 0.150269 | 16:08
150 |
151 | 
152 |
153 |
154 | In the second stage, the model is trained with the encoder unfrozen. The model was run with batches of ```5``` images for 10 epochs using the ```Adam()``` optimizer with a ```1e-04``` learning rate, adopting the ```one cycle policy```.
155 |
156 | **Results**:
157 |
158 | epoch | train_loss | valid_loss | topK_accuracy | bleu_metric | time
159 | ------|------------|------------|---------------|-------------|-----
160 | 0 | 3.547406 | 3.914244 | 67.741348 | 0.134781 | 40:54
161 | 1 | 3.717416 | 3.972998 | 66.951462 | 0.142118 | 42:23
162 | 2 | 3.721014 | 3.950798 | 67.553833 | 0.150034 | 42:25
163 | 3 | 3.566937 | 3.928402 | 68.072418 | 0.155043 | 41:56
164 | 4 | 3.473794 | 3.910442 | 68.245857 | 0.163102 | 40:16
165 | 5 | 3.350647 | 3.915221 | 68.383591 | 0.161378 | 39:18
166 |
167 |
168 | 
169 |
170 | **Evaluation: Beam search**
171 |
172 | **Beam search**: instead of greedily taking the single best-scoring word at each step, we keep the ```k``` (beam width) highest-scoring partial sequences. Beam search is useful for any language-modeling problem because it usually finds a higher-scoring sequence than greedy decoding (though it does not guarantee the optimal one).
173 |
174 | 
175 |
176 |
177 |
178 | **Validation results**
179 |
180 | Beam Size | Test BLEU-4
181 | ----------|-------------
182 | 1 | 21.8
183 | 3 | 23.46
184 | 5 | 23.9
185 |
186 |
187 | ### Model interpretation
188 |
189 | 
190 |
191 |
192 | ## Technology used
193 |
194 | 
195 |
196 | [
](https://pytorch.org/)
197 | [
](https://www.fast.ai/)
198 | [
](https://flask.palletsprojects.com/en/1.1.x/)
199 | [
]()
200 | [
](https://jquery.com/)
201 |
202 |
203 |
204 |
205 | ## Credits
206 |
207 | 1. [Show, Attend and Tell - paper (arxiv)](https://arxiv.org/abs/1502.03044)
208 |
209 | 2. [Illustrated Guide to LSTM's and GRU's - Medium](https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21)
210 |
211 | 3. [a-PyTorch-Tutorial-to-Image-Captioning - GitHub](https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning)
212 |
213 |
214 |
215 | ------
216 | ## Creator
217 | [
](https://skumar-djangoblog.herokuapp.com/)
218 |
--------------------------------------------------------------------------------
/web_app/static/Vocab_5_cap_per_img_2_min_word_freq.json:
--------------------------------------------------------------------------------
1 | {"a": 1, "black": 2, "dog": 3, "is": 4, "running": 5, "after": 6, "white": 7, "in": 8, "the": 9, "snow": 10, "chasing": 11, "brown": 12, "through": 13, "two": 14, "dogs": 15, "chase": 16, "each": 17, "other": 18, "across": 19, "snowy": 20, "ground": 21, "play": 22, "together": 23, "low": 24, "lying": 25, "body": 26, "of": 27, "water": 28, "little": 29, "baby": 30, "plays": 31, "croquet": 32, "girl": 33, "next": 34, "to": 35, "truck": 36, "child": 37, "playing": 38, "by": 39, "kid": 40, "front": 41, "car": 42, "with": 43, "put": 44, "and": 45, "ball": 46, "boy": 47, "hammer": 48, "beside": 49, "has": 50, "something": 51, "hot": 52, "pink": 53, "its": 54, "mouth": 55, "holding": 56, "hat": 57, "shirt": 58, "carrying": 59, "while": 60, "walking": 61, "looking": 62, "forward": 63, "along": 64, "beach": 65, "wearing": 66, "collar": 67, "walks": 68, "on": 69, "sand": 70, "near": 71, "large": 72, "ocean": 73, "red": 74, "frisbee": 75, "standing": 76, "sandy": 77, "drops": 78, "disc": 79, "flying": 80, "air": 81, "catching": 82, "dropping": 83, "cyclist": 84, "helmet": 85, "riding": 86, "pavement": 87, "bike": 88, "street": 89, "person": 90, "down": 91, "woman": 92, "wears": 93, "blue": 94, "as": 95, "she": 96, "goes": 97, "for": 98, "ride": 99, "shade": 100, "road": 101, "man": 102, "dressed": 103, "purple": 104, "bandanna": 105, "smiles": 106, "at": 107, "people": 108, "watching": 109, "him": 110, "leather": 111, "chaps": 112, "poses": 113, "camera": 114, "stands": 115, "onlookers": 116, "there": 117, "men": 118, "t": 119, "woodland": 120, "runs": 121, "some": 122, "trees": 123, "young": 124, "dancing": 125, "around": 126, "short": 127, "sleeved": 128, "jeans": 129, "stops": 130, "smile": 131, "dress": 132, "back": 133, "smiling": 134, "braids": 135, "looks": 136, "green": 137, "skier": 138, "yellow": 139, "jacket": 140, "airborne": 141, "above": 142, "mountains": 143, "jumps": 144, "high": 145, "view": 146, "skiing": 147, "very": 148, "it": 149, "though": 150, "he": 151, "doing": 152, "ski": 153, "jump": 154, "pants": 155, "appears": 156, "almost": 157, "fly": 158, "into": 159, "sky": 160, "photographer": 161, "over": 162, "hills": 163, "videotaping": 164, "natural": 165, "landscape": 166, "out": 167, "rolling": 168, "tripod": 169, "lady": 170, "her": 171, "set": 172, "up": 173, "field": 174, "record": 175, "bunch": 176, "girls": 177, "cheerleader": 178, "outfits": 179, "group": 180, "cheerleaders": 181, "parade": 182, "perform": 183, "many": 184, "walk": 185, "uniforms": 186, "boat": 187, "canopy": 188, "floating": 189, "calm": 190, "waters": 191, "roof": 192, "middle": 193, "floats": 194, "lake": 195, "catches": 196, "midair": 197, "terrier": 198, "mix": 199, "leaping": 200, "object": 201, "old": 202, "sitting": 203, "an": 204, "advertisement": 205, "asian": 206, "waiting": 207, "underground": 208, "train": 209, "stop": 210, "sits": 211, "transit": 212, "station": 213, "backlit": 214, "subway": 215, "umbrella": 216, "wall": 217, "blond": 218, "trail": 219, "side": 220, "metal": 221, "fence": 222, "coat": 223, "rural": 224, "flute": 225, "parka": 226, "fenced": 227, "past": 228, "enclosed": 229, "area": 230, "family": 231, "nine": 232, "including": 233, "four": 234, "children": 235, "pose": 236, "brick": 237, "fireplace": 238, "christmas": 239, "tree": 240, "posing": 241, "happy": 242, "couples": 243, "kids": 244, "picture": 245, "sweater": 246, "pointing": 247, "one": 248, "arms": 249, "outstretched": 250, "finger": 251, "pointed": 252, "another": 253, "stares": 254, "from": 255, 
"behind": 256, "who": 257, "his": 258, "look": 259, "toward": 260, "points": 261, "hallway": 262, "medium": 263, "sized": 264, "small": 265, "larger": 266, "grassy": 267, "big": 268, "tall": 269, "grass": 270, "three": 271, "flowers": 272, "stuffed": 273, "kitten": 274, "garden": 275, "among": 276, "wildflowers": 277, "toy": 278, "cat": 279, "long": 280, "raises": 281, "stripes": 282, "signs": 283, "african": 284, "american": 285, "building": 286, "handicapped": 287, "space": 288, "orange": 289, "backwards": 290, "wet": 291, "stick": 292, "shore": 293, "bounds": 294, "splashing": 295, "off": 296, "paws": 297, "carries": 298, "are": 299, "racing": 300, "race": 301, "track": 302, "greyhounds": 303, "muzzles": 304, "inside": 305, "curb": 306, "railed": 307, "dirt": 308, "run": 309, "baseball": 310, "pitcher": 311, "throws": 312, "player": 313, "glove": 314, "pitches": 315, "male": 316, "sports": 317, "outfit": 318, "tries": 319, "catch": 320, "wades": 321, "rock": 322, "shallow": 323, "reach": 324, "outcropping": 325, "light": 326, "paw": 327, "about": 328, "ready": 329, "swim": 330, "5": 331, "school": 332, "cross": 333, "takes": 334, "outside": 335, "photograph": 336, "taking": 337, "pictures": 338, "women": 339, "stand": 340, "jean": 341, "hand": 342, "chest": 343, "nose": 344, "writing": 345, "hands": 346, "video": 347, "store": 348, "denim": 349, "full": 350, "house": 351, "sunglasses": 352, "flowered": 353, "sit": 354, "nearby": 355, "left": 356, "adults": 357, "display": 358, "case": 359, "digital": 360, "glass": 361, "sides": 362, "holds": 363, "sideways": 364, "arm": 365, "snowboarder": 366, "ramp": 367, "snowboard": 368, "performing": 369, "trick": 370, "making": 371, "icy": 372, "mountain": 373, "climbers": 374, "line": 375, "their": 376, "way": 377, "covered": 378, "background": 379, "facing": 380, "jumping": 381, "hoop": 382, "ring": 383, "using": 384, "laptop": 385, "cellphone": 386, "against": 387, "use": 388, "electronics": 389, "bench": 390, "engage": 391, "electronic": 392, "devices": 393, "races": 394, "number": 395, "6": 396, "greyhound": 397, "muzzle": 398, "six": 399, "tan": 400, "gold": 401, "edge": 402, "cliff": 403, "scouts": 404, "rest": 405, "bluff": 406, "overlooking": 407, "top": 408, "couple": 409, "wooded": 410, "them": 411, "lined": 412, "friends": 413, "stroll": 414, "forest": 415, "path": 416, "woods": 417, "sponsored": 418, "smoking": 419, "tires": 420, "drives": 421, "rain": 422, "driving": 423, "headlights": 424, "advertising": 425, "rainy": 426, "lights": 427, "raceway": 428, "bearded": 429, "whilst": 430, "bicycle": 431, "bicycles": 432, "hair": 433, "gives": 434, "peace": 435, "sign": 436, "cap": 437, "crowd": 438, "waving": 439, "flags": 440, "passing": 441, "under": 442, "bridge": 443, "or": 444, "banners": 445, "gets": 446, "pull": 447, "chair": 448, "meal": 449, "table": 450, "cafe": 451, "restaurant": 452, "getting": 453, "grabs": 454, "seat": 455, "racket": 456, "round": 457, "handle": 458, "backyard": 459, "lawn": 460, "clothing": 461, "yard": 462, "toys": 463, "enjoy": 464, "themselves": 465, "wind": 466, "blows": 467, "trampoline": 468, "blonde": 469, "bicyclists": 470, "between": 471, "wire": 472, "fences": 473, "cyclists": 474, "country": 475, "desert": 476, "bikers": 477, "dry": 478, "land": 479, "climbing": 480, "steep": 481, "hill": 482, "climbs": 483, "great": 484, "dane": 485, "spins": 486, "merry": 487, "go": 488, "watch": 489, "spinning": 490, "playground": 491, "that": 492, "skirt": 493, "golden": 494, "retriever": 495, "towards": 
496, "away": 497, "greet": 498, "bus": 499, "onward": 500, "door": 501, "vehicle": 502, "window": 503, "passengers": 504, "peaking": 505, "peering": 506, "hiding": 507, "peeking": 508, "striped": 509, "peeks": 510, "hold": 511, "drinks": 512, "point": 513, "bottles": 514, "several": 515, "this": 516, "photo": 517, "beers": 518, "border": 519, "collie": 520, "bed": 521, "tennis": 522, "climber": 523, "peak": 524, "rocks": 525, "snowcapped": 526, "hikes": 527, "following": 528, "surrounded": 529, "distance": 530, "smoke": 531, "comes": 532, "starts": 533, "broken": 534, "during": 535, "racetrack": 536, "bull": 537, "leaves": 538, "bags": 539, "igloo": 540, "cave": 541, "luggage": 542, "stay": 543, "backpacks": 544, "piled": 545, "ice": 546, "visible": 547, "hole": 548, "starting": 549, "steps": 550, "murky": 551, "putting": 552, "legs": 553, "pond": 554, "busy": 555, "city": 556, "silhouette": 557, "buildings": 558, "crowded": 559, "shot": 560, "turned": 561, "chases": 562, "threw": 563, "interacting": 564, "opposite": 565, "direction": 566, "fetch": 567, "also": 568, "outdoors": 569, "cameras": 570, "third": 571, "river": 572, "skis": 573, "backdrop": 574, "face": 575, "rope": 576, "swimming": 577, "creek": 578, "playfully": 579, "rolls": 580, "int": 581, "chewing": 582, "resting": 583, "game": 584, "elderly": 585, "straw": 586, "alone": 587, "park": 588, "haired": 589, "gray": 590, "dark": 591, "beard": 592, "guitar": 593, "attractive": 594, "head": 595, "ceiling": 596, "wide": 597, "drink": 598, "upward": 599, "reading": 600, "paperback": 601, "book": 602, "elder": 603, "scarf": 604, "older": 605, "reads": 606, "motocross": 607, "motorcycle": 608, "tight": 609, "turn": 610, "motorbike": 611, "bright": 612, "someone": 613, "fall": 614, "rider": 615, "cots": 616, "sleeping": 617, "makeshift": 618, "beds": 619, "hanging": 620, "mountainside": 621, "skateboarder": 622, "skateboard": 623, "hopes": 624, "landing": 625, "teenage": 626, "flipping": 627, "tricks": 628, "colored": 629, "protest": 630, "banner": 631, "staring": 632, "religious": 633, "sidewalk": 634, "poster": 635, "mambo": 636, "image": 637, "teddy": 638, "bear": 639, "showing": 640, "humping": 641, "stool": 642, "trying": 643, "sell": 644, "animals": 645, "selling": 646, "various": 647, "vendor": 648, "sells": 649, "s": 650, "inflatable": 651, "guy": 652, "navy": 653, "shorts": 654, "pool": 655, "football": 656, "trunks": 657, "strange": 658, "diving": 659, "plastic": 660, "slide": 661, "equipment": 662, "shaded": 663, "fire": 664, "baton": 665, "day": 666, "juggling": 667, "flames": 668, "bubbles": 669, "float": 670, "popping": 671, "woven": 672, "toboggan": 673, "right": 674, "shoes": 675, "sandals": 676, "tug": 677, "war": 678, "pulling": 679, "end": 680, "rottweiler": 681, "surf": 682, "wave": 683, "surfers": 684, "surfing": 685, "attempting": 686, "sea": 687, "huge": 688, "sunset": 689, "rides": 690, "biking": 691, "muddy": 692, "slope": 693, "crossing": 694, "buses": 695, "hurrying": 696, "heads": 697, "night": 698, "phone": 699, "talks": 700, "pile": 701, "rail": 702, "stunt": 703, "soccer": 704, "uniform": 705, "kick": 706, "match": 707, "watches": 708, "knee": 709, "female": 710, "break": 711, "driver": 712, "wheel": 713, "thumbs": 714, "before": 715, "polka": 716, "dots": 717, "boots": 718, "hip": 719, "kimono": 720, "purse": 721, "followed": 722, "redheaded": 723, "pedestrians": 724, "casually": 725, "barefoot": 726, "cute": 727, "puppy": 728, "fetches": 729, "chew": 730, "fluffy": 731, "rings": 732, "dock": 733, 
"laughing": 734, "bucket": 735, "splashes": 736, "pier": 737, "beneath": 738, "dances": 739, "room": 740, "balloons": 741, "floor": 742, "socks": 743, "wooden": 744, "strewn": 745, "strings": 746, "confetti": 747, "wood": 748, "mother": 749, "both": 750, "ledge": 751, "scales": 752, "tent": 753, "being": 754, "enter": 755, "fishing": 756, "setting": 757, "hut": 758, "iced": 759, "tarp": 760, "structure": 761, "surface": 762, "few": 763, "foam": 764, "statue": 765, "liberty": 766, "hats": 767, "take": 768, "photos": 769, "new": 770, "york": 771, "get": 772, "taken": 773, "fight": 774, "jersey": 775, "leaps": 776, "dives": 777, "going": 778, "begin": 779, "sheer": 780, "climb": 781, "pretty": 782, "flat": 783, "rocky": 784, "card": 785, "what": 786, "says": 787, "they": 788, "bathing": 789, "suit": 790, "sprinklers": 791, "sprinkler": 792, "sliding": 793, "paddling": 794, "kiddie": 795, "lone": 796, "flies": 797, "leafless": 798, "pulled": 799, "shovel": 800, "distorted": 801, "open": 802, "ends": 803, "bottom": 804, "just": 805, "reaching": 806, "lands": 807, "reaches": 808, "piano": 809, "sings": 810, "preparing": 811, "sing": 812, "singer": 813, "music": 814, "bares": 815, "teeth": 816, "tongue": 817, "expanse": 818, "curly": 819, "violin": 820, "walls": 821, "posters": 822, "rubbing": 823, "chin": 824, "be": 825, "cover": 826, "teenager": 827, "struggles": 828, "carry": 829, "piggy": 830, "paved": 831, "covering": 832, "arab": 833, "turban": 834, "facial": 835, "style": 836, "headscarf": 837, "grey": 838, "bird": 839, "waves": 840, "roll": 841, "crane": 842, "upon": 843, "onto": 844, "pole": 845, "swings": 846, "silver": 847, "swinging": 848, "bounces": 849, "leotard": 850, "split": 851, "hello": 852, "kitty": 853, "does": 854, "leg": 855, "splits": 856, "leans": 857, "board": 858, "platform": 859, "skateboarders": 860, "eating": 861, "seeds": 862, "eats": 863, "clinging": 864, "stone": 865, "friend": 866, "helps": 867, "help": 868, "learning": 869, "how": 870, "helping": 871, "sweatshirt": 872, "not": 873, "far": 874, "racer": 875, "slightly": 876, "competition": 877, "circuit": 878, "cheering": 879, "grabbing": 880, "ankle": 881, "oklahoma": 882, "score": 883, "fans": 884, "cheer": 885, "athlete": 886, "performs": 887, "herself": 888, "bar": 889, "vault": 890, "vaulting": 891, "upside": 892, "attempts": 893, "feet": 894, "do": 895, "gap": 896, "flip": 897, "blocks": 898, "concrete": 899, "carefully": 900, "crosses": 901, "reflection": 902, "caught": 903, "pajamas": 904, "hall": 905, "hardwood": 906, "floors": 907, "floored": 908, "lit": 909, "toddler": 910, "chairs": 911, "kneel": 912, "folding": 913, "row": 914, "talking": 915, "spotted": 916, "fast": 917, "seated": 918, "stadium": 919, "event": 920, "fill": 921, "packed": 922, "indoor": 923, "dig": 924, "duck": 925, "pet": 926, "chased": 927, "parachute": 928, "lifting": 929, "attached": 930, "parasail": 931, "suspended": 932, "silhouetted": 933, "sunny": 934, "gliding": 935, "backed": 936, "sun": 937, "skateboarding": 938, "slides": 939, "railing": 940, "shines": 941, "skating": 942, "stair": 943, "stairs": 944, "have": 945, "pillow": 946, "having": 947, "skates": 948, "neck": 949, "grazes": 950, "flight": 951, "boats": 952, "boxing": 953, "boxers": 954, "fighting": 955, "box": 956, "faces": 957, "funny": 958, "makes": 959, "glasses": 960, "rusty": 961, "barks": 962, "barking": 963, "bark": 964, "hose": 965, "pouring": 966, "drinking": 967, "which": 968, "squirted": 969, "wings": 970, "swimsuits": 971, "lacrosse": 972, "players": 
973, "struggling": 974, "control": 975, "team": 976, "breaks": 977, "hit": 978, "hockey": 979, "type": 980, "sport": 981, "like": 982, "sticks": 983, "guys": 984, "all": 985, "saying": 986, "free": 987, "dinner": 988, "cement": 989, "skate": 990, "prepares": 991, "hugging": 992, "embracing": 993, "hugs": 994, "dunks": 995, "basketball": 996, "make": 997, "basket": 998, "goal": 999, "dunking": 1000, "grinding": 1001, "snowboarding": 1002, "steel": 1003, "homemade": 1004, "made": 1005, "piece": 1006, "log": 1007, "mid": 1008, "leap": 1009, "crystal": 1010, "clear": 1011, "wrapped": 1012, "tape": 1013, "brother": 1014, "superman": 1015, "cape": 1016, "cast": 1017, "material": 1018, "super": 1019, "airplane": 1020, "plane": 1021, "jet": 1022, "runway": 1023, "seen": 1024, "windows": 1025, "headfirst": 1026, "digging": 1027, "poking": 1028, "brush": 1029, "digs": 1030, "greenhouse": 1031, "tools": 1032, "work": 1033, "unfinished": 1034, "fishes": 1035, "rod": 1036, "jetty": 1037, "boys": 1038, "living": 1039, "shown": 1040, "wear": 1041, "shirts": 1042, "computer": 1043, "monitor": 1044, "eat": 1045, "cream": 1046, "adult": 1047, "sat": 1048, "world": 1049, "college": 1050, "tackled": 1051, "jerseys": 1052, "tackle": 1053, "tackling": 1054, "uniformed": 1055, "try": 1056, "fan": 1057, "painting": 1058, "paint": 1059, "within": 1060, "painted": 1061, "bleachers": 1062, "3": 1063, "dalmatians": 1064, "spots": 1065, "palm": 1066, "route": 1067, "stretch": 1068, "displaying": 1069, "paintings": 1070, "framed": 1071, "artwork": 1072, "sale": 1073, "desolate": 1074, "fallen": 1075, "dead": 1076, "church": 1077, "bicyclist": 1078, "aerial": 1079, "scooter": 1080, "suburban": 1081, "neighborhood": 1082, "center": 1083, "rollerblades": 1084, "rollerblader": 1085, "narrow": 1086, "roller": 1087, "grinds": 1088, "rollerblading": 1089, "participate": 1090, "martial": 1091, "arts": 1092, "tournament": 1093, "mat": 1094, "protective": 1095, "gear": 1096, "spar": 1097, "helmets": 1098, "sparring": 1099, "skateboards": 1100, "skater": 1101, "flag": 1102, "headband": 1103, "vest": 1104, "indoors": 1105, "baring": 1106, "handrail": 1107, "foot": 1108, "indian": 1109, "crossed": 1110, "folded": 1111, "ethnic": 1112, "outdoor": 1113, "market": 1114, "shining": 1115, "graffiti": 1116, "plank": 1117, "no": 1118, "hung": 1119, "gallery": 1120, "naked": 1121, "individuals": 1122, "chinese": 1123, "ladies": 1124, "bank": 1125, "lockers": 1126, "retrieves": 1127, "seaweed": 1128, "coming": 1129, "mossy": 1130, "hiker": 1131, "descends": 1132, "hiking": 1133, "bringing": 1134, "leafy": 1135, "gravel": 1136, "amidst": 1137, "autumn": 1138, "others": 1139, "laugh": 1140, "touching": 1141, "laughs": 1142, "fun": 1143, "splash": 1144, "wading": 1145, "shallows": 1146, "bald": 1147, "drag": 1148, "dresses": 1149, "matching": 1150, "jewelry": 1151, "clothes": 1152, "formally": 1153, "jumped": 1154, "sniff": 1155, "somthing": 1156, "walkway": 1157, "biker": 1158, "moving": 1159, "furry": 1160, "doorway": 1161, "leading": 1162, "plants": 1163, "patio": 1164, "potted": 1165, "hairy": 1166, "step": 1167, "cars": 1168, "leave": 1169, "start": 1170, "twilight": 1171, "beginning": 1172, "see": 1173, "racers": 1174, "sled": 1175, "harness": 1176, "pack": 1177, "winter": 1178, "soaking": 1179, "wheelie": 1180, "terrain": 1181, "clutching": 1182, "turquoise": 1183, "guard": 1184, "thin": 1185, "markings": 1186, "ears": 1187, "somersault": 1188, "throwing": 1189, "tossing": 1190, "touches": 1191, "challenging": 1192, "teams": 1193, 
"quickly": 1194, "meadow": 1195, "surfer": 1196, "crashing": 1197, "follows": 1198, "surfboard": 1199, "laying": 1200, "power": 1201, "lines": 1202, "unusual": 1203, "shaped": 1204, "stump": 1205, "teammates": 1206, "referee": 1207, "breaking": 1208, "artist": 1209, "canvas": 1210, "overalls": 1211, "easel": 1212, "shady": 1213, "travels": 1214, "heavily": 1215, "fangs": 1216, "german": 1217, "shepherd": 1218, "leash": 1219, "somebody": 1220, "leashed": 1221, "falling": 1222, "backward": 1223, "urban": 1224, "partially": 1225, "guitars": 1226, "suits": 1227, "electric": 1228, "musicians": 1229, "curtain": 1230, "frolics": 1231, "mini": 1232, "moped": 1233, "heels": 1234, "pumps": 1235, "lap": 1236, "steers": 1237, "suv": 1238, "drivers": 1239, "jeep": 1240, "cords": 1241, "abseiling": 1242, "repelling": 1243, "flock": 1244, "birds": 1245, "mural": 1246, "pigeons": 1247, "bride": 1248, "newly": 1249, "wife": 1250, "held": 1251, "polo": 1252, "hides": 1253, "overgrown": 1254, "bushes": 1255, "grasses": 1256, "cushion": 1257, "spider": 1258, "patterned": 1259, "base": 1260, "close": 1261, "incoming": 1262, "dirty": 1263, "crashes": 1264, "beyond": 1265, "relaxing": 1266, "trench": 1267, "waits": 1268, "sculpture": 1269, "bikini": 1270, "pulls": 1271, "taller": 1272, "skaters": 1273, "rink": 1274, "scenery": 1275, "bounce": 1276, "filled": 1277, "bouncy": 1278, "castle": 1279, "swimsuit": 1280, "tightrope": 1281, "students": 1282, "campus": 1283, "quad": 1284, "but": 1285, "falls": 1286, "practice": 1287, "asking": 1288, "daughter": 1289, "nice": 1290, "pretending": 1291, "biting": 1292, "limb": 1293, "branch": 1294, "gnawing": 1295, "plant": 1296, "uncut": 1297, "barrel": 1298, "buckets": 1299, "poles": 1300, "seven": 1301, "teenagers": 1302, "wait": 1303, "gate": 1304, "puts": 1305, "gesture": 1306, "raised": 1307, "newspaper": 1308, "kitchen": 1309, "owner": 1310, "retrieving": 1311, "dining": 1312, "paper": 1313, "tile": 1314, "sharing": 1315, "kiss": 1316, "cold": 1317, "kissing": 1318, "taxi": 1319, "passes": 1320, "teen": 1321, "coats": 1322, "tunnel": 1323, "halloween": 1324, "eyes": 1325, "crawls": 1326, "beautiful": 1327, "tube": 1328, "leads": 1329, "trucks": 1330, "five": 1331, "obstacle": 1332, "course": 1333, "elephant": 1334, "draped": 1335, "fabric": 1336, "trunk": 1337, "colorful": 1338, "foliage": 1339, "swimmers": 1340, "move": 1341, "mud": 1342, "puddle": 1343, "nearly": 1344, "identical": 1345, "shirtless": 1346, "couch": 1347, "beaded": 1348, "disk": 1349, "show": 1350, "support": 1351, "wheeled": 1352, "tricycle": 1353, "i": 1354, "pedal": 1355, "wheeler": 1356, "2": 1357, "wheels": 1358, "carnival": 1359, "worker": 1360, "animal": 1361, "prizes": 1362, "amongst": 1363, "shelf": 1364, "riverbank": 1365, "camping": 1366, "patches": 1367, "supplies": 1368, "ditch": 1369, "pain": 1370, "dribbles": 1371, "moves": 1372, "tea": 1373, "coffee": 1374, "gather": 1375, "beverages": 1376, "home": 1377, "outstreached": 1378, "begins": 1379, "spreads": 1380, "plaid": 1381, "print": 1382, "touchdown": 1383, "rescue": 1384, "follow": 1385, "lead": 1386, "flips": 1387, "canoe": 1388, "still": 1389, "fat": 1390, "traveling": 1391, "stepping": 1392, "shaggy": 1393, "foreground": 1394, "inline": 1395, "public": 1396, "dust": 1397, "sooners": 1398, "lifted": 1399, "same": 1400, "athletes": 1401, "mannequins": 1402, "waterfall": 1403, "massive": 1404, "waterfalls": 1405, "guns": 1406, "shooting": 1407, "beige": 1408, "athletic": 1409, "coach": 1410, "nike": 1411, "whistle": 1412, "beagle": 
1413, "basset": 1414, "hound": 1415, "costumes": 1416, "frame": 1417, "unseen": 1418, "multi": 1419, "excited": 1420, "audience": 1421, "parallel": 1422, "kicks": 1423, "opponent": 1424, "compete": 1425, "karate": 1426, "belts": 1427, "clapping": 1428, "ran": 1429, "lift": 1430, "skiers": 1431, "shakes": 1432, "shaking": 1433, "hoodie": 1434, "horse": 1435, "jockey": 1436, "performer": 1437, "stage": 1438, "spectators": 1439, "onstage": 1440, "presentation": 1441, "mixing": 1442, "aged": 1443, "dj": 1444, "deck": 1445, "pretends": 1446, "younger": 1447, "tables": 1448, "meet": 1449, "goggles": 1450, "swimmer": 1451, "competitive": 1452, "apple": 1453, "place": 1454, "bag": 1455, "hips": 1456, "backpack": 1457, "tattoo": 1458, "clown": 1459, "safely": 1460, "parachutes": 1461, "landed": 1462, "para": 1463, "practicing": 1464, "craft": 1465, "exercises": 1466, "roughly": 1467, "clearing": 1468, "rough": 1469, "collars": 1470, "extreme": 1471, "backpacker": 1472, "carring": 1473, "uses": 1474, "camcorder": 1475, "films": 1476, "tosses": 1477, "empty": 1478, "bottle": 1479, "dug": 1480, "closely": 1481, "volleyball": 1482, "talk": 1483, "bikinis": 1484, "marketplace": 1485, "ascending": 1486, "horizontal": 1487, "shephard": 1488, "pair": 1489, "spray": 1490, "fountain": 1491, "splashed": 1492, "sprayed": 1493, "closeup": 1494, "screen": 1495, "life": 1496, "unicycle": 1497, "lining": 1498, "coastline": 1499, "location": 1500, "ferry": 1501, "shoreline": 1502, "docked": 1503, "dolphin": 1504, "pokes": 1505, "shoe": 1506, "horseshoe": 1507, "horseshoes": 1508, "corner": 1509, "shoulders": 1510, "these": 1511, "noses": 1512, "numbers": 1513, "dune": 1514, "watercraft": 1515, "swing": 1516, "strikes": 1517, "gestures": 1518, "closed": 1519, "poodle": 1520, "sticking": 1521, "butt": 1522, "inspects": 1523, "tattooed": 1524, "gentleman": 1525, "half": 1526, "relaxes": 1527, "where": 1528, "propped": 1529, "parked": 1530, "topless": 1531, "snowbank": 1532, "kneeling": 1533, "ridge": 1534, "ancient": 1535, "muscle": 1536, "pass": 1537, "tourist": 1538, "sleeveless": 1539, "floppy": 1540, "spaniel": 1541, "shawl": 1542, "cigarette": 1543, "blanket": 1544, "hooded": 1545, "pipe": 1546, "incense": 1547, "bubble": 1548, "machine": 1549, "bites": 1550, "blowing": 1551, "swims": 1552, "chunk": 1553, "mittens": 1554, "throw": 1555, "snowball": 1556, "grins": 1557, "paddles": 1558, "vertical": 1559, "approaching": 1560, "backs": 1561, "tank": 1562, "official": 1563, "marsh": 1564, "mostly": 1565, "swampy": 1566, "rubber": 1567, "chickens": 1568, "chicken": 1569, "eight": 1570, "balls": 1571, "bottoms": 1572, "pushes": 1573, "stroller": 1574, "brunette": 1575, "pushing": 1576, "carriage": 1577, "females": 1578, "hurdle": 1579, "lay": 1580, "hang": 1581, "snowman": 1582, "flower": 1583, "figure": 1584, "costume": 1585, "party": 1586, "blood": 1587, "branches": 1588, "bends": 1589, "alley": 1590, "pairs": 1591, "alleyway": 1592, "cowboy": 1593, "lasso": 1594, "twirling": 1595, "cart": 1596, "(": 1597, ")": 1598, "carrier": 1599, "rack": 1600, "shopping": 1601, "apart": 1602, "camouflage": 1603, "blow": 1604, "varying": 1605, "breeds": 1606, "sledding": 1607, "snowsuit": 1608, "dogsled": 1609, "town": 1610, "leashes": 1611, "strap": 1612, "bite": 1613, "strip": 1614, "steering": 1615, "ship": 1616, "navigating": 1617, "partly": 1618, "cloudy": 1619, "skies": 1620, "sailor": 1621, "sailboat": 1622, "puppet": 1623, "camper": 1624, "van": 1625, "rv": 1626, "peek": 1627, "snake": 1628, "lean": 1629, "different": 
1630, "tub": 1631, "bin": 1632, "multicolored": 1633, "pen": 1634, "enclosure": 1635, "pit": 1636, "cut": 1637, "balloon": 1638, "only": 1639, "underwear": 1640, "marker": 1641, "drawing": 1642, "markers": 1643, "safety": 1644, "neon": 1645, "ribbons": 1646, "competing": 1647, "covers": 1648, "vests": 1649, "you": 1650, "hi": 1651, "viz": 1652, "jackets": 1653, "carved": 1654, "returns": 1655, "horses": 1656, "pony": 1657, "stare": 1658, "crumbling": 1659, "structures": 1660, "reflections": 1661, "feild": 1662, "shop": 1663, "mall": 1664, "food": 1665, "court": 1666, "art": 1667, "turns": 1668, "ladder": 1669, "reached": 1670, "mesh": 1671, "headed": 1672, "popsicle": 1673, "frozen": 1674, "treat": 1675, "eyed": 1676, "curious": 1677, "ahead": 1678, "gymnastic": 1679, "ropes": 1680, "harnesses": 1681, "aid": 1682, "balance": 1683, "dangling": 1684, "straps": 1685, "kicking": 1686, "miami": 1687, "university": 1688, "scene": 1689, "classic": 1690, "well": 1691, "kept": 1692, "thrown": 1693, "dancers": 1694, "dance": 1695, "prepare": 1696, "hillside": 1697, "kayaker": 1698, "kayaking": 1699, "kayak": 1700, "spot": 1701, "warmly": 1702, "blurry": 1703, "tag": 1704, "touch": 1705, "riders": 1706, "shoulder": 1707, "motorbikes": 1708, "farm": 1709, "sheep": 1710, "goat": 1711, "places": 1712, "slab": 1713, "twisting": 1714, "graffitied": 1715, "hollywood": 1716, "stars": 1717, "examines": 1718, "bridal": 1719, "wedding": 1720, "ten": 1721, "groom": 1722, "except": 1723, "groomsmen": 1724, "bridesmaids": 1725, "launch": 1726, "cane": 1727, "lays": 1728, "rests": 1729, "stream": 1730, "eye": 1731, "quarter": 1732, "bush": 1733, "brownish": 1734, "photographs": 1735, "canal": 1736, "muzzled": 1737, "#": 1738, "numbered": 1739, "prisoner": 1740, "police": 1741, "officer": 1742, "cop": 1743, "fake": 1744, "mask": 1745, "tie": 1746, "handing": 1747, "papers": 1748, "button": 1749, "masked": 1750, "business": 1751, "attire": 1752, "straight": 1753, "profile": 1754, "blurred": 1755, "barefooted": 1756, "bicycler": 1757, "raced": 1758, "pop": 1759, "weather": 1760, "happily": 1761, "poised": 1762, "candles": 1763, "candle": 1764, "lighting": 1765, "elephants": 1766, "ridden": 1767, "colors": 1768, "turbans": 1769, "eastern": 1770, "give": 1771, "chubby": 1772, "remote": 1773, "peers": 1774, "device": 1775, "pad": 1776, "note": 1777, "writes": 1778, "motorcyclist": 1779, "speed": 1780, "sharp": 1781, "crouches": 1782, "rounds": 1783, "pops": 1784, "bmx": 1785, "without": 1786, "boarding": 1787, "trotting": 1788, "stuck": 1789, "trails": 1790, "pine": 1791, "4": 1792, "atv": 1793, "drive": 1794, "net": 1795, "olympics": 1796, "logo": 1797, "skimpy": 1798, "soaring": 1799, "underneath": 1800, "batting": 1801, "sweaters": 1802, "wade": 1803, "military": 1804, "speak": 1805, "crying": 1806, "bending": 1807, "assist": 1808, "checking": 1809, "dappled": 1810, "walked": 1811, "owners": 1812, "hits": 1813, "bat": 1814, "catcher": 1815, "teal": 1816, "japanese": 1817, "aqua": 1818, "pocket": 1819, "handstand": 1820, "sheets": 1821, "lab": 1822, "below": 1823, "headphones": 1824, "curvy": 1825, "elevator": 1826, "kites": 1827, "capped": 1828, "range": 1829, "doghouse": 1830, "youn": 1831, "camel": 1832, "camels": 1833, "trots": 1834, "agility": 1835, "lifts": 1836, "bare": 1837, "chested": 1838, "launches": 1839, "asleep": 1840, "machines": 1841, "atm": 1842, "money": 1843, "sleeps": 1844, "maroon": 1845, "dalmatian": 1846, "dalmation": 1847, "chews": 1848, "hind": 1849, "lambs": 1850, "counter": 1851, "buying": 
1852, "handbag": 1853, "lunch": 1854, "grocery": 1855, "corn": 1856, "cob": 1857, "nibbling": 1858, "ear": 1859, "listens": 1860, "mobile": 1861, "come": 1862, "sets": 1863, "horizon": 1864, "cricket": 1865, "participating": 1866, "opposing": 1867, "pick": 1868, "watermelon": 1869, "picking": 1870, "watermelons": 1871, "flowery": 1872, "stretches": 1873, "mats": 1874, "circle": 1875, "brightly": 1876, "teaches": 1877, "yoga": 1878, "festival": 1879, "learn": 1880, "stretching": 1881, "upwards": 1882, "legged": 1883, "plain": 1884, "puppies": 1885, "slam": 1886, "interested": 1887, "grab": 1888, "donut": 1889, "intertube": 1890, "ties": 1891, "share": 1892, "toast": 1893, "alongside": 1894, "sister": 1895, "mickey": 1896, "mouse": 1897, "angle": 1898, "non": 1899, "professional": 1900, "plate": 1901, "guarding": 1902, "runner": 1903, "softball": 1904, "block": 1905, "khaki": 1906, "pours": 1907, "wine": 1908, "dim": 1909, "shrubbery": 1910, "column": 1911, "belongings": 1912, "pillar": 1913, "time": 1914, "lot": 1915, "balding": 1916, "choppy": 1917, "wetsuit": 1918, "rapids": 1919, "rafting": 1920, "reacts": 1921, "earth": 1922, "loop": 1923, "leaving": 1924, "single": 1925, "file": 1926, "exhibit": 1927, "class": 1928, "columns": 1929, "barbed": 1930, "porch": 1931, "clad": 1932, "cowgirl": 1933, "scantily": 1934, "special": 1935, "dish": 1936, "towel": 1937, "cloth": 1938, "boxer": 1939, "pitbull": 1940, "uphill": 1941, "watched": 1942, "sneakers": 1943, "highway": 1944, "retreiver": 1945, "item": 1946, "ribbon": 1947, "elegant": 1948, "horns": 1949, "heard": 1950, "goats": 1951, "cluster": 1952, "earphones": 1953, "amused": 1954, "surprised": 1955, "foggy": 1956, "strollers": 1957, "pushed": 1958, "first": 1959, "bikes": 1960, "chat": 1961, "fist": 1962, "gloves": 1963, "giant": 1964, "circular": 1965, "perched": 1966, "opening": 1967, "atop": 1968, "kneels": 1969, "sprays": 1970, "push": 1971, "policeman": 1972, "crosswalk": 1973, "directs": 1974, "traffic": 1975, "pedestrian": 1976, "carves": 1977, "chainsaw": 1978, "carving": 1979, "saw": 1980, "reflective": 1981, "racquet": 1982, "blacktop": 1983, "parking": 1984, "printed": 1985, "attempt": 1986, "goalie": 1987, "dreadlocks": 1988, "bowls": 1989, "liquid": 1990, "mug": 1991, "oxford": 1992, "beer": 1993, "sipping": 1994, "lunges": 1995, "irish": 1996, "setter": 1997, "balcony": 1998, "fleece": 1999, "bouncing": 2000, "gated": 2001, "housing": 2002, "hopping": 2003, "magazines": 2004, "interracial": 2005, "master": 2006, "fruit": 2007, "farmers": 2008, "fish": 2009, "connected": 2010, "bamboo": 2011, "paddle": 2012, "father": 2013, "tossed": 2014, "monkey": 2015, "bars": 2016, "jungle": 2017, "gym": 2018, "messily": 2019, "pasta": 2020, "spaghetti": 2021, "mess": 2022, "skiiers": 2023, "dachshund": 2024, "shine": 2025, "customer": 2026, "cones": 2027, "observe": 2028, "pads": 2029, "really": 2030, "closes": 2031, "wand": 2032, "speaks": 2033, "cup": 2034, "dad": 2035, "birthday": 2036, "web": 2037, "cam": 2038, "celebrate": 2039, "cake": 2040, "wintry": 2041, "seating": 2042, "inflated": 2043, "parents": 2044, "skull": 2045, "motorcycles": 2046, "motorcyclists": 2047, "interesting": 2048, "formations": 2049, "shapes": 2050, "licking": 2051, "sniffing": 2052, "desk": 2053, "meeting": 2054, "modern": 2055, "decorated": 2056, "gathered": 2057, "clouds": 2058, "lots": 2059, "raising": 2060, "screaming": 2061, "parachuting": 2062, "ponytail": 2063, "paying": 2064, "lips": 2065, "tool": 2066, "rug": 2067, "carpet": 2068, "checkered": 
2069, "tugs": 2070, "tiger": 2071, "picnic": 2072, "hops": 2073, "hike": 2074, "barren": 2075, "bend": 2076, "gymnastics": 2077, "seashore": 2078, "shows": 2079, "fur": 2080, "balances": 2081, "staircase": 2082, "scaling": 2083, "repels": 2084, "spandex": 2085, "studio": 2086, "tops": 2087, "clothed": 2088, "soars": 2089, "slopes": 2090, "surrounding": 2091, "smaller": 2092, "tail": 2093, "sniffs": 2094, "batman": 2095, "makeup": 2096, "streaked": 2097, "giving": 2098, "panel": 2099, "upturned": 2100, "celebrating": 2101, "beret": 2102, "mustache": 2103, "conversation": 2104, "joy": 2105, "name": 2106, "badge": 2107, "chain": 2108, "passenger": 2109, "transportation": 2110, "camo": 2111, "photographed": 2112, "sort": 2113, "darkened": 2114, "sound": 2115, "colander": 2116, "clowns": 2117, "bowl": 2118, "hikers": 2119, "backpacking": 2120, "mountaineers": 2121, "sweat": 2122, "santa": 2123, "sofa": 2124, "claus": 2125, "stony": 2126, "mirror": 2127, "surfs": 2128, "leaning": 2129, "stairway": 2130, "rushing": 2131, "garbage": 2132, "can": 2133, "trash": 2134, "searching": 2135, "cans": 2136, "trashcan": 2137, "cardboard": 2138, "boxes": 2139, "instrument": 2140, "musician": 2141, "&": 2142, "chatting": 2143, "tire": 2144, "rocking": 2145, "deep": 2146, "overhanging": 2147, "overhang": 2148, "phrase": 2149, "springs": 2150, "motion": 2151, "storefront": 2152, "entrance": 2153, "jagged": 2154, "peach": 2155, "enjoys": 2156, "stomach": 2157, "wakeboard": 2158, "read": 2159, "fit": 2160, "miniature": 2161, "pacifier": 2162, "sucking": 2163, "gloved": 2164, "mitt": 2165, "maneuver": 2166, "similarly": 2167, "executes": 2168, "combat": 2169, "facility": 2170, "soldier": 2171, "shaved": 2172, "punch": 2173, "temple": 2174, "spotters": 2175, "boulder": 2176, "approaches": 2177, "showering": 2178, "shower": 2179, "watering": 2180, "source": 2181, "bounding": 2182, "museum": 2183, "zip": 2184, "harnessed": 2185, "cable": 2186, "pulley": 2187, "arena": 2188, "jumpsuit": 2189, "walker": 2190, "bent": 2191, "sloping": 2192, "dunes": 2193, "congregate": 2194, "jockeys": 2195, "keeping": 2196, "wrestle": 2197, "warm": 2198, "intersection": 2199, "beanie": 2200, "aim": 2201, "blocked": 2202, "members": 2203, "fetching": 2204, "teammate": 2205, "quarterback": 2206, "possession": 2207, "spring": 2208, "snows": 2209, "trimmed": 2210, "frog": 2211, "rise": 2212, "summit": 2213, "rails": 2214, "monk": 2215, "wrap": 2216, "robes": 2217, "tattoos": 2218, "bow": 2219, "plushie": 2220, "picks": 2221, "airport": 2222, "overhead": 2223, "casino": 2224, "amusement": 2225, "arcade": 2226, "blindfolds": 2227, "blindfolded": 2228, "o": 2229, "written": 2230, "spanish": 2231, "burning": 2232, "handles": 2233, "wheelbarrow": 2234, "whose": 2235, "been": 2236, "turning": 2237, "tiny": 2238, "cow": 2239, "necklace": 2240, "cliffs": 2241, "boulders": 2242, "band": 2243, "dollar": 2244, "bill": 2245, "bills": 2246, "instruments": 2247, "breath": 2248, "underwater": 2249, "fingers": 2250, "smeared": 2251, "chocolate": 2252, "act": 2253, "silly": 2254, "gas": 2255, "curve": 2256, "placed": 2257, "british": 2258, "union": 2259, "jack": 2260, "novelty": 2261, "backstroke": 2262, "energizer": 2263, "bunny": 2264, "attraction": 2265, "13": 2266, "defenders": 2267, "hurdles": 2268, "tri": 2269, "tents": 2270, "barrier": 2271, "fuzzy": 2272, "creature": 2273, "crab": 2274, "cups": 2275, "marathon": 2276, "runners": 2277, "lane": 2278, "rag": 2279, "dried": 2280, "containing": 2281, "cage": 2282, "pig": 2283, "crawling": 2284, 
"waring": 2285, "pieces": 2286, "gymnast": 2287, "gymnasium": 2288, "workout": 2289, "flooring": 2290, "infant": 2291, "squatting": 2292, "beverage": 2293, "overweight": 2294, "yawning": 2295, "shoeless": 2296, "homeless": 2297, "winnie": 2298, "pooh": 2299, "sheet": 2300, "returning": 2301, "serve": 2302, "clay": 2303, "signing": 2304, "portrait": 2305, "action": 2306, "created": 2307, "boardwalk": 2308, "too": 2309, "village": 2310, "masks": 2311, "embrace": 2312, "acting": 2313, "garb": 2314, "enjoying": 2315, "downhill": 2316, "cats": 2317, "pug": 2318, "mohawk": 2319, "drums": 2320, "feathers": 2321, "drum": 2322, "reindeer": 2323, "tussle": 2324, "deflated": 2325, "chats": 2326, "string": 2327, "traditional": 2328, "kimonos": 2329, "elaborate": 2330, "butterfly": 2331, "order": 2332, "posed": 2333, "fashion": 2334, "stretched": 2335, "higher": 2336, "jog": 2337, "jogging": 2338, "waterfront": 2339, "winding": 2340, "pitch": 2341, "winds": 2342, "more": 2343, "rugby": 2344, "hula": 2345, "hoops": 2346, "puddles": 2347, "maneuvers": 2348, "link": 2349, "nap": 2350, "tutu": 2351, "waterskies": 2352, "waterskiing": 2353, "waterskier": 2354, "colourful": 2355, "snowboards": 2356, "snowmobile": 2357, "blocking": 2358, "assistance": 2359, "grinning": 2360, "army": 2361, "bra": 2362, "afro": 2363, "fairy": 2364, "raft": 2365, "innertube": 2366, "dinghy": 2367, "monument": 2368, "mountaintop": 2369, "pyramid": 2370, "directions": 2371, "mouths": 2372, "lies": 2373, "completely": 2374, "hide": 2375, "coaster": 2376, "flipped": 2377, "multiple": 2378, "crests": 2379, "motorized": 2380, "wheelchair": 2381, "shelves": 2382, "shops": 2383, "distant": 2384, "spiky": 2385, "things": 2386, "crevice": 2387, "floral": 2388, "wagon": 2389, "dresser": 2390, "applying": 2391, "salon": 2392, "done": 2393, "sparse": 2394, "tattered": 2395, "umbrellas": 2396, "beads": 2397, "billboards": 2398, "pigtails": 2399, "heart": 2400, "fancy": 2401, "robe": 2402, "frowning": 2403, "bagpipe": 2404, "medieval": 2405, "similar": 2406, "trumpet": 2407, "marching": 2408, "fog": 2409, "balancing": 2410, "tractor": 2411, "squirrel": 2412, "drift": 2413, "melting": 2414, "mound": 2415, "upset": 2416, "streaming": 2417, "heavy": 2418, "rowboat": 2419, "rowing": 2420, "boards": 2421, "boogie": 2422, "wake": 2423, "surfboards": 2424, "ninja": 2425, "nude": 2426, "security": 2427, "series": 2428, "beam": 2429, "ad": 2430, "snowboarders": 2431, "cheers": 2432, "dancer": 2433, "batter": 2434, "protects": 2435, "dusk": 2436, "wakeboarder": 2437, "emerges": 2438, "dandelion": 2439, "toe": 2440, "suspenders": 2441, "spread": 2442, "tourists": 2443, "injured": 2444, "color": 2445, "fireworks": 2446, "glow": 2447, "necklaces": 2448, "sparklers": 2449, "china": 2450, "kisses": 2451, "ejected": 2452, "scarves": 2453, "cows": 2454, "herding": 2455, "skeleton": 2456, "decoration": 2457, "sail": 2458, "receives": 2459, "thumb": 2460, "ticket": 2461, "dimly": 2462, "singing": 2463, "club": 2464, "guitarist": 2465, "microphone": 2466, "listening": 2467, "chopsticks": 2468, "pot": 2469, "pan": 2470, "overturned": 2471, "items": 2472, "kilt": 2473, "scottish": 2474, "burgundy": 2475, "own": 2476, "soda": 2477, "sunlight": 2478, "afghan": 2479, "patch": 2480, "shades": 2481, "shadow": 2482, "post": 2483, "teens": 2484, "prom": 2485, "fellow": 2486, "formal": 2487, "skinned": 2488, "lighter": 2489, "protect": 2490, "scratching": 2491, "filling": 2492, "watery": 2493, "approach": 2494, "canoeing": 2495, "fights": 2496, "pouncing": 2497, "boot": 
2498, "casting": 2499, "playful": 2500, "heading": 2501, "labeled": 2502, "musher": 2503, "construction": 2504, "drilling": 2505, "works": 2506, "active": 2507, "drill": 2508, "breed": 2509, "smooth": 2510, "iron": 2511, "hunched": 2512, "mans": 2513, "encouraging": 2514, "handed": 2515, "rundown": 2516, "warehouse": 2517, "so": 2518, "waterway": 2519, "wrinkled": 2520, "sight": 2521, "cookie": 2522, "cheered": 2523, "crowds": 2524, "complete": 2525, "finish": 2526, "benches": 2527, "had": 2528, "bandaged": 2529, "checked": 2530, "descending": 2531, "knees": 2532, "piggyback": 2533, "adorned": 2534, "intently": 2535, "travelling": 2536, "crouching": 2537, "defensive": 2538, "position": 2539, "goalkeeper": 2540, "stripped": 2541, "extends": 2542, "skips": 2543, "avoid": 2544, "located": 2545, "archway": 2546, "midst": 2547, "part": 2548, "overlooks": 2549, "valley": 2550, "forested": 2551, "batsman": 2552, "misses": 2553, "unique": 2554, "ravine": 2555, "crevasse": 2556, "nips": 2557, "bared": 2558, "period": 2559, "european": 2560, "stoop": 2561, "trainer": 2562, "wrestling": 2563, "cotton": 2564, "candy": 2565, "sandal": 2566, "cloud": 2567, "footballers": 2568, "victory": 2569, "baseman": 2570, "league": 2571, "rival": 2572, "second": 2573, "hangs": 2574, "flannel": 2575, "streets": 2576, "opponents": 2577, "progress": 2578, "bit": 2579, "bluejeans": 2580, "cameraman": 2581, "pounces": 2582, "males": 2583, "vintage": 2584, "admire": 2585, "fashioned": 2586, "admiring": 2587, "cowboys": 2588, "rodeo": 2589, "bucking": 2590, "bulls": 2591, "props": 2592, "fisherman": 2593, "mist": 2594, "whitewater": 2595, "kayaks": 2596, "pitching": 2597, "practices": 2598, "punches": 2599, "trains": 2600, "husky": 2601, "emerging": 2602, "opens": 2603, "horseback": 2604, "moon": 2605, "cooking": 2606, "bound": 2607, "colliding": 2608, "dinosaur": 2609, "lime": 2610, "corgi": 2611, "obedience": 2612, "concert": 2613, "parasailing": 2614, "workers": 2615, "cash": 2616, "register": 2617, "tips": 2618, "jar": 2619, "tip": 2620, "knit": 2621, "kart": 2622, "feathered": 2623, "caution": 2624, "feather": 2625, "lolly": 2626, "purses": 2627, "cheek": 2628, "tracks": 2629, "pale": 2630, "casual": 2631, "growls": 2632, "tugging": 2633, "playhouse": 2634, "cabin": 2635, "focus": 2636, "oars": 2637, "shoot": 2638, "diner": 2639, "played": 2640, "8": 2641, "huddle": 2642, "musical": 2643, "juggles": 2644, "manicured": 2645, "growling": 2646, "seem": 2647, "doberman": 2648, "twig": 2649, "guards": 2650, "brings": 2651, "buried": 2652, "stones": 2653, "pointy": 2654, "quietly": 2655, "appear": 2656, "navigates": 2657, "sloped": 2658, "himself": 2659, "greenery": 2660, "cookies": 2661, "socializing": 2662, "smokes": 2663, "whist": 2664, "pickup": 2665, "headset": 2666, "visor": 2667, "tags": 2668, "creating": 2669, "billowing": 2670, "crash": 2671, "clings": 2672, "demonstrates": 2673, "pedals": 2674, "upright": 2675, "rear": 2676, "retrieve": 2677, "summer": 2678, "hitting": 2679, "videotaped": 2680, "canyon": 2681, "ceremony": 2682, "pauses": 2683, "alert": 2684, "skinny": 2685, "corridor": 2686, "jumper": 2687, "completes": 2688, "records": 2689, "bungee": 2690, "cord": 2691, "weeds": 2692, "we": 2693, "because": 2694, "rainbow": 2695, "kite": 2696, "raincoat": 2697, "nears": 2698, "elevation": 2699, "juice": 2700, "fencing": 2701, "bundled": 2702, "characters": 2703, "billboard": 2704, "itself": 2705, "everywhere": 2706, "wasteland": 2707, "pebble": 2708, "pebbles": 2709, "rounding": 2710, "artists": 2711, 
"stopped": 2712, "flooded": 2713, "monster": 2714, "admires": 2715, "bandage": 2716, "hairstyle": 2717, "plaza": 2718, "square": 2719, "training": 2720, "asphalt": 2721, "return": 2722, "sprints": 2723, "leaf": 2724, "geyser": 2725, "casts": 2726, "shrubs": 2727, "wipes": 2728, "wiping": 2729, "belly": 2730, "lush": 2731, "countryside": 2732, "treks": 2733, "peaks": 2734, "flailing": 2735, "teaching": 2736, "officers": 2737, "policemen": 2738, "law": 2739, "speaking": 2740, "formation": 2741, "moss": 2742, "bumpy": 2743, "hay": 2744, "pumpkin": 2745, "star": 2746, "sparkler": 2747, "routine": 2748, "skirts": 2749, "auditorium": 2750, "23": 2751, "barriers": 2752, "aisle": 2753, "shoveling": 2754, "shovels": 2755, "praying": 2756, "hear": 2757, "bands": 2758, "windsurfing": 2759, "snack": 2760, "caps": 2761, "speeds": 2762, "early": 2763, "wristbands": 2764, "overpass": 2765, "attention": 2766, "saris": 2767, "tabby": 2768, "sunshade": 2769, "tropical": 2770, "resort": 2771, "dummy": 2772, "environment": 2773, "human": 2774, "doll": 2775, "gesturing": 2776, "driven": 2777, "boarder": 2778, "geese": 2779, "lower": 2780, "carpeted": 2781, "awning": 2782, "licks": 2783, "tuxedo": 2784, "smock": 2785, "extended": 2786, "scary": 2787, "skills": 2788, "fencers": 2789, "fair": 2790, "egret": 2791, "reeds": 2792, "upper": 2793, "driveway": 2794, "amid": 2795, "spilled": 2796, "sparks": 2797, "bearing": 2798, "gazes": 2799, "slalom": 2800, "poodles": 2801, "interact": 2802, "bone": 2803, "artificial": 2804, "hard": 2805, "tethered": 2806, "flowing": 2807, "wrestler": 2808, "wrestlers": 2809, "bedroom": 2810, "bonnets": 2811, "tuxedos": 2812, "arched": 2813, "pathway": 2814, "son": 2815, "motorboat": 2816, "determined": 2817, "expression": 2818, "mountainous": 2819, "tow": 2820, "becomes": 2821, "waterskis": 2822, "brooms": 2823, "forefront": 2824, "mom": 2825, "depicting": 2826, "jesus": 2827, "shrine": 2828, "carried": 2829, "waterski": 2830, "wakeboarding": 2831, "velvet": 2832, "basement": 2833, "song": 2834, "serious": 2835, "dribbling": 2836, "rafts": 2837, "coaching": 2838, "sprinting": 2839, "bread": 2840, "glides": 2841, "babies": 2842, "lagoon": 2843, "fixing": 2844, "swooping": 2845, "beak": 2846, "vehicles": 2847, "newspapers": 2848, "faucet": 2849, "spigot": 2850, "tap": 2851, "community": 2852, "rollerskating": 2853, "mock": 2854, "headdress": 2855, "earrings": 2856, "kayakers": 2857, "canoes": 2858, "sees": 2859, "wires": 2860, "junk": 2861, "rubble": 2862, "bushy": 2863, "courtyard": 2864, "slacks": 2865, "arabian": 2866, "offstage": 2867, "cycle": 2868, "shouting": 2869, "rimmed": 2870, "award": 2871, "baskets": 2872, "podium": 2873, "awaiting": 2874, "houses": 2875, "boston": 2876, "viewer": 2877, "shiny": 2878, "current": 2879, "soft": 2880, "wilderness": 2881, "nature": 2882, "decorative": 2883, "spraying": 2884, "squirting": 2885, "telescope": 2886, "gun": 2887, "office": 2888, "tickets": 2889, "screams": 2890, "bows": 2891, "yawns": 2892, "groceries": 2893, "rows": 2894, "kicker": 2895, "scenic": 2896, "peaceful": 2897, "morning": 2898, "skyline": 2899, "bracelet": 2900, "sleeves": 2901, "stall": 2902, "shelter": 2903, "burlap": 2904, "sack": 2905, "shadows": 2906, "cartwheel": 2907, "ascends": 2908, "sleds": 2909, "dragging": 2910, "pirate": 2911, "hug": 2912, "vegetables": 2913, "costumed": 2914, "spout": 2915, "skatepark": 2916, "upraised": 2917, "doors": 2918, "spikes": 2919, "ambulance": 2920, "seattle": 2921, "observes": 2922, "hotel": 2923, "solitary": 2924, "moment": 
2925, "flops": 2926, "tied": 2927, "carying": 2928, "hardhat": 2929, "length": 2930, "surfboarder": 2931, "dyed": 2932, "industrial": 2933, "terrace": 2934, "waterside": 2935, "snowmobiles": 2936, "helmeted": 2937, "coverings": 2938, "good": 2939, "reception": 2940, "reddish": 2941, "necked": 2942, "knife": 2943, "supports": 2944, "tails": 2945, "sooner": 2946, "marks": 2947, "lipstick": 2948, "torso": 2949, "afternoon": 2950, "reflecting": 2951, "soldiers": 2952, "ignoring": 2953, "checks": 2954, "operating": 2955, "speaker": 2956, "late": 2957, "secured": 2958, "footprints": 2959, "demonstrating": 2960, "massage": 2961, "scuba": 2962, "diver": 2963, "travel": 2964, "polaris": 2965, "cargo": 2966, "causing": 2967, "spiral": 2968, "sporting": 2969, "bib": 2970, "toss": 2971, "labrador": 2972, "called": 2973, "lounge": 2974, "abandoned": 2975, "lamp": 2976, "working": 2977, "weird": 2978, "snowing": 2979, "stripe": 2980, "glacier": 2981, "contents": 2982, "marked": 2983, "gathering": 2984, "computers": 2985, "teacher": 2986, "filmed": 2987, "lecture": 2988, "dotted": 2989, "dot": 2990, "sledge": 2991, "lobby": 2992, "lens": 2993, "ridding": 2994, "shirted": 2995, "blown": 2996, "acrobatic": 2997, "huddled": 2998, "grin": 2999, "romp": 3000, "tulips": 3001, "adjusting": 3002, "incline": 3003, "crouched": 3004, "squat": 3005, "multicolor": 3006, "lie": 3007, "individual": 3008, "crown": 3009, "photographers": 3010, "when": 3011, "festive": 3012, "lease": 3013, "dumps": 3014, "curved": 3015, "pedaling": 3016, "worn": 3017, "wraps": 3018, "objects": 3019, "headscarfs": 3020, "packages": 3021, "traverses": 3022, "test": 3023, "performance": 3024, "zara": 3025, "strike": 3026, "goofy": 3027, "bigger": 3028, "passed": 3029, "halter": 3030, "rushes": 3031, "handlebars": 3032, "observing": 3033, "paints": 3034, "belt": 3035, "whom": 3036, "kicked": 3037, "curiously": 3038, "chewed": 3039, "vacant": 3040, "retrievers": 3041, "bricks": 3042, "laid": 3043, "fairground": 3044, "darker": 3045, "icicle": 3046, "stripy": 3047, "canon": 3048, "oar": 3049, "wild": 3050, "motor": 3051, "campground": 3052, "campsite": 3053, "blazing": 3054, "cobbled": 3055, "than": 3056, "storm": 3057, "drain": 3058, "torn": 3059, "scruffy": 3060, "love": 3061, "language": 3062, "australian": 3063, "pets": 3064, "apartment": 3065, "fedora": 3066, "spiked": 3067, "cycling": 3068, "wrapping": 3069, "spreading": 3070, "flapping": 3071, "tinkerbell": 3072, "jacked": 3073, "muscular": 3074, "quilt": 3075, "saber": 3076, "sword": 3077, "puffy": 3078, "tired": 3079, "grown": 3080, "logs": 3081, "skipping": 3082, "swords": 3083, "height": 3084, "tray": 3085, "pillows": 3086, "spiderman": 3087, "furniture": 3088, "identically": 3089, "bananas": 3090, "collide": 3091, "tumbling": 3092, "posts": 3093, "main": 3094, "sails": 3095, "sailing": 3096, "removing": 3097, "garter": 3098, "piercing": 3099, "earring": 3100, "jogs": 3101, "everyone": 3102, "sad": 3103, "calf": 3104, "labs": 3105, "size": 3106, "sling": 3107, "either": 3108, "nighttime": 3109, "laptops": 3110, "sundress": 3111, "wrestles": 3112, "battling": 3113, "examining": 3114, "soaked": 3115, "keeps": 3116, "rollerskates": 3117, "overlook": 3118, "livestock": 3119, "accompanied": 3120, "telephone": 3121, "booths": 3122, "booth": 3123, "brushes": 3124, "fingerpaints": 3125, "apron": 3126, "decorations": 3127, "squirts": 3128, "strapped": 3129, "dragged": 3130, "droplets": 3131, "oxen": 3132, "performers": 3133, "dive": 3134, "ollie": 3135, "dodges": 3136, "gowns": 3137, 
"terriers": 3138, "sandbox": 3139, "parasails": 3140, "windsurfer": 3141, "was": 3142, "fell": 3143, "labradoodle": 3144, "rollerskater": 3145, "fort": 3146, "built": 3147, "mouthed": 3148, "bay": 3149, "facepaint": 3150, "parent": 3151, "suitcase": 3152, "themed": 3153, "scratches": 3154, "kind": 3155, "cleaning": 3156, "container": 3157, "pail": 3158, "slip": 3159, "golf": 3160, "india": 3161, "bedspread": 3162, "petting": 3163, "strapless": 3164, "toddlers": 3165, "playpen": 3166, "padded": 3167, "placing": 3168, "jogger": 3169, "louis": 3170, "vuitton": 3171, "bath": 3172, "bathtub": 3173, "bathroom": 3174, "mechanical": 3175, "rabbit": 3176, "goatee": 3177, "floatation": 3178, "florida": 3179, "rally": 3180, "conference": 3181, "battle": 3182, "submerges": 3183, "missing": 3184, "tooth": 3185, "powder": 3186, "mowed": 3187, "call": 3188, "tackles": 3189, "paraglider": 3190, "residential": 3191, "fountains": 3192, "mexican": 3193, "aims": 3194, "engaged": 3195, "sox": 3196, "railroad": 3197, "firefighter": 3198, "hood": 3199, "fireman": 3200, "engine": 3201, "zigzag": 3202, "atvs": 3203, "descent": 3204, "western": 3205, "youth": 3206, "leggings": 3207, "revealing": 3208, "camp": 3209, "thick": 3210, "feeding": 3211, "eyebrows": 3212, "moustache": 3213, "cigars": 3214, "marx": 3215, "equestrian": 3216, "tee": 3217, "theme": 3218, "raise": 3219, "phones": 3220, "cellphones": 3221, "crack": 3222, "symbol": 3223, "change": 3224, "led": 3225, "goose": 3226, "cuts": 3227, "fresh": 3228, "youngsters": 3229, "exercise": 3230, "hugged": 3231, "cooks": 3232, "nightclub": 3233, "form": 3234, "stacks": 3235, "washed": 3236, "apples": 3237, "retaining": 3238, "gondola": 3239, "rowers": 3240, "skyscraper": 3241, "stunts": 3242, "occupied": 3243, "nipple": 3244, "piercings": 3245, "bulldog": 3246, "movie": 3247, "squats": 3248, "member": 3249, "frisbees": 3250, "aquarium": 3251, "seal": 3252, "squeezing": 3253, "surround": 3254, "seems": 3255, "self": 3256, "cheeks": 3257, "seats": 3258, "safari": 3259, "cracked": 3260, "clears": 3261, "ramps": 3262, "arch": 3263, "sweatshirts": 3264, "squirt": 3265, "pasture": 3266, "diaper": 3267, "rods": 3268, "begging": 3269, "whispering": 3270, "gathers": 3271, "firetruck": 3272, "firefighters": 3273, "strips": 3274, "fields": 3275, "hilly": 3276, "attacking": 3277, "greyish": 3278, "mark": 3279, "check": 3280, "kissed": 3281, "blues": 3282, "brothers": 3283, "striking": 3284, "haircut": 3285, "coated": 3286, "happening": 3287, "tv": 3288, "balck": 3289, "relax": 3290, "converse": 3291, "pursued": 3292, "obstacles": 3293, "stride": 3294, "crocodile": 3295, "obama": 3296, "keep": 3297, "misty": 3298, "chalk": 3299, "shocked": 3300, "messenger": 3301, "ink": 3302, "trailing": 3303, "treads": 3304, "images": 3305, "alligator": 3306, "mean": 3307, "bears": 3308, "passerby": 3309, "expressions": 3310, "concerned": 3311, "squinting": 3312, "stuff": 3313, "shoots": 3314, "trip": 3315, "grasps": 3316, "bowler": 3317, "marble": 3318, "attack": 3319, "cook": 3320, "stove": 3321, "intense": 3322, "speeding": 3323, "double": 3324, "captured": 3325, "domino": 3326, "pizza": 3327, "tower": 3328, "licked": 3329, "galloping": 3330, "paddled": 3331, "fighters": 3332, "recently": 3333, "instructor": 3334, "drenched": 3335, "electrical": 3336, "games": 3337, "pattern": 3338, "bales": 3339, "cobblestone": 3340, "island": 3341, "speedo": 3342, "panting": 3343, "sacks": 3344, "u": 3345, "crouch": 3346, "wakeboards": 3347, "saddle": 3348, "trailer": 3349, "shed": 3350, 
"dramatically": 3351, "vending": 3352, "purchasing": 3353, "lack": 3354, "hundreds": 3355, "defends": 3356, "portable": 3357, "toilets": 3358, "port": 3359, "potties": 3360, "blossoms": 3361, "flowering": 3362, "bodies": 3363, "much": 3364, "exiting": 3365, "tights": 3366, "books": 3367, "blossoming": 3368, "topped": 3369, "loaded": 3370, "footballer": 3371, "wolf": 3372, "if": 3373, "spoon": 3374, "vast": 3375, "find": 3376, "mets": 3377, "oriental": 3378, "skin": 3379, "thatched": 3380, "penske": 3381, "downtown": 3382, "story": 3383, "pinata": 3384, "collared": 3385, "contest": 3386, "awkwardly": 3387, "cardigan": 3388, "standard": 3389, "restaraunt": 3390, "stores": 3391, "brindle": 3392, "forehead": 3393, "paperwork": 3394, "n": 3395, "pro": 3396, "america": 3397, "explosion": 3398, "dusty": 3399, "glider": 3400, "pelican": 3401, "toilet": 3402, "drummer": 3403, "saxophones": 3404, "saxophone": 3405, "married": 3406, "gown": 3407, "7": 3408, "gift": 3409, "refrigerator": 3410, "firing": 3411, "puck": 3412, "site": 3413, "jug": 3414, "swan": 3415, "coloring": 3416, "menus": 3417, "waiter": 3418, "menu": 3419, "unhappy": 3420, "elevated": 3421, "minivan": 3422, "evening": 3423, "signal": 3424, "floaties": 3425, "say": 3426, "cards": 3427, "vine": 3428, "harbor": 3429, "tugboat": 3430, "bagpipes": 3431, "most": 3432, "plates": 3433, "needle": 3434, "alike": 3435, "letter": 3436, "ducks": 3437, "produce": 3438, "starring": 3439, "russell": 3440, "corndogs": 3441, "frames": 3442, "winks": 3443, "winking": 3444, "touched": 3445, "drapped": 3446, "soap": 3447, "tether": 3448, "displayed": 3449, "mime": 3450, "suited": 3451, "oversized": 3452, "presents": 3453, "scared": 3454, "apparatus": 3455, "ornate": 3456, "barricade": 3457, "colorfully": 3458, "tiled": 3459, "carousel": 3460, "model": 3461, "biplane": 3462, "helicopter": 3463, "sizes": 3464, "purchase": 3465, "product": 3466, "pinned": 3467, "displays": 3468, "awards": 3469, "ragged": 3470, "obscured": 3471, "wielding": 3472, "sleigh": 3473, "feature": 3474, "otherwise": 3475, "collection": 3476, "garage": 3477, "howling": 3478, "royal": 3479, "basketballs": 3480, "frolicking": 3481, "loose": 3482, "groups": 3483, "huskies": 3484, "greenish": 3485, "prancing": 3486, "finished": 3487, "rises": 3488, "fiery": 3489, "windy": 3490, "foothills": 3491, "piles": 3492, "prairie": 3493, "numerous": 3494, "suds": 3495, "clean": 3496, "embraces": 3497, "draft": 3498, "waist": 3499, "diners": 3500, "aiming": 3501, "defending": 3502, "crossbones": 3503, "pirates": 3504, "curled": 3505, "local": 3506, "tracksuit": 3507, "swans": 3508, "messy": 3509, "else": 3510, "trim": 3511, "huts": 3512, "boa": 3513, "grilling": 3514, "buy": 3515, "roadway": 3516, "cries": 3517, "sumo": 3518, "tandem": 3519, "yorkie": 3520, "trophy": 3521, "winner": 3522, "fatigues": 3523, "assisting": 3524, "boundary": 3525, "inground": 3526, "devil": 3527, "bug": 3528, "insect": 3529, "washing": 3530, "americans": 3531, "confused": 3532, "scooters": 3533, "tricycles": 3534, "cheerleading": 3535, "sari": 3536, "involving": 3537, "theater": 3538, "dome": 3539, "speckled": 3540, "pins": 3541, "native": 3542, "prize": 3543, "embankment": 3544, "cartwheels": 3545, "were": 3546, "establishment": 3547, "pre": 3548, "cliffside": 3549, "dragon": 3550, "god": 3551, "diapers": 3552, "capes": 3553, "freshly": 3554, "cone": 3555, "roadside": 3556, "clap": 3557, "deer": 3558, "ollies": 3559, "badminton": 3560, "dove": 3561, "applies": 3562, "tiara": 3563, "wig": 3564, "hooping": 3565, 
"engulfed": 3566, "12": 3567, "offers": 3568, "firemen": 3569, "laps": 3570, "tangled": 3571, "railings": 3572, "ultimate": 3573, "straddles": 3574, "balanced": 3575, "zoo": 3576, "sprint": 3577, "dye": 3578, "laundry": 3579, "chains": 3580, "struggle": 3581, "entering": 3582, "mop": 3583, "newborn": 3584, "television": 3585, "litter": 3586, "prevent": 3587, "plush": 3588, "presses": 3589, "strolls": 3590, "chops": 3591, "handgun": 3592, "pistol": 3593, "used": 3594, "protesters": 3595, "rush": 3596, "gull": 3597, "seagull": 3598, "sorts": 3599, "shadowed": 3600, "will": 3601, "wheat": 3602, "flaming": 3603, "ballet": 3604, "draw": 3605, "crib": 3606, "accordion": 3607, "priest": 3608, "chili": 3609, "escape": 3610, "tiles": 3611, "steam": 3612, "keyboard": 3613, "feeds": 3614, "lollipop": 3615, "eggs": 3616, "easter": 3617, "noodles": 3618, "march": 3619, "turkeys": 3620, "hawaiian": 3621, "carts": 3622, "twin": 3623, "michael": 3624, "jackson": 3625, "trekking": 3626, "hooking": 3627, "railway": 3628, "banks": 3629, "browses": 3630, "ornamental": 3631, "washes": 3632, "nets": 3633, "pierced": 3634, "penguins": 3635, "zombie": 3636, "bouquet": 3637, "cutout": 3638, "barn": 3639, "words": 3640, "28": 3641, "sidelines": 3642, "marking": 3643, "clover": 3644, "rocket": 3645, "armbands": 3646, "thorugh": 3647, "squeeze": 3648, "downward": 3649, "lounging": 3650, "rubs": 3651, "cutting": 3652, "views": 3653, "department": 3654, "st": 3655, "french": 3656, "pigeon": 3657, "crocs": 3658, "activity": 3659, "film": 3660, "uno": 3661, "bass": 3662, "wetsuits": 3663, "oppose": 3664, "clause": 3665, "odd": 3666, "limo": 3667, "limousine": 3668, "figures": 3669, "punk": 3670, "potato": 3671, "chip": 3672, "unknown": 3673, "overcoat": 3674, "campfire": 3675, "sequined": 3676, "serving": 3677, "hummer": 3678, "towed": 3679, "beat": 3680, "broom": 3681, "conversations": 3682, "yelling": 3683, "ornament": 3684, "us": 3685, "vegetation": 3686, "shots": 3687, "crank": 3688, "target": 3689, "protection": 3690, "fires": 3691, "marches": 3692, "hovers": 3693, "hovering": 3694, "siting": 3695, "nursing": 3696, "milk": 3697, "political": 3698, "plains": 3699, "sunrise": 3700, "fries": 3701, "hearts": 3702, "tutus": 3703, "ballerinas": 3704, "wicker": 3705, "robot": 3706, "emitting": 3707, "conversing": 3708, "guiding": 3709, "astride": 3710, "guided": 3711, "wrists": 3712, "smartly": 3713, "statues": 3714, "smelling": 3715, "examine": 3716, "littered": 3717, "sponge": 3718, "bats": 3719, "ages": 3720, "ponchos": 3721, "sparkling": 3722, "lighthouse": 3723, "tussling": 3724, "missed": 3725, "wheelers": 3726, "ok": 3727, "scrubby": 3728, "rappelling": 3729, "participates": 3730, "sunshine": 3731, "sheltie": 3732, "sippy": 3733, "marina": 3734, "mountaineer": 3735, "gazing": 3736, "scuffle": 3737, "swung": 3738, "pride": 3739, "ipod": 3740, "speech": 3741, "armenian": 3742, "genocide": 3743, "priests": 3744, "mixed": 3745, "submerged": 3746, "unison": 3747, "united": 3748, "states": 3749, "spin": 3750, "seesaw": 3751, "inspecting": 3752, "faded": 3753, "legos": 3754, "memorial": 3755, "flinging": 3756, "notes": 3757, "copper": 3758, "leaped": 3759, "effort": 3760, "leaned": 3761, "exhaust": 3762, "merchandise": 3763, "claps": 3764, "blankets": 3765, "mardi": 3766, "gras": 3767, "waking": 3768, "lassie": 3769, "squares": 3770, "leafs": 3771, "shell": 3772, "cooling": 3773, "pretend": 3774, "bale": 3775, "pumpkins": 3776, "stack": 3777, "magazine": 3778, "kangaroo": 3779, "mingling": 3780, "stained": 3781, "tongues": 
3782, "rooftop": 3783, "paintball": 3784, "shack": 3785, "whit": 3786, "bookstore": 3787, "investigate": 3788, "streaks": 3789, "company": 3790, "contraption": 3791, "herd": 3792, "chained": 3793, "map": 3794, "burn": 3795, "dragsters": 3796, "lion": 3797, "scrambling": 3798, "chess": 3799, "hawk": 3800, "lip": 3801, "gaze": 3802, "flung": 3803, "streamers": 3804, "sucks": 3805, "dew": 3806, "meter": 3807, "locking": 3808, "cutouts": 3809, "region": 3810, "heron": 3811, "countertop": 3812, "snap": 3813, "laden": 3814, "bodyboard": 3815, "evil": 3816, "punching": 3817, "interviews": 3818, "tide": 3819, "syrup": 3820, "interviewed": 3821, "mr": 3822, "letters": 3823, "p": 3824, "fully": 3825, "crate": 3826, "picket": 3827, "rifle": 3828, "library": 3829, "segway": 3830, "substance": 3831, "backgroud": 3832, "arrow": 3833, "patiently": 3834, "extremely": 3835, "snarling": 3836, "drawn": 3837, "cricketer": 3838, "search": 3839, "condoms": 3840, "twirls": 3841, "extravagant": 3842, "bmw": 3843, "eagle": 3844, "join": 3845, "frolic": 3846, "25": 3847, "rafters": 3848, "bee": 3849, "mascot": 3850, "build": 3851, "bathe": 3852, "wigs": 3853, "roses": 3854, "feed": 3855, "tilted": 3856, "sidecar": 3857, "corners": 3858, "renaissance": 3859, "directly": 3860, "washington": 3861, "melted": 3862, "persons": 3863, "tackler": 3864, "radio": 3865, "flyer": 3866, "favorite": 3867, "hamburgers": 3868, "19": 3869, "swoops": 3870, "hydrant": 3871, "umpire": 3872, "bowling": 3873, "barely": 3874, "observed": 3875, "real": 3876, "shoppers": 3877, "products": 3878, "guide": 3879, "frying": 3880, "lobster": 3881, "present": 3882, "seagulls": 3883, "cannon": 3884, "centipede": 3885, "rugged": 3886, "bitten": 3887, "redbull": 3888, "shake": 3889, "spotlight": 3890, "binoculars": 3891, "hospital": 3892, "perspective": 3893, "horn": 3894, "nt": 3895, "judge": 3896, "rover": 3897, "earpiece": 3898, "photographing": 3899, "serves": 3900, "opened": 3901, "ruins": 3902, "goth": 3903, "buggy": 3904, "microphones": 3905, "rolled": 3906, "llama": 3907, "beachgoers": 3908, "flop": 3909, "completing": 3910, "derby": 3911, "advertisements": 3912, "medals": 3913, "classroom": 3914, "mounds": 3915, "dandelions": 3916, "steer": 3917, "care": 3918, "amish": 3919, "mattress": 3920, "sleeve": 3921, "veil": 3922, "rocker": 3923, "sash": 3924, "lanterns": 3925, "peanut": 3926, "butter": 3927, "slipper": 3928, "acoustic": 3929, "gigolo": 3930, "parrot": 3931, "bounced": 3932, "fives": 3933, "tour": 3934, "angels": 3935, "lodge": 3936, "barber": 3937, "straining": 3938, "lavender": 3939, "nurses": 3940, "loading": 3941, "lemonade": 3942, "brides": 3943, "sundown": 3944, "ringing": 3945, "hopper": 3946, "nails": 3947, "trade": 3948, "poem": 3949, "offering": 3950, "toothbrush": 3951, "jeeps": 3952, "groucho": 3953, "bout": 3954, "served": 3955, "cigarettes": 3956, "carton": 3957, "dumbbell": 3958, "wharf": 3959, "recorder": 3960, "breeze": 3961, "tagged": 3962, "desks": 3963, "hopscotch": 3964, "cry": 3965, "dc": 3966, "bathrobe": 3967, "vw": 3968, "fighter": 3969, "parasailer": 3970, "controller": 3971, "hell": 3972, "spell": 3973, "pajama": 3974, "those": 3975, "windsurfs": 3976, "escalator": 3977, "officials": 3978, "comic": 3979, "henna": 3980, "tubing": 3981, "bernard": 3982, "muslim": 3983, "jukebox": 3984, "cracker": 3985, "potty": 3986, "sewing": 3987, "impeach": 3988, "flings": 3989, "bunk": 3990, "chalkboard": 3991, "tunic": 3992, "demonstration": 3993, "highland": 3994, "africans": 3995, "graduation": 3996, "leapfrog": 3997, 
"looked": 3998, "dolphins": 3999, "shipping": 4000, "cigar": 4001, "chairlift": 4002, "sink": 4003, "barrels": 4004, "lizards": 4005, "donkeys": 4006, "beating": 4007, "dunk": 4008, "kennel": 4009, "donkey": 4010, "whales": 4011, "einstein": 4012, "rollerbladers": 4013, "bases": 4014, "noodle": 4015, "pharmacy": 4016, "sunbathe": 4017, "milkshake": 4018, "turkey": 4019, "dumpster": 4020, "stools": 4021, "": 4022, "": 4023, "": 4024, "": 0}
--------------------------------------------------------------------------------
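The JSON above is the word map the web app's beam search consumes: each caption token maps to an integer index, with the special tokens appended at the top of the vocabulary and `<pad>` at index 0. A minimal sketch of how such a map is typically loaded and inverted for decoding, assuming the `<unk>`, `<start>`, `<end>`, and `<pad>` entries at the end of the file; the `indices_to_caption` helper is illustrative, not taken from the repo:

```python
import json

# Load the word map shipped with the web app (path as in this repo).
with open('web_app/static/Vocab_5_cap_per_img_2_min_word_freq.json') as f:
    word_map = json.load(f)  # token -> integer index; '<pad>' maps to 0

# Inverse map: turns predicted indices back into words after decoding.
rev_word_map = {v: k for k, v in word_map.items()}

def indices_to_caption(indices):
    """Illustrative helper (not from the repo): drop special tokens and
    join the remaining words into a human-readable caption."""
    specials = {word_map[t] for t in ('<start>', '<end>', '<pad>', '<unk>')}
    return ' '.join(rev_word_map[i] for i in indices if i not in specials)

# Example with tokens present in this vocabulary; the concrete indices
# depend on the actual file contents.
print(indices_to_caption([word_map['<start>'], word_map['bus'], word_map['<end>']]))
```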