13 |
14 |
15 | {% block header_content %}{% include "header_content.html" %}{% endblock %}
16 |
17 |
18 |
19 | {% block demo_images %}{% include "demo_images.html" %}{% endblock %}
20 |
21 | {% block terminal %}{% include "terminal.html" %}{% endblock %}
22 | {% block result %}{% include "result.html" %}{% endblock %}
23 | {% block credits %}{% include "credits.html" %}{% endblock %}
24 |
25 |
--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
5 | if __name__ == "__main__":
6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "vilbert_multitask.settings")
7 | try:
8 | from django.core.management import execute_from_command_line
9 | except ImportError:
10 | # The above import may fail for some other reason. Ensure that the
11 | # issue is really that Django is missing to avoid masking other
12 | # exceptions on Python 2.
13 | try:
14 | import django
15 | except ImportError:
16 | raise ImportError(
17 | "Couldn't import Django. Are you sure it's installed and "
18 | "available on your PYTHONPATH environment variable? Did you "
19 | "forget to activate a virtual environment?"
20 | )
21 | raise
22 | execute_from_command_line(sys.argv)
23 |
--------------------------------------------------------------------------------
/demo/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | from .models import Tasks, QuestionAnswer
4 | # from import_export.admin import ImportExportMixin
5 |
6 |
7 | class ImportExportTimeStampedAdmin(admin.ModelAdmin):
8 | exclude = ("created_at", "modified_at")
9 |
10 |
11 | @admin.register(Tasks)
12 | class TaskAdmin(ImportExportTimeStampedAdmin):
13 | readonly_fields = ("created_at",)
14 | list_display = (
15 | "unique_id",
16 | "name",
17 | "placeholder",
18 | "example",
19 | "num_of_images",
20 | "description",
21 | )
22 |
23 |
24 | @admin.register(QuestionAnswer)
25 | class QuestionAnswerAdmin(ImportExportTimeStampedAdmin):
26 | readonly_fields = ("created_at",)
27 | list_display = (
28 | "task",
29 | "input_text",
30 | "input_images",
31 | "answer_text",
32 | "answer_images",
33 | "socket_id",
34 | )
35 |
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/.tagconfig:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | {
4 | "name": "src/dropzone.coffee",
5 | "regexs": [
6 | "Dropzone.version = \"###\""
7 | ]
8 | },
9 | {
10 | "name": "dist/dropzone.js",
11 | "regexs": [
12 | "version = \"###\""
13 | ]
14 | },
15 | {
16 | "name": "dist/min/dropzone.min.js",
17 | "regexs": [
18 | "version=\"###\""
19 | ]
20 | },
21 | {
22 | "name": "dist/dropzone-amd-module.js",
23 | "regexs": [
24 | "version = \"###\""
25 | ]
26 | },
27 | {
28 | "name": "dist/min/dropzone-amd-module.min.js",
29 | "regexs": [
30 | "version=\"###\""
31 | ]
32 | },
33 | {
34 | "name": "package.json",
35 | "regexs": [
36 | "\"version\": \"###\""
37 | ]
38 | },
39 | {
40 | "name": "component.json",
41 | "regexs": [
42 | "\"version\": \"###\""
43 | ]
44 | },
45 | {
46 | "name": "bower.json",
47 | "regexs": [
48 | "\"version\": \"###\""
49 | ]
50 | }
51 | ]
52 | }
53 |
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "dropzone",
3 | "version": "5.7.0",
4 | "description": "Handles drag and drop of files for you.",
5 | "keywords": [
6 | "dragndrop",
7 | "drag and drop",
8 | "file upload",
9 | "upload"
10 | ],
11 | "homepage": "http://www.dropzonejs.com",
12 | "main": "./dist/dropzone.js",
13 | "maintainers": [
14 | {
15 | "name": "Matias Meno",
16 | "email": "m@tias.me",
17 | "web": "http://www.colorglare.com"
18 | }
19 | ],
20 | "contributors": [
21 | {
22 | "name": "Matias Meno",
23 | "email": "m@tias.me",
24 | "web": "http://www.colorglare.com"
25 | }
26 | ],
27 | "scripts": {
28 | "test": "grunt && npm run test-prebuilt",
29 | "test-prebuilt": "mocha-headless-chrome -f test/test-prebuilt.html -a no-sandbox -a disable-setuid-sandbox"
30 | },
31 | "bugs": {
32 | "email": "m@tias.me",
33 | "url": "https://gitlab.com/meno/dropzone/issues"
34 | },
35 | "license": "MIT",
36 | "repository": {
37 | "type": "git",
38 | "url": "https://gitlab.com/meno/dropzone.git"
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/LICENSE:
--------------------------------------------------------------------------------
1 | License
2 |
3 | (The MIT License)
4 |
5 | Copyright (c) 2012 Matias Meno
6 | Logo & Website Design (c) 2015 "1910" www.weare1910.com
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 |
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13 |
--------------------------------------------------------------------------------
/vilbert_multitask/urls.py:
--------------------------------------------------------------------------------
1 | """vilbert_multitask URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/1.11/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.conf.urls import url, include
14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
15 | """
16 | from django.conf.urls import url, include
17 | from django.contrib import admin
18 | from django.conf import settings
19 | import django.views.static
20 |
21 | urlpatterns = [
22 | url(r"^admin/", admin.site.urls),
23 | url(r"^", include("demo.urls"), name="demo"),
24 | ]
25 |
26 |
27 | if settings.DEBUG:
28 | # static files (images, css, javascript, etc.)
29 | urlpatterns += [
30 | url(r'^media/(?P<path>.*)$', django.views.static.serve, {'document_root': settings.MEDIA_ROOT}),
31 | ]
--------------------------------------------------------------------------------
/demo/sender.py:
--------------------------------------------------------------------------------
1 | from django.conf import settings
2 | from .utils import log_to_terminal
3 |
4 | import os
5 | import pika
6 | import sys
7 | import json
8 |
9 |
10 | def vilbert_task(image_path, question, task_id, socket_id):
11 |
12 | connection = pika.BlockingConnection(pika.ConnectionParameters(
13 | host='localhost',
14 | port=5672,
15 | socket_timeout=10000))
16 | channel = connection.channel()
17 | queue = "vilbert_multitask_queue"
18 | channel.queue_declare(queue=queue, durable=True)
19 | message = {
20 | 'image_path': image_path,
21 | 'question': question,
22 | 'socket_id': socket_id,
23 | "task_id": task_id
24 | }
25 | log_to_terminal(socket_id, {"terminal": "Publishing job to ViLBERT Queue"})
26 | channel.basic_publish(exchange='',
27 | routing_key=queue,
28 | body=json.dumps(message),
29 | properties=pika.BasicProperties(
30 | delivery_mode=2,  # make message persistent
31 | ))
32 |
33 | print(" [x] Sent %r" % message)
34 | log_to_terminal(socket_id, {"terminal": "Job published successfully"})
35 | connection.close()
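
A worker is expected to consume these messages from the same queue; the repository's worker.py (below) plays that role. As a minimal, self-contained sketch of the consuming side, assuming the queue settings above and a hypothetical `process_job` handler (pika 1.x API):

import json

import pika


def process_job(job):
    # Placeholder for the model-inference step; not part of this repository.
    print("Received job for socket %s" % job["socket_id"])


def main():
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host="localhost", port=5672))
    channel = connection.channel()
    # Declaration is idempotent and must match the publisher's (durable=True).
    channel.queue_declare(queue="vilbert_multitask_queue", durable=True)
    channel.basic_qos(prefetch_count=1)  # hand out one job at a time

    def callback(ch, method, properties, body):
        process_job(json.loads(body))
        ch.basic_ack(delivery_tag=method.delivery_tag)  # ack only after success

    channel.basic_consume(queue="vilbert_multitask_queue",
                          on_message_callback=callback)
    channel.start_consuming()


if __name__ == "__main__":
    main()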
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/dist/basic.css:
--------------------------------------------------------------------------------
1 | /*
2 | * The MIT License
3 | * Copyright (c) 2012 Matias Meno
4 | */
5 | .dropzone, .dropzone * {
6 | box-sizing: border-box; }
7 |
8 | .dropzone {
9 | position: relative; }
10 | .dropzone .dz-preview {
11 | position: relative;
12 | display: inline-block;
13 | width: 120px;
14 | margin: 0.5em; }
15 | .dropzone .dz-preview .dz-progress {
16 | display: block;
17 | height: 15px;
18 | border: 1px solid #aaa; }
19 | .dropzone .dz-preview .dz-progress .dz-upload {
20 | display: block;
21 | height: 100%;
22 | width: 0;
23 | background: green; }
24 | .dropzone .dz-preview .dz-error-message {
25 | color: red;
26 | display: none; }
27 | .dropzone .dz-preview.dz-error .dz-error-message, .dropzone .dz-preview.dz-error .dz-error-mark {
28 | display: block; }
29 | .dropzone .dz-preview.dz-success .dz-success-mark {
30 | display: block; }
31 | .dropzone .dz-preview .dz-error-mark, .dropzone .dz-preview .dz-success-mark {
32 | position: absolute;
33 | display: none;
34 | left: 30px;
35 | top: 30px;
36 | width: 54px;
37 | height: 58px;
38 | left: 50%;
39 | margin-left: -27px; }
40 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aioredis==1.3.1
2 | appdirs==1.4.3
3 | asgi-redis==0.14.1
4 | asgiref==1.0.0
5 | async-timeout==3.0.1
6 | attrs==19.3.0
7 | autobahn==0.16.0
8 | Automat==20.2.0
9 | backports.ssl-match-hostname==3.5.0.1
10 | black==19.10b0
11 | certifi==2019.11.28
12 | cffi==1.14.0
13 | channels==0.17.2
14 | Click==7.0
15 | constantly==15.1.0
16 | cryptography==2.8
17 | daphne==0.15.0
18 | dask==0.11.1
19 | decorator==4.0.10
20 | defusedxml==0.6.0
21 | diff-match-patch==20181111
22 | Django==1.10.1
23 | django-filter==2.2.0
24 | django-import-export==2.0.2
25 | django-multiupload==0.5.2
26 | djangorestframework==3.9.3
27 | et-xmlfile==1.0.1
28 | hiredis==1.0.1
29 | hyperlink==19.0.0
30 | idna==2.9
31 | incremental==17.5.0
32 | jdcal==1.4.1
33 | Markdown==3.2.1
34 | MarkupPy==1.14
35 | msgpack==0.6.2
36 | msgpack-python==0.4.8
37 | nltk==3.4.1
38 | numpy==1.18.2
39 | odfpy==1.4.1
40 | openpyxl==3.0.3
41 | pathspec==0.7.0
42 | pika==1.1.0
43 | psycopg2==2.8.4
44 | pyasn1==0.4.8
45 | pyasn1-modules==0.2.8
46 | pycparser==2.20
47 | PyHamcrest==2.0.2
48 | pyOpenSSL==19.1.0
49 | pyparsing==2.1.10
50 | python-dateutil==2.5.3
51 | pytz==2016.7
52 | PyYAML==3.12
53 | redis==2.10.5
54 | regex==2020.2.20
55 | requests==2.14.2
56 | scipy==1.1.0
57 | service-identity==18.1.0
58 | six==1.10.0
59 | sqlparse==0.3.0
60 | tablib==1.1.0
61 | toml==0.10.0
62 | Twisted==16.4.1
63 | txaio==2.5.1
64 | typed-ast==1.4.1
65 | ujson==1.35
66 | websocket-client==0.37.0
67 | xlrd==1.2.0
68 | xlwt==1.3.0
69 | zope.interface==5.0.1
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # 12-in-1: Multi-Task Vision and Language Representation Learning Web Demo
3 |
4 | Much of vision-and-language research focuses on a small but diverse set of independent tasks and supporting datasets often studied in isolation; however, the visually-grounded language understanding skills required for success at these tasks overlap significantly. In this work, we investigate these relationships between vision-and-language tasks by developing a large-scale, multi-task training regime. Our approach culminates in a single model on 12 datasets from four broad categories of task including visual question answering, caption-based image retrieval, grounding referring expressions, and multi-modal verification. Compared to independently trained single-task models, this represents a reduction from approximately 3 billion parameters to 270 million while simultaneously improving performance by 2.05 points on average across tasks. We use our multi-task framework to perform in-depth analysis of the effect of joint training diverse tasks. Further, we show that finetuning task-specific models from our single multi-task model can lead to further improvements, achieving performance at or above the state-of-the-art.
5 |
6 | **Arxiv Paper Link**: https://arxiv.org/abs/1912.02315
7 |
8 | **Demo Link**: https://vilbert.cloudcv.org/
9 |
10 | If you have more questions about the project, you can email us at team@cloudcv.org.
11 |
12 | ### Built & Maintained by
13 |
14 | [Rishabh Jain](https://rishabhjain.xyz)
15 |
16 | ### Acknowledgements
17 |
18 | We thank Jiasen Lu for his help.
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/header_content.html:
--------------------------------------------------------------------------------
1 |
4 |
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD License
2 |
3 | For vilbert-multitask software
4 |
5 | Copyright (c) 2020-present, CloudCV.
6 |
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name vilbert-multitask nor the names of its contributors may be used to
18 | endorse or promote products derived from this software without specific
19 | prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/demo/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | from django.utils.html import format_html
3 |
4 | class TimeStampedModel(models.Model):
5 | """
6 | An abstract base class model that provides self-managed `created_at` and
7 | `modified_at` fields.
8 | """
9 |
10 | created_at = models.DateTimeField(auto_now_add=True)
11 | modified_at = models.DateTimeField(auto_now=True)
12 |
13 | class Meta:
14 | abstract = True
15 | app_label = "demo"
16 |
17 |
18 | class Tasks(TimeStampedModel):
19 | unique_id = models.PositiveIntegerField(unique=True)
20 | name = models.CharField(max_length=1000, blank=True, null=True)
21 | placeholder = models.TextField(null=True, blank=True)
22 | description = models.TextField(null=True, blank=True)
23 | num_of_images = models.PositiveIntegerField()
24 | example = models.CharField(max_length=1000, null=True, blank=True)
25 |
26 | class Meta:
27 | app_label = "demo"
28 | db_table = "tasks"
29 |
30 |
31 | class QuestionAnswer(TimeStampedModel):
32 | task = models.ForeignKey(Tasks, on_delete=models.CASCADE)
33 | input_text = models.TextField(null=True, blank=True)
34 | input_images = models.CharField(max_length=10000, null=True, blank=True)
35 | answer_text = models.TextField(null=True, blank=True)
36 | answer_images = models.CharField(max_length=10000, null=True, blank=True)
37 | socket_id = models.CharField(max_length=1000, null=True, blank=True)
38 | class Meta:
39 | app_label = "demo"
40 | db_table = "questionanswer"
41 |
42 | def img_url(self):
43 | return format_html(" ", self.image)
44 |
45 | class Attachment(models.Model):
46 | file = models.FileField(upload_to='attachments')
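
A minimal ORM sketch of how these models fit together (e.g., inside `python manage.py shell`); the field values are hypothetical, while the field names match the models above:

from demo.models import Tasks, QuestionAnswer

# Hypothetical example data.
task, _ = Tasks.objects.get_or_create(
    unique_id=1,
    defaults={"name": "VQA", "num_of_images": 1},
)
qa = QuestionAnswer.objects.create(
    task=task,
    input_text="what is on the table?",
    input_images="/media/demo/img1.jpg",
)
print(qa.task.name, qa.created_at)  # created_at is set automatically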
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dropzone.js is a lightweight JavaScript library that turns an HTML element into a dropzone.
4 | This means that a user can drag and drop a file onto it, and the file gets uploaded to the server via AJAX.
5 |
6 | * * *
7 |
8 | _If you want support, please use [stackoverflow](http://stackoverflow.com/) with the `dropzone.js` tag and not the
9 | GitHub issues tracker. Only post an issue here if you think you discovered a bug or have a feature request._
10 |
11 | * * *
12 |
13 | **Please read the [contributing guidelines](CONTRIBUTING.md) before you start working on Dropzone!**
14 |
15 |
16 |
19 |
20 |
21 |
22 | This is no longer the official repository for Dropzone. I have switched to [gitlab.com](https://gitlab.com/meno/dropzone)
23 | as the primary location to continue development.
24 |
25 | There are multiple reasons why I am switching from GitHub to GitLab. A few of them are GitHub's
26 | issue tracker, which is *drowning* me in issues that I am unable to categorise or prioritize properly,
27 | the lack of proper continuous integration, and the build files: I don't want the compiled `.js` files in my repository, and
28 | people regularly commit changes to the compiled files and create pull requests with them.
29 |
30 | I will write a blog post soon that goes into detail about why I am making the switch.
31 |
32 | This repository will remain, and will always host the most up-to-date versions of Dropzone, but only the distribution
33 | files!
34 |
35 | MIT License
36 | -----------
37 |
--------------------------------------------------------------------------------
/demo/static/js/script.js:
--------------------------------------------------------------------------------
1 | window.onload = function() {
2 | var allLinks = document.querySelectorAll("div.new-tab a");
3 | for (var i = 0; i < allLinks.length; i++) {
4 | var currentLink = allLinks[i];
5 | currentLink.setAttribute("target", "_blank");
6 | }
7 | }
8 |
9 | function show_task_details(task_name, task_description) {
10 | $("#task-name").empty();
11 | var taskColDiv = $('');
12 | var taskDescriptionRowDiv = $('');
13 |
14 | $(taskColDiv).append(task_name);
15 | $(taskColDiv).append(taskDescriptionRowDiv);
16 | $("#task-name").append(taskColDiv);
17 | }
18 |
19 |
20 | // function get_task_data(task_id) {
21 | // var url = "https://vilbert.cloudcv.org/get_task_details/";
22 | // var url = url.concat(task_id)
23 | // var url = url.concat("/")
24 | // console.log(url);
25 | // $.ajax({
26 | // type: 'GET', // define the type of HTTP verb we want to use (GET)
27 | // url: url // the url where we want to GET
28 | // }).done(function(task_data) {
29 | // show_task_details(task_data.name, task_data.description)
30 | // // $(task_data.example).appendTo("#task-example");
31 | // var question =$("#question").val();
32 | // window.task_data = task_data;
33 | // $("task-example").text(task_data.example);
34 | // console.log(task_data);
35 | // if (question!="") {
36 | // $("#question").val(question);
37 | // } else {
38 | // $("#question").attr("placeholder", task_data.placeholder).val("").focus().blur();
39 | // }
40 |
41 | // });
42 | // }
--------------------------------------------------------------------------------
/demo/migrations/0001_add_models_for_demo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Generated by Django 1.11.23 on 2020-03-28 01:37
3 | from __future__ import unicode_literals
4 |
5 | from django.db import migrations, models
6 | import django.db.models.deletion
7 |
8 |
9 | class Migration(migrations.Migration):
10 |
11 | initial = True
12 |
13 | dependencies = [
14 | ]
15 |
16 | operations = [
17 | migrations.CreateModel(
18 | name='QuestionAnswer',
19 | fields=[
20 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
21 | ('created_at', models.DateTimeField(auto_now_add=True)),
22 | ('modified_at', models.DateTimeField(auto_now=True)),
23 | ('input_text', models.TextField(blank=True, null=True)),
24 | ('input_images', models.CharField(blank=True, max_length=10000, null=True)),
25 | ('answer_text', models.TextField(blank=True, null=True)),
26 | ('answer_images', models.CharField(blank=True, max_length=10000, null=True)),
27 | ('socket_id', models.CharField(blank=True, max_length=1000, null=True)),
28 | ],
29 | options={
30 | 'db_table': 'questionanswer',
31 | },
32 | ),
33 | migrations.CreateModel(
34 | name='Tasks',
35 | fields=[
36 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
37 | ('created_at', models.DateTimeField(auto_now_add=True)),
38 | ('modified_at', models.DateTimeField(auto_now=True)),
39 | ('unique_id', models.PositiveIntegerField(unique=True)),
40 | ('name', models.CharField(blank=True, max_length=1000, null=True)),
41 | ('placeholder', models.TextField(blank=True, null=True)),
42 | ('description', models.TextField(blank=True, null=True)),
43 | ('num_of_images', models.PositiveIntegerField()),
44 | ],
45 | options={
46 | 'db_table': 'tasks',
47 | },
48 | ),
49 | migrations.AddField(
50 | model_name='questionanswer',
51 | name='task',
52 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='demo.Tasks'),
53 | ),
54 | ]
55 |
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/credits.html:
--------------------------------------------------------------------------------
1 |
34 |
35 |
59 |
60 |
70 |
71 |
77 |
85 |
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/head.html:
--------------------------------------------------------------------------------
1 | {% load static %}
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | CloudCV: ViLBERT Multi-Task Demo
24 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
50 |
--------------------------------------------------------------------------------
/demo/static/css/style.css:
--------------------------------------------------------------------------------
1 | p,
2 | h1,
3 | h2,
4 | h3,
5 | h4,
6 | font {
7 | font-weight: 300 !important;
8 | font-family: 'Roboto', sans-serif !important;
9 | }
10 |
11 | body {
12 | font-weight: 300 !important;
13 | font-family: 'Roboto', sans-serif !important;
14 | padding-top: 30px;
15 | }
16 |
17 | .fs-16 {
18 | font-size: 16px;
19 | }
20 | .fs-14 {
21 | font-size: 14px !important;
22 | }
23 |
24 | .align-center {
25 | text-align: center;
26 | }
27 |
28 | .caption {
29 | font-size: 18px !important;
30 | font-weight: bold;
31 | }
32 |
33 | #inputAnswer,
34 | #predictedAnswer {
35 | font-weight: bold;
36 | }
37 |
38 | .under {
39 | position: absolute;
40 | z-index: -1;
41 | }
42 |
43 | .over {
44 | position: absolute;
45 | z-index: 0;
46 | mix-blend-mode: multiply;
47 | }
48 |
49 | #gradCamCaption {
50 | position: relative;
51 | padding-top: 350px;
52 | }
53 |
54 | .finalImages {
55 | width: 350px !important;
56 | height: 350px !important;
57 | padding: 6px;
58 | }
59 |
60 | .resultText {
61 | margin-bottom: 15px !important;
62 | }
63 |
64 | .vqa_model {
65 | padding-bottom: 9px;
66 | margin: 40px 0 20px;
67 | }
68 |
69 | .hiddenDiv {
70 | display: none;
71 | }
72 |
73 | .button_link {
74 | cursor: pointer;
75 | }
76 |
77 | .demo_img {
78 | height: 300px !important;
79 | padding: 6px;
80 | }
81 |
82 | .demo_img:hover {
83 | border: 6px solid black;
84 | padding: 0px;
85 | cursor: pointer;
86 | }
87 |
88 | .image-selected {
89 | border: 3px solid red !important;
90 | padding: 0px;
91 | cursor: pointer;
92 | }
93 |
94 |
95 | #show-demo-images-btn {
96 | text-align: right;
97 | }
98 |
99 | @media(max-width: 540px) {
100 | #show-demo-images-btn {
101 | text-align: center;
102 | }
103 | }
104 |
105 | .dz-upload {
106 | display: block;
107 | background-color: red;
108 | height: 10px;
109 | width: 0%;
110 | }
111 |
112 | input[type="radio"] {
113 | vertical-align: super;
114 | width: 5%;
115 | height: 2%;
116 | cursor: pointer;
117 | }
118 |
119 | .border-bottom {
120 | border-bottom: 1px solid #eee;
121 | }
122 |
123 | .choose-model {
124 | padding-bottom: 9px;
125 | border-bottom: 1px solid #eee;
126 | }
127 |
128 | .demo-images {
129 | background-color: #428bca !important;
130 | border-color: #357ebd !important;
131 | }
132 |
133 | .long-placeholder::-webkit-input-placeholder { /* Chrome/Opera/Safari */
134 | white-space:pre-line;
135 | position:relative;
136 | top:-7px;
137 |
138 | }
139 | .long-placeholder::-moz-placeholder { /* Firefox 19+ */
140 | white-space:pre-line;
141 | position:relative;
142 | top:-7px;
143 | }
144 | .fs-20 {
145 | font-size: 20px !important;
146 | }
147 |
148 | .fs-22 {
149 | font-size: 22px !important;
150 | }
151 |
152 | .fs-24 {
153 | font-size: 24px !important;
154 | }
155 |
156 | .fs-26 {
157 | font-size: 26px !important;
158 | }
--------------------------------------------------------------------------------
/demo/views.py:
--------------------------------------------------------------------------------
1 | from django.http import JsonResponse
2 | from channels import Group
3 | from django.shortcuts import render
4 | from django.views.decorators.csrf import csrf_exempt
5 | from django.conf import settings
6 |
7 | from .sender import vilbert_task
8 | from .utils import log_to_terminal
9 | from .models import Tasks, QuestionAnswer
10 |
11 | import uuid
12 | import os
13 | import random
14 | import traceback
15 | import demo.constants as constants
16 |
17 | COCO_PARTIAL_IMAGE_NAME = constants.COCO_PARTIAL_IMAGE_NAME
18 |
19 | @csrf_exempt
20 | def vilbert_multitask(request, template_name="index.html"):
21 | socketid = uuid.uuid4()
22 | if request.method == "POST":
23 | try:
24 | # Fetch the parameters from client side
25 | socketid = request.POST.get("socket_id")
26 | task_id = request.POST.get("task_id")
27 | input_question = request.POST.get("question").lower()
28 | input_images_list = request.POST.getlist("image_list[]")
29 | print(input_images_list, input_question, task_id)
30 | abs_image_path = []
31 | for image in input_images_list:
32 | abs_image_path.append(str(os.path.join(settings.BASE_DIR, str(image[1:]))))
33 | print(socketid, task_id, input_question, abs_image_path)
34 | # Run the Model wrapper
35 | log_to_terminal(socketid, {"terminal": "Starting Vilbert Multitask Job..."})
36 | vilbert_task(abs_image_path, str(input_question), task_id, socketid)
37 | except Exception as e:
38 | log_to_terminal(socketid, {"terminal": traceback.format_exc()})
39 | demo_images, images_name = get_demo_images(constants.COCO_IMAGES_PATH)
40 | return render(request, template_name, {"demo_images": demo_images,
41 | "socketid": socketid,
42 | "images_name": images_name})
43 |
44 |
45 | def get_task_details(request, task_id):
46 | try:
47 | task = Tasks.objects.get(unique_id=task_id)
48 | except Tasks.DoesNotExist:
49 | response_data = {
50 | "error": "Tasks with id {} doesn't exist".format(task_id)
51 | }
52 | return JsonResponse(response_data)
53 | response_data = {
54 | "unique_id": task.unique_id,
55 | "name": task.name,
56 | "placeholder": task.placeholder,
57 | "description": task.description,
58 | "num_of_images": task.num_of_images,
59 | "example": task.example
60 | }
61 | return JsonResponse(response_data)
62 |
63 |
64 | def get_demo_images(demo_images_path):
65 | try:
66 | image_count = 0
67 | demo_images = []
68 | while image_count < 6:
69 | random_image = random.choice(os.listdir(demo_images_path))
70 | if COCO_PARTIAL_IMAGE_NAME in random_image:
71 | demo_images.append(random_image)
72 | image_count += 1
73 |
74 | demo_images_path = [os.path.join(constants.COCO_IMAGES_URL, x) for x in demo_images]
75 | images_name = [x for x in demo_images]
76 | except Exception as e:
77 | print(traceback.format_exc())
78 | images = ['img1.jpg', 'img2.jpg', 'img3.jpg', 'img4.jpg', 'img5.jpg', 'img6.jpg',]
79 | demo_images_path = [os.path.join(settings.STATIC_URL, 'images', x) for x in images]
80 | images_name = [x for x in images]
81 | return demo_images_path, images_name
82 |
83 |
84 | def handle_uploaded_file(f, path):
85 | with open(path, 'wb+') as destination:
86 | for chunk in f.chunks():
87 | destination.write(chunk)
88 |
89 | @csrf_exempt
90 | def file_upload(request):
91 | if request.method == "POST":
92 | images = request.FILES.getlist("files[]")
93 | print("Image", images)
94 | socketid = request.POST.get('socketid')
95 | dir_type = constants.VILBERT_MULTITASK_CONFIG['image_dir']
96 | file_paths = []
97 | for i in images:
98 | image_uuid = uuid.uuid4()
99 | image_extension = str(i).split(".")[-1]
100 | img_path = os.path.join(dir_type, str(image_uuid)) + "." + image_extension
101 | # handle image upload
102 | handle_uploaded_file(i, img_path)
103 | file_paths.append(img_path.replace(settings.BASE_DIR, ""))
104 |
105 | img_url = img_path.replace(settings.BASE_DIR, "")
106 | return JsonResponse({"file_paths": file_paths})
107 |
--------------------------------------------------------------------------------
/vilbert_multitask/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for vilbert_multitask project.
3 |
4 | Generated by 'django-admin startproject' using Django 1.11.23.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/1.11/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/1.11/ref/settings/
11 | """
12 |
13 | import os
14 |
15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
17 |
18 |
19 | # Quick-start development settings - unsuitable for production
20 | # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
21 |
22 | # SECURITY WARNING: keep the secret key used in production secret!
23 | SECRET_KEY = 'v0)e5((^-3_jpp1ghg-tq@!hr_quadcpojvzdvd2yworqajb)z'
24 |
25 | # SECURITY WARNING: don't run with debug turned on in production!
26 | DEBUG = True
27 |
28 | ALLOWED_HOSTS = []
29 |
30 |
31 | # Application definition
32 |
33 | INSTALLED_APPS = [
34 | 'django.contrib.admin',
35 | 'django.contrib.auth',
36 | 'django.contrib.contenttypes',
37 | 'django.contrib.sessions',
38 | 'django.contrib.messages',
39 | 'django.contrib.staticfiles',
40 | "channels",
41 | "demo",
42 | ]
43 |
44 | MIDDLEWARE = [
45 | 'django.middleware.security.SecurityMiddleware',
46 | 'django.contrib.sessions.middleware.SessionMiddleware',
47 | 'django.middleware.common.CommonMiddleware',
48 | 'django.middleware.csrf.CsrfViewMiddleware',
49 | 'django.contrib.auth.middleware.AuthenticationMiddleware',
50 | 'django.contrib.messages.middleware.MessageMiddleware',
51 | 'django.middleware.clickjacking.XFrameOptionsMiddleware',
52 | ]
53 |
54 | ROOT_URLCONF = 'vilbert_multitask.urls'
55 |
56 | TEMPLATES = [
57 | {
58 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
59 | 'DIRS': [os.path.join(BASE_DIR, "demo", "templates", "vilbert_multitask")],
60 | 'APP_DIRS': True,
61 | 'OPTIONS': {
62 | 'context_processors': [
63 | 'django.template.context_processors.debug',
64 | 'django.template.context_processors.request',
65 | 'django.contrib.auth.context_processors.auth',
66 | 'django.contrib.messages.context_processors.messages',
67 | ],
68 | },
69 | },
70 | ]
71 |
72 | WSGI_APPLICATION = 'vilbert_multitask.wsgi.application'
73 |
74 |
75 | # Database
76 | # https://docs.djangoproject.com/en/1.11/ref/settings/#databases
77 |
78 | # DATABASES = {
79 | # 'default': {
80 | # 'ENGINE': 'django.db.backends.sqlite3',
81 | # 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
82 | # }
83 | # }
84 |
85 | DATABASES = {
86 | 'default': {
87 | 'ENGINE': 'django.db.backends.postgresql',
88 | 'NAME': 'vilbert_multitask',
89 | 'USER': 'vilbert',
90 | 'PASSWORD': 'vilbert@123',
91 | 'HOST': '127.0.0.1',
92 | 'PORT': '5432',
93 | }
94 | }
95 |
96 |
97 | # Password validation
98 | # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
99 |
100 | AUTH_PASSWORD_VALIDATORS = [
101 | {
102 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
103 | },
104 | {
105 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
106 | },
107 | {
108 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
109 | },
110 | {
111 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
112 | },
113 | ]
114 |
115 |
116 | # Internationalization
117 | # https://docs.djangoproject.com/en/1.11/topics/i18n/
118 |
119 | LANGUAGE_CODE = 'en-us'
120 |
121 | TIME_ZONE = 'UTC'
122 |
123 | USE_I18N = True
124 |
125 | USE_L10N = True
126 |
127 | USE_TZ = True
128 |
129 |
130 | # Static files (CSS, JavaScript, Images)
131 | # https://docs.djangoproject.com/en/1.11/howto/static-files/
132 | STATIC_URL = '/static/'
133 | STATIC_ROOT = os.path.join(BASE_DIR, 'static')
134 |
135 | MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
136 |
137 | MEDIA_URL = "/media/"
138 |
139 | PIKA_HOST = 'localhost'
140 | CHANNEL_LAYERS = {
141 | "default": {
142 | "BACKEND": "asgi_redis.RedisChannelLayer",
143 | "CONFIG": {
144 | "hosts": [("localhost", 6379)],
145 | "prefix": u"vilbert_multitask_demo"
146 | },
147 | "ROUTING": "demo.routers.channel_routing",
148 | },
149 | }
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/demo_images.html:
--------------------------------------------------------------------------------
1 | {% load static %}
2 |
8 |
9 |
50 |
57 |
58 |
59 | Upload your own images
60 |
61 |
62 |
63 |
64 |
65 |
130 |
131 |
132 |
133 |
138 |
139 |
147 |
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/dist/min/dropzone.min.css:
--------------------------------------------------------------------------------
1 | @-webkit-keyframes passing-through{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%, 70%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}100%{opacity:0;-webkit-transform:translateY(-40px);-moz-transform:translateY(-40px);-ms-transform:translateY(-40px);-o-transform:translateY(-40px);transform:translateY(-40px)}}@-moz-keyframes passing-through{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%, 70%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}100%{opacity:0;-webkit-transform:translateY(-40px);-moz-transform:translateY(-40px);-ms-transform:translateY(-40px);-o-transform:translateY(-40px);transform:translateY(-40px)}}@keyframes passing-through{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%, 70%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}100%{opacity:0;-webkit-transform:translateY(-40px);-moz-transform:translateY(-40px);-ms-transform:translateY(-40px);-o-transform:translateY(-40px);transform:translateY(-40px)}}@-webkit-keyframes slide-in{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}}@-moz-keyframes slide-in{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}}@keyframes slide-in{0%{opacity:0;-webkit-transform:translateY(40px);-moz-transform:translateY(40px);-ms-transform:translateY(40px);-o-transform:translateY(40px);transform:translateY(40px)}30%{opacity:1;-webkit-transform:translateY(0px);-moz-transform:translateY(0px);-ms-transform:translateY(0px);-o-transform:translateY(0px);transform:translateY(0px)}}@-webkit-keyframes pulse{0%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}10%{-webkit-transform:scale(1.1);-moz-transform:scale(1.1);-ms-transform:scale(1.1);-o-transform:scale(1.1);transform:scale(1.1)}20%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}}@-moz-keyframes pulse{0%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}10%{-webkit-transform:scale(1.1);-moz-transform:scale(1.1);-ms-transform:scale(1.1);-o-transform:scale(1.1);transform:scale(1.1)}20%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}}@keyframes 
pulse{0%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}10%{-webkit-transform:scale(1.1);-moz-transform:scale(1.1);-ms-transform:scale(1.1);-o-transform:scale(1.1);transform:scale(1.1)}20%{-webkit-transform:scale(1);-moz-transform:scale(1);-ms-transform:scale(1);-o-transform:scale(1);transform:scale(1)}}.dropzone,.dropzone *{box-sizing:border-box}.dropzone{min-height:150px;border:2px solid rgba(0,0,0,0.3);background:white;padding:20px 20px}.dropzone.dz-clickable{cursor:pointer}.dropzone.dz-clickable *{cursor:default}.dropzone.dz-clickable .dz-message,.dropzone.dz-clickable .dz-message *{cursor:pointer}.dropzone.dz-started .dz-message{display:none}.dropzone.dz-drag-hover{border-style:solid}.dropzone.dz-drag-hover .dz-message{opacity:0.5}.dropzone .dz-message{text-align:center;margin:2em 0}.dropzone .dz-message .dz-button{background:none;color:inherit;border:none;padding:0;font:inherit;cursor:pointer;outline:inherit}.dropzone .dz-preview{position:relative;display:inline-block;vertical-align:top;margin:16px;min-height:100px}.dropzone .dz-preview:hover{z-index:1000}.dropzone .dz-preview:hover .dz-details{opacity:1}.dropzone .dz-preview.dz-file-preview .dz-image{border-radius:20px;background:#999;background:linear-gradient(to bottom, #eee, #ddd)}.dropzone .dz-preview.dz-file-preview .dz-details{opacity:1}.dropzone .dz-preview.dz-image-preview{background:white}.dropzone .dz-preview.dz-image-preview .dz-details{-webkit-transition:opacity 0.2s linear;-moz-transition:opacity 0.2s linear;-ms-transition:opacity 0.2s linear;-o-transition:opacity 0.2s linear;transition:opacity 0.2s linear}.dropzone .dz-preview .dz-remove{font-size:14px;text-align:center;display:block;cursor:pointer;border:none}.dropzone .dz-preview .dz-remove:hover{text-decoration:underline}.dropzone .dz-preview:hover .dz-details{opacity:1}.dropzone .dz-preview .dz-details{z-index:20;position:absolute;top:0;left:0;opacity:0;font-size:13px;min-width:100%;max-width:100%;padding:2em 1em;text-align:center;color:rgba(0,0,0,0.9);line-height:150%}.dropzone .dz-preview .dz-details .dz-size{margin-bottom:1em;font-size:16px}.dropzone .dz-preview .dz-details .dz-filename{white-space:nowrap}.dropzone .dz-preview .dz-details .dz-filename:hover span{border:1px solid rgba(200,200,200,0.8);background-color:rgba(255,255,255,0.8)}.dropzone .dz-preview .dz-details .dz-filename:not(:hover){overflow:hidden;text-overflow:ellipsis}.dropzone .dz-preview .dz-details .dz-filename:not(:hover) span{border:1px solid transparent}.dropzone .dz-preview .dz-details .dz-filename span,.dropzone .dz-preview .dz-details .dz-size span{background-color:rgba(255,255,255,0.4);padding:0 0.4em;border-radius:3px}.dropzone .dz-preview:hover .dz-image img{-webkit-transform:scale(1.05, 1.05);-moz-transform:scale(1.05, 1.05);-ms-transform:scale(1.05, 1.05);-o-transform:scale(1.05, 1.05);transform:scale(1.05, 1.05);-webkit-filter:blur(8px);filter:blur(8px)}.dropzone .dz-preview .dz-image{border-radius:20px;overflow:hidden;width:120px;height:120px;position:relative;display:block;z-index:10}.dropzone .dz-preview .dz-image img{display:block}.dropzone .dz-preview.dz-success .dz-success-mark{-webkit-animation:passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);-moz-animation:passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);-ms-animation:passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);-o-animation:passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);animation:passing-through 3s cubic-bezier(0.77, 0, 0.175, 1)}.dropzone 
.dz-preview.dz-error .dz-error-mark{opacity:1;-webkit-animation:slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);-moz-animation:slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);-ms-animation:slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);-o-animation:slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);animation:slide-in 3s cubic-bezier(0.77, 0, 0.175, 1)}.dropzone .dz-preview .dz-success-mark,.dropzone .dz-preview .dz-error-mark{pointer-events:none;opacity:0;z-index:500;position:absolute;display:block;top:50%;left:50%;margin-left:-27px;margin-top:-27px}.dropzone .dz-preview .dz-success-mark svg,.dropzone .dz-preview .dz-error-mark svg{display:block;width:54px;height:54px}.dropzone .dz-preview.dz-processing .dz-progress{opacity:1;-webkit-transition:all 0.2s linear;-moz-transition:all 0.2s linear;-ms-transition:all 0.2s linear;-o-transition:all 0.2s linear;transition:all 0.2s linear}.dropzone .dz-preview.dz-complete .dz-progress{opacity:0;-webkit-transition:opacity 0.4s ease-in;-moz-transition:opacity 0.4s ease-in;-ms-transition:opacity 0.4s ease-in;-o-transition:opacity 0.4s ease-in;transition:opacity 0.4s ease-in}.dropzone .dz-preview:not(.dz-processing) .dz-progress{-webkit-animation:pulse 6s ease infinite;-moz-animation:pulse 6s ease infinite;-ms-animation:pulse 6s ease infinite;-o-animation:pulse 6s ease infinite;animation:pulse 6s ease infinite}.dropzone .dz-preview .dz-progress{opacity:1;z-index:1000;pointer-events:none;position:absolute;height:16px;left:50%;top:50%;margin-top:-8px;width:80px;margin-left:-40px;background:rgba(255,255,255,0.9);-webkit-transform:scale(1);border-radius:8px;overflow:hidden}.dropzone .dz-preview .dz-progress .dz-upload{background:#333;background:linear-gradient(to bottom, #666, #444);position:absolute;top:0;left:0;bottom:0;width:0;-webkit-transition:width 300ms ease-in-out;-moz-transition:width 300ms ease-in-out;-ms-transition:width 300ms ease-in-out;-o-transition:width 300ms ease-in-out;transition:width 300ms ease-in-out}.dropzone .dz-preview.dz-error .dz-error-message{display:block}.dropzone .dz-preview.dz-error:hover .dz-error-message{opacity:1;pointer-events:auto}.dropzone .dz-preview .dz-error-message{pointer-events:none;z-index:1000;position:absolute;display:block;display:none;opacity:0;-webkit-transition:opacity 0.3s ease;-moz-transition:opacity 0.3s ease;-ms-transition:opacity 0.3s ease;-o-transition:opacity 0.3s ease;transition:opacity 0.3s ease;border-radius:8px;font-size:13px;top:130px;left:-10px;width:140px;background:#be2626;background:linear-gradient(to bottom, #be2626, #a92222);padding:0.5em 1.2em;color:white}.dropzone .dz-preview .dz-error-message:after{content:'';position:absolute;top:-6px;left:64px;width:0;height:0;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #be2626}
2 |
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/dist/dropzone.css:
--------------------------------------------------------------------------------
1 | /*
2 | * The MIT License
3 | * Copyright (c) 2012 Matias Meno
4 | */
5 | @-webkit-keyframes passing-through {
6 | 0% {
7 | opacity: 0;
8 | -webkit-transform: translateY(40px);
9 | -moz-transform: translateY(40px);
10 | -ms-transform: translateY(40px);
11 | -o-transform: translateY(40px);
12 | transform: translateY(40px); }
13 | 30%, 70% {
14 | opacity: 1;
15 | -webkit-transform: translateY(0px);
16 | -moz-transform: translateY(0px);
17 | -ms-transform: translateY(0px);
18 | -o-transform: translateY(0px);
19 | transform: translateY(0px); }
20 | 100% {
21 | opacity: 0;
22 | -webkit-transform: translateY(-40px);
23 | -moz-transform: translateY(-40px);
24 | -ms-transform: translateY(-40px);
25 | -o-transform: translateY(-40px);
26 | transform: translateY(-40px); } }
27 | @-moz-keyframes passing-through {
28 | 0% {
29 | opacity: 0;
30 | -webkit-transform: translateY(40px);
31 | -moz-transform: translateY(40px);
32 | -ms-transform: translateY(40px);
33 | -o-transform: translateY(40px);
34 | transform: translateY(40px); }
35 | 30%, 70% {
36 | opacity: 1;
37 | -webkit-transform: translateY(0px);
38 | -moz-transform: translateY(0px);
39 | -ms-transform: translateY(0px);
40 | -o-transform: translateY(0px);
41 | transform: translateY(0px); }
42 | 100% {
43 | opacity: 0;
44 | -webkit-transform: translateY(-40px);
45 | -moz-transform: translateY(-40px);
46 | -ms-transform: translateY(-40px);
47 | -o-transform: translateY(-40px);
48 | transform: translateY(-40px); } }
49 | @keyframes passing-through {
50 | 0% {
51 | opacity: 0;
52 | -webkit-transform: translateY(40px);
53 | -moz-transform: translateY(40px);
54 | -ms-transform: translateY(40px);
55 | -o-transform: translateY(40px);
56 | transform: translateY(40px); }
57 | 30%, 70% {
58 | opacity: 1;
59 | -webkit-transform: translateY(0px);
60 | -moz-transform: translateY(0px);
61 | -ms-transform: translateY(0px);
62 | -o-transform: translateY(0px);
63 | transform: translateY(0px); }
64 | 100% {
65 | opacity: 0;
66 | -webkit-transform: translateY(-40px);
67 | -moz-transform: translateY(-40px);
68 | -ms-transform: translateY(-40px);
69 | -o-transform: translateY(-40px);
70 | transform: translateY(-40px); } }
71 | @-webkit-keyframes slide-in {
72 | 0% {
73 | opacity: 0;
74 | -webkit-transform: translateY(40px);
75 | -moz-transform: translateY(40px);
76 | -ms-transform: translateY(40px);
77 | -o-transform: translateY(40px);
78 | transform: translateY(40px); }
79 | 30% {
80 | opacity: 1;
81 | -webkit-transform: translateY(0px);
82 | -moz-transform: translateY(0px);
83 | -ms-transform: translateY(0px);
84 | -o-transform: translateY(0px);
85 | transform: translateY(0px); } }
86 | @-moz-keyframes slide-in {
87 | 0% {
88 | opacity: 0;
89 | -webkit-transform: translateY(40px);
90 | -moz-transform: translateY(40px);
91 | -ms-transform: translateY(40px);
92 | -o-transform: translateY(40px);
93 | transform: translateY(40px); }
94 | 30% {
95 | opacity: 1;
96 | -webkit-transform: translateY(0px);
97 | -moz-transform: translateY(0px);
98 | -ms-transform: translateY(0px);
99 | -o-transform: translateY(0px);
100 | transform: translateY(0px); } }
101 | @keyframes slide-in {
102 | 0% {
103 | opacity: 0;
104 | -webkit-transform: translateY(40px);
105 | -moz-transform: translateY(40px);
106 | -ms-transform: translateY(40px);
107 | -o-transform: translateY(40px);
108 | transform: translateY(40px); }
109 | 30% {
110 | opacity: 1;
111 | -webkit-transform: translateY(0px);
112 | -moz-transform: translateY(0px);
113 | -ms-transform: translateY(0px);
114 | -o-transform: translateY(0px);
115 | transform: translateY(0px); } }
116 | @-webkit-keyframes pulse {
117 | 0% {
118 | -webkit-transform: scale(1);
119 | -moz-transform: scale(1);
120 | -ms-transform: scale(1);
121 | -o-transform: scale(1);
122 | transform: scale(1); }
123 | 10% {
124 | -webkit-transform: scale(1.1);
125 | -moz-transform: scale(1.1);
126 | -ms-transform: scale(1.1);
127 | -o-transform: scale(1.1);
128 | transform: scale(1.1); }
129 | 20% {
130 | -webkit-transform: scale(1);
131 | -moz-transform: scale(1);
132 | -ms-transform: scale(1);
133 | -o-transform: scale(1);
134 | transform: scale(1); } }
135 | @-moz-keyframes pulse {
136 | 0% {
137 | -webkit-transform: scale(1);
138 | -moz-transform: scale(1);
139 | -ms-transform: scale(1);
140 | -o-transform: scale(1);
141 | transform: scale(1); }
142 | 10% {
143 | -webkit-transform: scale(1.1);
144 | -moz-transform: scale(1.1);
145 | -ms-transform: scale(1.1);
146 | -o-transform: scale(1.1);
147 | transform: scale(1.1); }
148 | 20% {
149 | -webkit-transform: scale(1);
150 | -moz-transform: scale(1);
151 | -ms-transform: scale(1);
152 | -o-transform: scale(1);
153 | transform: scale(1); } }
154 | @keyframes pulse {
155 | 0% {
156 | -webkit-transform: scale(1);
157 | -moz-transform: scale(1);
158 | -ms-transform: scale(1);
159 | -o-transform: scale(1);
160 | transform: scale(1); }
161 | 10% {
162 | -webkit-transform: scale(1.1);
163 | -moz-transform: scale(1.1);
164 | -ms-transform: scale(1.1);
165 | -o-transform: scale(1.1);
166 | transform: scale(1.1); }
167 | 20% {
168 | -webkit-transform: scale(1);
169 | -moz-transform: scale(1);
170 | -ms-transform: scale(1);
171 | -o-transform: scale(1);
172 | transform: scale(1); } }
173 | .dropzone, .dropzone * {
174 | box-sizing: border-box; }
175 |
176 | .dropzone {
177 | min-height: 150px;
178 | border: 2px solid rgba(0, 0, 0, 0.3);
179 | background: white;
180 | padding: 20px 20px; }
181 | .dropzone.dz-clickable {
182 | cursor: pointer; }
183 | .dropzone.dz-clickable * {
184 | cursor: default; }
185 | .dropzone.dz-clickable .dz-message, .dropzone.dz-clickable .dz-message * {
186 | cursor: pointer; }
187 | .dropzone.dz-started .dz-message {
188 | display: none; }
189 | .dropzone.dz-drag-hover {
190 | border-style: solid; }
191 | .dropzone.dz-drag-hover .dz-message {
192 | opacity: 0.5; }
193 | .dropzone .dz-message {
194 | text-align: center;
195 | margin: 2em 0; }
196 | .dropzone .dz-message .dz-button {
197 | background: none;
198 | color: inherit;
199 | border: none;
200 | padding: 0;
201 | font: inherit;
202 | cursor: pointer;
203 | outline: inherit; }
204 | .dropzone .dz-preview {
205 | position: relative;
206 | display: inline-block;
207 | vertical-align: top;
208 | margin: 16px;
209 | min-height: 100px; }
210 | .dropzone .dz-preview:hover {
211 | z-index: 1000; }
212 | .dropzone .dz-preview:hover .dz-details {
213 | opacity: 1; }
214 | .dropzone .dz-preview.dz-file-preview .dz-image {
215 | border-radius: 20px;
216 | background: #999;
217 | background: linear-gradient(to bottom, #eee, #ddd); }
218 | .dropzone .dz-preview.dz-file-preview .dz-details {
219 | opacity: 1; }
220 | .dropzone .dz-preview.dz-image-preview {
221 | background: white; }
222 | .dropzone .dz-preview.dz-image-preview .dz-details {
223 | -webkit-transition: opacity 0.2s linear;
224 | -moz-transition: opacity 0.2s linear;
225 | -ms-transition: opacity 0.2s linear;
226 | -o-transition: opacity 0.2s linear;
227 | transition: opacity 0.2s linear; }
228 | .dropzone .dz-preview .dz-remove {
229 | font-size: 14px;
230 | text-align: center;
231 | display: block;
232 | cursor: pointer;
233 | border: none; }
234 | .dropzone .dz-preview .dz-remove:hover {
235 | text-decoration: underline; }
236 | .dropzone .dz-preview:hover .dz-details {
237 | opacity: 1; }
238 | .dropzone .dz-preview .dz-details {
239 | z-index: 20;
240 | position: absolute;
241 | top: 0;
242 | left: 0;
243 | opacity: 0;
244 | font-size: 13px;
245 | min-width: 100%;
246 | max-width: 100%;
247 | padding: 2em 1em;
248 | text-align: center;
249 | color: rgba(0, 0, 0, 0.9);
250 | line-height: 150%; }
251 | .dropzone .dz-preview .dz-details .dz-size {
252 | margin-bottom: 1em;
253 | font-size: 16px; }
254 | .dropzone .dz-preview .dz-details .dz-filename {
255 | white-space: nowrap; }
256 | .dropzone .dz-preview .dz-details .dz-filename:hover span {
257 | border: 1px solid rgba(200, 200, 200, 0.8);
258 | background-color: rgba(255, 255, 255, 0.8); }
259 | .dropzone .dz-preview .dz-details .dz-filename:not(:hover) {
260 | overflow: hidden;
261 | text-overflow: ellipsis; }
262 | .dropzone .dz-preview .dz-details .dz-filename:not(:hover) span {
263 | border: 1px solid transparent; }
264 | .dropzone .dz-preview .dz-details .dz-filename span, .dropzone .dz-preview .dz-details .dz-size span {
265 | background-color: rgba(255, 255, 255, 0.4);
266 | padding: 0 0.4em;
267 | border-radius: 3px; }
268 | .dropzone .dz-preview:hover .dz-image img {
269 | -webkit-transform: scale(1.05, 1.05);
270 | -moz-transform: scale(1.05, 1.05);
271 | -ms-transform: scale(1.05, 1.05);
272 | -o-transform: scale(1.05, 1.05);
273 | transform: scale(1.05, 1.05);
274 | -webkit-filter: blur(8px);
275 | filter: blur(8px); }
276 | .dropzone .dz-preview .dz-image {
277 | border-radius: 20px;
278 | overflow: hidden;
279 | width: 120px;
280 | height: 120px;
281 | position: relative;
282 | display: block;
283 | z-index: 10; }
284 | .dropzone .dz-preview .dz-image img {
285 | display: block; }
286 | .dropzone .dz-preview.dz-success .dz-success-mark {
287 | -webkit-animation: passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);
288 | -moz-animation: passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);
289 | -ms-animation: passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);
290 | -o-animation: passing-through 3s cubic-bezier(0.77, 0, 0.175, 1);
291 | animation: passing-through 3s cubic-bezier(0.77, 0, 0.175, 1); }
292 | .dropzone .dz-preview.dz-error .dz-error-mark {
293 | opacity: 1;
294 | -webkit-animation: slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);
295 | -moz-animation: slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);
296 | -ms-animation: slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);
297 | -o-animation: slide-in 3s cubic-bezier(0.77, 0, 0.175, 1);
298 | animation: slide-in 3s cubic-bezier(0.77, 0, 0.175, 1); }
299 | .dropzone .dz-preview .dz-success-mark, .dropzone .dz-preview .dz-error-mark {
300 | pointer-events: none;
301 | opacity: 0;
302 | z-index: 500;
303 | position: absolute;
304 | display: block;
305 | top: 50%;
306 | left: 50%;
307 | margin-left: -27px;
308 | margin-top: -27px; }
309 | .dropzone .dz-preview .dz-success-mark svg, .dropzone .dz-preview .dz-error-mark svg {
310 | display: block;
311 | width: 54px;
312 | height: 54px; }
313 | .dropzone .dz-preview.dz-processing .dz-progress {
314 | opacity: 1;
315 | -webkit-transition: all 0.2s linear;
316 | -moz-transition: all 0.2s linear;
317 | -ms-transition: all 0.2s linear;
318 | -o-transition: all 0.2s linear;
319 | transition: all 0.2s linear; }
320 | .dropzone .dz-preview.dz-complete .dz-progress {
321 | opacity: 0;
322 | -webkit-transition: opacity 0.4s ease-in;
323 | -moz-transition: opacity 0.4s ease-in;
324 | -ms-transition: opacity 0.4s ease-in;
325 | -o-transition: opacity 0.4s ease-in;
326 | transition: opacity 0.4s ease-in; }
327 | .dropzone .dz-preview:not(.dz-processing) .dz-progress {
328 | -webkit-animation: pulse 6s ease infinite;
329 | -moz-animation: pulse 6s ease infinite;
330 | -ms-animation: pulse 6s ease infinite;
331 | -o-animation: pulse 6s ease infinite;
332 | animation: pulse 6s ease infinite; }
333 | .dropzone .dz-preview .dz-progress {
334 | opacity: 1;
335 | z-index: 1000;
336 | pointer-events: none;
337 | position: absolute;
338 | height: 16px;
339 | left: 50%;
340 | top: 50%;
341 | margin-top: -8px;
342 | width: 80px;
343 | margin-left: -40px;
344 | background: rgba(255, 255, 255, 0.9);
345 | -webkit-transform: scale(1);
346 | border-radius: 8px;
347 | overflow: hidden; }
348 | .dropzone .dz-preview .dz-progress .dz-upload {
349 | background: #333;
350 | background: linear-gradient(to bottom, #666, #444);
351 | position: absolute;
352 | top: 0;
353 | left: 0;
354 | bottom: 0;
355 | width: 0;
356 | -webkit-transition: width 300ms ease-in-out;
357 | -moz-transition: width 300ms ease-in-out;
358 | -ms-transition: width 300ms ease-in-out;
359 | -o-transition: width 300ms ease-in-out;
360 | transition: width 300ms ease-in-out; }
361 | .dropzone .dz-preview.dz-error .dz-error-message {
362 | display: block; }
363 | .dropzone .dz-preview.dz-error:hover .dz-error-message {
364 | opacity: 1;
365 | pointer-events: auto; }
366 | .dropzone .dz-preview .dz-error-message {
367 | pointer-events: none;
368 | z-index: 1000;
369 | position: absolute;
370 | display: block;
371 | display: none;
372 | opacity: 0;
373 | -webkit-transition: opacity 0.3s ease;
374 | -moz-transition: opacity 0.3s ease;
375 | -ms-transition: opacity 0.3s ease;
376 | -o-transition: opacity 0.3s ease;
377 | transition: opacity 0.3s ease;
378 | border-radius: 8px;
379 | font-size: 13px;
380 | top: 130px;
381 | left: -10px;
382 | width: 140px;
383 | background: #be2626;
384 | background: linear-gradient(to bottom, #be2626, #a92222);
385 | padding: 0.5em 1.2em;
386 | color: white; }
387 | .dropzone .dz-preview .dz-error-message:after {
388 | content: '';
389 | position: absolute;
390 | top: -6px;
391 | left: 64px;
392 | width: 0;
393 | height: 0;
394 | border-left: 6px solid transparent;
395 | border-right: 6px solid transparent;
396 | border-bottom: 6px solid #be2626; }
397 |
--------------------------------------------------------------------------------
/worker.py:
--------------------------------------------------------------------------------
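   | # Worker for the ViLBERT multi-task demo: loads the Faster R-CNN feature
   | # extractor and the multi-task ViLBERT model once at startup, then consumes
   | # question/image jobs from RabbitMQ and stores the answers via Django.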
1 | from __future__ import absolute_import
2 | import os
3 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vilbert_multitask.settings')
4 |
5 | import django
6 | django.setup()
7 |
8 | from django.conf import settings
9 | from demo.utils import log_to_terminal
10 | from demo.models import QuestionAnswer, Tasks
11 |
12 | import demo.constants as constants
13 | import pika
14 | import time
15 | import yaml
16 | import json
17 | import traceback
18 | import signal
19 | import requests
20 | import atexit
21 |
22 | django.db.close_old_connections()
23 |
24 |
25 | import sys
27 | import torch
29 | import cv2
30 | import argparse
31 | import glob
32 | import pdb
33 | import numpy as np
34 | import PIL
35 | import _pickle as cPickle
38 | import uuid
39 |
40 | from PIL import Image
41 | from easydict import EasyDict as edict
42 | from pytorch_transformers.tokenization_bert import BertTokenizer
43 |
44 | from vilbert.datasets import ConceptCapLoaderTrain, ConceptCapLoaderVal
45 | from vilbert.vilbert import VILBertForVLTasks, BertConfig, BertForMultiModalPreTraining
46 | from vilbert.task_utils import LoadDatasetEval
47 |
48 | import matplotlib.pyplot as plt
49 |
50 | from maskrcnn_benchmark.config import cfg
51 | from maskrcnn_benchmark.layers import nms
52 | from maskrcnn_benchmark.modeling.detector import build_detection_model
53 | from maskrcnn_benchmark.structures.image_list import to_image_list
54 | from maskrcnn_benchmark.utils.model_serialization import load_state_dict
55 | from types import SimpleNamespace
56 |
57 |
58 |
59 | class FeatureExtractor:
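   |     # Wraps a Detectron-style Faster R-CNN (the ResNeXt-152 checkpoint named in
   |     # get_parser below) that turns an image into region features for ViLBERT.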
60 | MAX_SIZE = 1333
61 | MIN_SIZE = 800
62 |
63 | def __init__(self):
64 | self.args = self.get_parser()
65 | self.detection_model = self._build_detection_model()
66 |
67 | def get_parser(self):
68 |         parser = SimpleNamespace(model_file='save/resnext_models/model_final.pth',
69 | config_file='save/resnext_models/e2e_faster_rcnn_X-152-32x8d-FPN_1x_MLP_2048_FPN_512_train.yaml',
70 | batch_size=1,
71 | num_features=100,
72 | feature_name="fc6",
73 | confidence_threshold=0,
74 | background=False,
75 | partition=0)
76 | return parser
77 |
78 | def _build_detection_model(self):
79 | cfg.merge_from_file(self.args.config_file)
80 | cfg.freeze()
81 |
82 | model = build_detection_model(cfg)
83 | checkpoint = torch.load(self.args.model_file, map_location=torch.device("cpu"))
84 |
85 | load_state_dict(model, checkpoint.pop("model"))
86 |
87 | model.to("cuda")
88 | model.eval()
89 | return model
90 |
91 | def _image_transform(self, path):
92 | img = Image.open(path)
93 | im = np.array(img).astype(np.float32)
94 |         # Grayscale (H, W) images: tile the single channel to 3 channels to avoid "IndexError: too many indices for array"
95 | if len(im.shape) < 3:
96 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
97 |         im = im[:, :, :3]  # drop any alpha channel
98 |         im = im[:, :, ::-1]  # RGB -> BGR for the Caffe-style detector
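   |         # Subtract the Detectron per-channel (BGR) pixel means.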
99 | im -= np.array([102.9801, 115.9465, 122.7717])
100 | im_shape = im.shape
101 | im_height = im_shape[0]
102 | im_width = im_shape[1]
103 | im_size_min = np.min(im_shape[0:2])
104 | im_size_max = np.max(im_shape[0:2])
105 |
106 | # Scale based on minimum size
107 | im_scale = self.MIN_SIZE / im_size_min
108 |
109 | # Prevent the biggest axis from being more than max_size
110 | # If bigger, scale it down
111 | if np.round(im_scale * im_size_max) > self.MAX_SIZE:
112 | im_scale = self.MAX_SIZE / im_size_max
113 |
114 | im = cv2.resize(
115 | im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
116 | )
117 | img = torch.from_numpy(im).permute(2, 0, 1)
118 |
119 | im_info = {"width": im_width, "height": im_height}
120 |
121 | return img, im_scale, im_info
122 |
123 | def _process_feature_extraction(
124 | self, output, im_scales, im_infos, feature_name="fc6", conf_thresh=0
125 | ):
126 | batch_size = len(output[0]["proposals"])
127 | n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
128 | score_list = output[0]["scores"].split(n_boxes_per_image)
129 | score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
130 | feats = output[0][feature_name].split(n_boxes_per_image)
131 | cur_device = score_list[0].device
132 |
133 | feat_list = []
134 | info_list = []
135 |
136 | for i in range(batch_size):
137 | dets = output[0]["proposals"][i].bbox / im_scales[i]
138 | scores = score_list[i]
139 | max_conf = torch.zeros((scores.shape[0])).to(cur_device)
140 | conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
141 | start_index = 1
142 | # Column 0 of the scores matrix is for the background class
143 | if self.args.background:
144 | start_index = 0
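   |             # Per-class NMS: for each box, keep the best class score that
   |             # survives suppression and exceeds the confidence threshold.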
145 | for cls_ind in range(start_index, scores.shape[1]):
146 | cls_scores = scores[:, cls_ind]
147 | keep = nms(dets, cls_scores, 0.5)
148 | max_conf[keep] = torch.where(
149 |                     # Keep the score if it beats the current best and exceeds conf_thresh
150 | (cls_scores[keep] > max_conf[keep])
151 | & (cls_scores[keep] > conf_thresh_tensor[keep]),
152 | cls_scores[keep],
153 | max_conf[keep],
154 | )
155 |
156 | sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
157 | num_boxes = (sorted_scores[: self.args.num_features] != 0).sum()
158 | keep_boxes = sorted_indices[: self.args.num_features]
159 | feat_list.append(feats[i][keep_boxes])
160 | bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
161 | # Predict the class label using the scores
162 |             objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)  # slice class columns, not boxes
163 |             cls_prob = torch.max(scores[keep_boxes][:, start_index:], dim=1)
164 |
165 | info_list.append(
166 | {
167 | "bbox": bbox.cpu().numpy(),
168 | "num_boxes": num_boxes.item(),
169 | "objects": objects.cpu().numpy(),
170 | "image_width": im_infos[i]["width"],
171 | "image_height": im_infos[i]["height"],
172 | "cls_prob": scores[keep_boxes].cpu().numpy(),
173 | }
174 | )
175 |
176 | return feat_list, info_list
177 |
178 | def get_detectron_features(self, image_paths):
179 | img_tensor, im_scales, im_infos = [], [], []
180 |
181 | for image_path in image_paths:
182 | im, im_scale, im_info = self._image_transform(image_path)
183 | img_tensor.append(im)
184 | im_scales.append(im_scale)
185 | im_infos.append(im_info)
186 |
187 | # Image dimensions should be divisible by 32, to allow convolutions
188 | # in detector to work
189 | current_img_list = to_image_list(img_tensor, size_divisible=32)
190 | current_img_list = current_img_list.to("cuda")
191 |
192 | with torch.no_grad():
193 | output = self.detection_model(current_img_list)
194 |
195 |         feat_list, info_list = self._process_feature_extraction(
196 | output,
197 | im_scales,
198 | im_infos,
199 | self.args.feature_name,
200 | self.args.confidence_threshold,
201 | )
202 |
203 |         return feat_list, info_list
204 |
205 | def _chunks(self, array, chunk_size):
206 | for i in range(0, len(array), chunk_size):
207 | yield array[i : i + chunk_size]
208 |
209 | def _save_feature(self, file_name, feature, info):
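   |         # NOTE: unused by this worker; it also assumes self.args.output_folder
   |         # is set, which get_parser() above does not define.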
210 | file_base_name = os.path.basename(file_name)
211 | file_base_name = file_base_name.split(".")[0]
212 | info["image_id"] = file_base_name
213 | info["features"] = feature.cpu().numpy()
214 | file_base_name = file_base_name + ".npy"
215 |
216 | np.save(os.path.join(self.args.output_folder, file_base_name), info)
217 |
218 | def extract_features(self, image_path):
219 |
220 | with torch.no_grad():
221 | features, infos = self.get_detectron_features(image_path)
222 |
223 | return features, infos
224 |
225 |
226 | def tokenize_batch(batch):
227 | return [tokenizer.convert_tokens_to_ids(sent) for sent in batch]
228 |
229 | def untokenize_batch(batch):
230 | return [tokenizer.convert_ids_to_tokens(sent) for sent in batch]
231 |
232 | def detokenize(sent):
233 | """ Roughly detokenizes (mainly undoes wordpiece) """
234 | new_sent = []
235 | for i, tok in enumerate(sent):
236 | if tok.startswith("##"):
237 | new_sent[len(new_sent) - 1] = new_sent[len(new_sent) - 1] + tok[2:]
238 | else:
239 | new_sent.append(tok)
240 | return new_sent
241 |
242 | def printer(sent, should_detokenize=True):
243 | if should_detokenize:
244 | sent = detokenize(sent)[1:-1]
245 | print(" ".join(sent))
246 |
247 |
248 | def prediction(question, features, spatials, segment_ids, input_mask, image_mask, co_attention_mask, task_tokens, task_id, infos):
249 |
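   |     # Task ids handled below: 1/2 VQA, 15 GQA, 13 visual entailment (SNLI-VE),
   |     # 12 image-pair matching (NLVR2), 7 image retrieval, and 4/11/16
   |     # grounding-style tasks (pointing / referring expressions / GuessWhat).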
250 |     if task_id == "7":
251 |         N = len(infos)  # number of top-ranked results to return
252 |     else:
253 |         N = 3
254 |
255 |     # Check that the number of images matches the task:
256 |     if task_id in ["1", "15", "13", "11", "4", "16"]:
257 |         assert len(infos) == 1, "task requires exactly 1 image"
258 |     elif task_id in ["12"]:
259 |         assert len(infos) == 2, "task requires exactly 2 images"
260 |     elif task_id in ["7"]:
261 |         assert 1 < len(infos) <= 10, "task requires 2-10 images"
262 |     else:
263 |         raise ValueError("invalid task_id: %s" % task_id)
264 |
265 |
266 | if task_id == "12":
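   |         # NLVR2 pairs one statement with two images: repeat the text inputs so
   |         # each image of the pair gets its own copy.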
267 | batch_size = 1
268 | max_num_bbox = features.size(1)
269 | num_options = question.size(1)
270 | question = question.repeat(2, 1)
271 | # question = question.view(batch_size * 2, int(question.size(1) / 2))
272 | input_mask = input_mask.repeat(2, 1)
273 | # input_mask = input_mask.view(batch_size * 2, int(input_mask.size(1) / 2))
274 | segment_ids = segment_ids.repeat(2, 1)
275 | # segment_ids = segment_ids.view(batch_size * 2, int(segment_ids.size(1) / 2))
276 | task_tokens = task_tokens.repeat(2, 1)
277 |
278 | if task_id == "7":
279 | num_image = features.size(0)
280 | max_num_bbox = features.size(1)
281 | question = question.repeat(num_image, 1)
282 | input_mask = input_mask.repeat(num_image, 1)
283 | segment_ids = segment_ids.repeat(num_image, 1)
284 | task_tokens = task_tokens.repeat(num_image, 1)
285 |
286 | with torch.no_grad():
287 | vil_prediction, vil_prediction_gqa, vil_logit, vil_binary_prediction, vil_tri_prediction, vision_prediction, vision_logit, linguisic_prediction, linguisic_logit, attn_data_list = model(
288 | question, features, spatials, segment_ids, input_mask, image_mask, co_attention_mask, task_tokens, output_all_attention_masks=True
289 | )
290 |
293 |
294 | # Load VQA label to answers:
295 | if task_id == "1" or task_id == "2":
296 | prob = torch.softmax(vil_prediction.view(-1), dim=0)
297 | prob_val, prob_idx = torch.sort(prob, 0, True)
298 |
299 |         label2ans_path = os.path.join("save", "VQA", "cache", "trainval_label2ans.pkl")
300 | vqa_label2ans = cPickle.load(open(label2ans_path, "rb"))
301 | answer = [vqa_label2ans[prob_idx[i].item()] for i in range(N)]
302 | confidence = [prob_val[i].item() for i in range(N)]
303 | output = {
304 | "top3_answer": answer,
305 | "top3_confidence": confidence
306 | }
307 | return output
308 |
309 | # Load GQA label to answers:
310 | if task_id == "15":
311 |         label2ans_path = os.path.join("save", "gqa", "cache", "trainval_label2ans.pkl")
312 |
313 | prob_gqa = torch.softmax(vil_prediction_gqa.view(-1), dim=0)
314 | prob_val, prob_idx = torch.sort(prob_gqa, 0, True)
315 | gqa_label2ans = cPickle.load(open(label2ans_path, "rb"))
316 |
317 | answer = [gqa_label2ans[prob_idx[i].item()] for i in range(N)]
318 | confidence = [prob_val[i].item() for i in range(N)]
319 | output = {
320 | "top3_answer": answer,
321 | "top3_confidence": confidence
322 | }
323 | return output
324 |
325 |     # NLVR2 (task 12) binary prediction: 0 = False, 1 = True
326 | if task_id == "12":
327 | label_map = {0:"False", 1:"True"}
328 |
329 | prob_binary = torch.softmax(vil_binary_prediction.view(-1), dim=0)
330 | prob_val, prob_idx = torch.sort(prob_binary, 0, True)
331 |
332 | answer = [label_map[prob_idx[i].item()] for i in range(2)]
333 | confidence = [prob_val[i].item() for i in range(2)]
334 | output = {
335 | "top3_answer": answer,
336 | "top3_confidence": confidence
337 | }
338 | return output
339 |
340 |     # Visual entailment (SNLI-VE, task 13):
341 | if task_id == "13":
342 | label_map = {0:"contradiction (false)", 1:"neutral", 2:"entailment (true)"}
343 |
345 | prob_tri = torch.softmax(vil_tri_prediction.view(-1), dim=0)
346 | prob_val, prob_idx = torch.sort(prob_tri, 0, True)
347 |
348 | answer = [label_map[prob_idx[i].item()] for i in range(3)]
349 | confidence = [prob_val[i].item() for i in range(3)]
350 | output = {
351 | "top3_answer": answer,
352 | "top3_confidence": confidence
353 | }
354 | return output
355 |
356 |     # Image retrieval (task 7): rank the candidate images by vil_logit.
358 | if task_id == "7":
359 | sort_val, sort_idx = torch.sort(torch.softmax(vil_logit.view(-1), dim=0), 0, True)
360 |
361 | idx = [sort_idx[i].item() for i in range(N)]
362 | confidence = [sort_val[i].item() for i in range(N)]
363 | output = {
364 | "top3_answer": idx,
365 | "top3_confidence": confidence
366 | }
367 | return output
368 |
369 |     # Grounding (tasks 4/11/16): return the top-N boxes ranked by vision_logit,
370 |     # scaled back to pixel coordinates.
371 | if task_id == "11" or task_id == "4" or task_id == "16":
372 | image_w = infos[0]['image_width']
373 | image_h = infos[0]['image_height']
374 | prob = torch.softmax(vision_logit.view(-1), dim=0)
375 | grounding_val, grounding_idx = torch.sort(prob, 0, True)
376 | out = []
377 | for i in range(N):
378 | idx = grounding_idx[i]
379 | val = grounding_val[i]
380 | box = spatials[0][idx][:4].tolist()
381 | y1 = int(box[1] * image_h)
382 | y2 = int(box[3] * image_h)
383 | x1 = int(box[0] * image_w)
384 | x2 = int(box[2] * image_w)
385 | out.append({"y1":y1, "y2":y2, "x1":x1, "x2":x2, 'confidence':val.item()*100})
386 | return out
387 |
388 | def custom_prediction(query, task, features, infos, task_id):
389 |
390 | # if task is Guesswhat:
391 | if task_id in ["16"]:
392 | tokens_list = []
393 | dialogs = query.split("q:")[1:]
394 | for dialog in dialogs:
395 | QA_pair = dialog.split("a:")
396 | tokens_list.append("start " + QA_pair[0] + " answer " + QA_pair[1] + " stop ")
397 |
398 |         dialog_text = ""
399 |         for token in tokens_list:
400 |             dialog_text += token
   |         # Use the reformatted dialog as the text that gets encoded below.
401 |         query = dialog_text
402 | tokens = tokenizer.encode(query)
403 | tokens = tokenizer.add_special_tokens_single_sentence(tokens)
404 |
405 | segment_ids = [0] * len(tokens)
406 | input_mask = [1] * len(tokens)
407 |
408 | max_length = 37
409 | if len(tokens) < max_length:
410 |         # Pad tokens and both masks at the end, up to max_length.
411 | padding = [0] * (max_length - len(tokens))
412 | tokens = tokens + padding
413 | input_mask += padding
414 | segment_ids += padding
415 |
416 | text = torch.from_numpy(np.array(tokens)).cuda().unsqueeze(0)
417 | input_mask = torch.from_numpy(np.array(input_mask)).cuda().unsqueeze(0)
418 | segment_ids = torch.from_numpy(np.array(segment_ids)).cuda().unsqueeze(0)
419 | task = torch.from_numpy(np.array(task)).cuda().unsqueeze(0)
420 |
421 | num_image = len(infos)
422 |
423 | feature_list = []
424 | image_location_list = []
425 | image_mask_list = []
426 | for i in range(num_image):
427 | image_w = infos[i]['image_width']
428 | image_h = infos[i]['image_height']
429 | feature = features[i]
430 | num_boxes = feature.shape[0]
431 |
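   |         # Prepend a "global" region: the mean of all box features, paired with
   |         # a full-image bounding box (g_location below).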
432 | g_feat = torch.sum(feature, dim=0) / num_boxes
433 | num_boxes = num_boxes + 1
434 | feature = torch.cat([g_feat.view(1,-1), feature], dim=0)
435 | boxes = infos[i]['bbox']
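   |         # 5-dim location per box: corners normalized by image size (cols 0-3)
   |         # plus box area as a fraction of the image area (col 4).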
436 | image_location = np.zeros((boxes.shape[0], 5), dtype=np.float32)
437 | image_location[:,:4] = boxes
438 | image_location[:,4] = (image_location[:,3] - image_location[:,1]) * (image_location[:,2] - image_location[:,0]) / (float(image_w) * float(image_h))
439 | image_location[:,0] = image_location[:,0] / float(image_w)
440 | image_location[:,1] = image_location[:,1] / float(image_h)
441 | image_location[:,2] = image_location[:,2] / float(image_w)
442 | image_location[:,3] = image_location[:,3] / float(image_h)
443 | g_location = np.array([0,0,1,1,1])
444 | image_location = np.concatenate([np.expand_dims(g_location, axis=0), image_location], axis=0)
445 | image_mask = [1] * (int(num_boxes))
446 |
447 | feature_list.append(feature)
448 | image_location_list.append(torch.tensor(image_location))
449 | image_mask_list.append(torch.tensor(image_mask))
450 |
451 |
452 | features = torch.stack(feature_list, dim=0).float().cuda()
453 | spatials = torch.stack(image_location_list, dim=0).float().cuda()
454 | image_mask = torch.stack(image_mask_list, dim=0).byte().cuda()
455 | co_attention_mask = torch.zeros((num_image, num_boxes, max_length)).cuda()
456 |
457 | answer = prediction(text, features, spatials, segment_ids, input_mask, image_mask, co_attention_mask, task, task_id, infos)
458 | return answer
459 |
460 | # =============================
461 | # ViLBERT Model Loading Part
462 | # =============================
463 | def load_vilbert_model():
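   |     # Populate the module-level globals used by callback()/custom_prediction().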
464 | global feature_extractor
465 | global tokenizer
466 | global model
467 |
468 | feature_extractor = FeatureExtractor()
469 |
470 |     args = SimpleNamespace(from_pretrained="save/multitask_model/pytorch_model_9.bin",
471 | bert_model="bert-base-uncased",
472 | config_file="config/bert_base_6layer_6conect.json",
473 | max_seq_length=101,
474 | train_batch_size=1,
475 | do_lower_case=True,
476 | predict_feature=False,
477 | seed=42,
478 | num_workers=0,
479 | baseline=False,
480 | img_weight=1,
481 | distributed=False,
482 | objective=1,
483 | visual_target=0,
484 | dynamic_attention=False,
485 | task_specific_tokens=True,
486 | tasks='1',
487 | save_name='',
488 | in_memory=False,
489 | batch_size=1,
490 | local_rank=-1,
491 | split='mteval',
492 | clean_train_sets=True
493 | )
494 |
495 | config = BertConfig.from_json_file(args.config_file)
496 | with open('./vilbert_tasks.yml', 'r') as f:
497 | task_cfg = edict(yaml.safe_load(f))
498 |
499 | task_names = []
500 | for i, task_id in enumerate(args.tasks.split('-')):
501 | task = 'TASK' + task_id
502 | name = task_cfg[task]['name']
503 | task_names.append(name)
504 |
505 | timeStamp = args.from_pretrained.split('/')[-1] + '-' + args.save_name
507 | default_gpu=True
508 |
509 | if args.predict_feature:
510 | config.v_target_size = 2048
511 | config.predict_feature = True
512 | else:
513 | config.v_target_size = 1601
514 | config.predict_feature = False
515 |
516 | if args.task_specific_tokens:
517 | config.task_specific_tokens = True
518 |
519 | if args.dynamic_attention:
520 | config.dynamic_attention = True
521 |
522 | config.visualization = True
523 | num_labels = 3129
524 |
525 | if args.baseline:
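   |         # NOTE: unreachable with args.baseline=False above, and
   |         # BaseBertForVLTasks is not imported in this module.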
526 | model = BaseBertForVLTasks.from_pretrained(
527 | args.from_pretrained, config=config, num_labels=num_labels, default_gpu=default_gpu
528 | )
529 | else:
530 | model = VILBertForVLTasks.from_pretrained(
531 | args.from_pretrained, config=config, num_labels=num_labels, default_gpu=default_gpu
532 | )
533 |
534 | model.eval()
535 | cuda = torch.cuda.is_available()
536 |     if cuda:
   |         model = model.cuda(0)
537 | tokenizer = BertTokenizer.from_pretrained(
538 | args.bert_model, do_lower_case=args.do_lower_case
539 | )
540 |
541 |
542 | def callback(ch, method, properties, body):
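   |     # Expected message body (illustrative example):
   |     #   {"task_id": "1", "question": "what is on the table?",
   |     #    "image_path": ["/path/to/image.jpg"], "socket_id": "<socket id>"}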
543 | print("I'm callback")
544 | start = time.time()
545 |     body = yaml.safe_load(body)  # yaml.safe_load also parses JSON, and avoids the u''-wrapped string values json.loads produces on Python 2
546 | print(" [x] Received %r" % body)
547 | try:
548 | task = Tasks.objects.get(unique_id=int(body["task_id"]))
549 | question_obj = QuestionAnswer.objects.create(task=task,
550 | input_text=body['question'],
551 | input_images=body['image_path'],
552 | socket_id=body['socket_id'])
553 | print("created question answer object")
554 |     except Exception:
555 |         print(traceback.format_exc())
556 | try:
557 | image_path = body["image_path"]
558 | features, infos = feature_extractor.extract_features(image_path)
559 | query = body["question"]
560 | socket_id = body["socket_id"]
561 | task_id = body["task_id"]
562 |         task = [int(task_id)]  # parse the numeric id instead of calling eval() on queue input
563 | answer = custom_prediction(query, task, features, infos, task_id)
564 | if (task_id == "1" or task_id == "15" or task_id == "2" or task_id == "13"):
565 | top3_answer = answer["top3_answer"]
566 | top3_confidence = answer["top3_confidence"]
567 | top3_list = []
568 | for i in range(3):
569 | temp = {}
570 | temp["answer"] = top3_answer[i]
571 | temp["confidence"] = round(top3_confidence[i]*100, 2)
572 | top3_list.append(temp)
573 |
574 | result = {
575 | "task_id": task_id,
576 | "result": top3_list
577 | }
578 | print("The task result is", result)
579 | question_obj.answer_text = result
580 | question_obj.save()
581 |
582 | if (task_id == "4" or task_id == "16" or task_id == "11"):
583 | print("The answer is", answer)
585 |
586 | image = image_path[0].split("/")
587 | abs_path = ""
588 | for i in range(len(image)-3):
589 | abs_path += image[i]
590 | abs_path += "/"
591 |             color_list = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]  # BGR: red, green, blue for the top boxes
592 | image_name_list = []
593 | confidence_list = []
594 | for i, j in zip(answer, color_list):
595 | image_obj = cv2.imread(image_path[0])
596 | image_name = uuid.uuid4()
597 | image_with_bounding_boxes = cv2.rectangle(image_obj, (i["x1"], i["y1"]), (i["x2"], i["y2"]), j, 4)
598 | image_name_list.append(str(image_name))
599 | confidence_list.append(round(i["confidence"], 2))
600 | cv2.imwrite(os.path.join(abs_path, "media", "refer_expressions_task", str(image_name)+ ".jpg"), image_with_bounding_boxes)
601 | result = {
602 | "task_id": task_id,
603 | "image_name_list": image_name_list,
604 | "confidence_list": confidence_list
605 | }
606 | question_obj.answer_images = result
607 | question_obj.save()
608 |
609 | if (task_id == "12"):
610 | print(answer)
611 | top3_answer = answer["top3_answer"]
612 | top3_confidence = answer["top3_confidence"]
613 | top3_list = []
614 | for i in range(2):
615 | temp = {}
616 | temp["answer"] = top3_answer[i]
617 | temp["confidence"] = round(top3_confidence[i]*100, 2)
618 | top3_list.append(temp)
619 | result = {
620 | "task_id": task_id,
621 | "result": top3_list
622 | }
623 | question_obj.answer_text = result
624 | question_obj.save()
625 |
626 | if (task_id == "7"):
627 | top3_answer = answer["top3_answer"]
628 | top3_confidence = answer["top3_confidence"]
629 | image_name_list = []
630 | confidence_list = []
631 | for i in range(len(top3_answer)):
632 | print(image_path[top3_answer[i]])
633 | if "demo" in image_path[0].split("/"):
634 | image_name_list.append("demo/" + os.path.split(image_path[top3_answer[i]])[1].split(".")[0] + "." + str(image_path[0].split("/")[-1].split(".")[1]))
635 | else:
636 | image_name_list.append("test2014/" + os.path.split(image_path[top3_answer[i]])[1].split(".")[0] + "." + str(image_path[0].split("/")[-1].split(".")[1]))
637 | confidence_list.append(round(top3_confidence[i]*100, 2))
638 | result = {
639 | "task_id": task_id,
640 | "image_name_list": image_name_list,
641 | "confidence_list": confidence_list
642 | }
643 | print("The result is", result)
644 | question_obj.answer_images = result
645 | question_obj.save()
646 |
647 | log_to_terminal(body['socket_id'], {"terminal": json.dumps(result)})
648 | log_to_terminal(body['socket_id'], {"result": json.dumps(result)})
649 | log_to_terminal(body['socket_id'], {"terminal": "Completed Task"})
650 | ch.basic_ack(delivery_tag=method.delivery_tag)
651 | print("Message Deleted")
652 | django.db.close_old_connections()
653 |     except Exception:
654 |         print(traceback.format_exc())
656 |
657 | end = time.time()
658 | print("Time taken is", end - start)
659 |
660 |
661 | def main():
662 |     # Load the feature extractor and ViLBERT model into module-level globals
663 | load_vilbert_model()
664 | connection = pika.BlockingConnection(pika.ConnectionParameters(
665 | host='localhost',
666 | port=5672,
667 | socket_timeout=10000))
668 | channel = connection.channel()
669 | channel.queue_declare(queue='vilbert_multitask_queue', durable=True)
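   |     # Durable queue: pending jobs survive a broker restart; messages are acked
   |     # in callback() only after the answer has been saved and logged.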
670 | print('[*] Waiting for messages. To exit press CTRL+C')
671 |     # Consume jobs published by the web interface
672 | channel.basic_consume('vilbert_multitask_queue', callback)
673 | channel.start_consuming()
674 |
675 | if __name__ == "__main__":
676 | main()
677 |
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/result.html:
--------------------------------------------------------------------------------
(template markup lost in extraction; the recoverable content is the demo's result panel with a "Select Task" dropdown offering: Visual question answering (e.g., VQA), Spatial reasoning question answering (e.g., GQA), Pointing question answering (e.g., Visual7W), Referring expression (e.g., RefCOCO), Referring dialog (e.g., GuessWhat), Visual entailment (e.g., SNLI-VE), Image-pair caption matching (e.g., NLVR2), and Image retrieval (e.g., COCO).)
--------------------------------------------------------------------------------
/demo/templates/vilbert_multitask/header.html:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/demo/static/js/dropzone-5.7.0/dist/min/dropzone-amd-module.min.js:
--------------------------------------------------------------------------------
1 | !function(e){"function"==typeof define&&define.amd?define(["jquery"],e):e(jQuery)}(function(e){var t={exports:{}};function n(e){return(n="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e})(e)}function a(e,t){return!t||"object"!==n(t)&&"function"!=typeof t?s(e):t}function l(e){return(l=Object.setPrototypeOf?Object.getPrototypeOf:function(e){return e.__proto__||Object.getPrototypeOf(e)})(e)}function s(e){if(void 0===e)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return e}function i(e,t){return(i=Object.setPrototypeOf||function(e,t){return e.__proto__=t,e})(e,t)}function u(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function r(e,t){for(var n=0;n '),this.element.appendChild(e));var l=e.getElementsByTagName("span")[0];return l&&(null!=l.textContent?l.textContent=this.options.dictFallbackMessage:null!=l.innerText&&(l.innerText=this.options.dictFallbackMessage)),this.element.appendChild(this.getFallbackForm())},resize:function(e,t,n,i){var r={srcX:0,srcY:0,srcWidth:e.width,srcHeight:e.height},o=e.width/e.height;null==t&&null==n?(t=r.srcWidth,n=r.srcHeight):null==t?t=n*o:null==n&&(n=t/o);var a=(t=Math.min(t,r.srcWidth))/(n=Math.min(n,r.srcHeight));if(r.srcWidth>t||r.srcHeight>n)if("crop"===i)a