├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── abs_metric.py ├── abs_render.py ├── abs_util_orig.py ├── attention.py ├── baseline1_eval.py ├── baseline1_models.py ├── baseline1_train.py ├── baseline2_eval.py ├── baseline2_models.py ├── baseline2_train.py ├── baseline3_eval.py ├── baseline3_models.py ├── baseline3_train.py ├── baseline4_eval.py ├── baseline4_models.py ├── baseline4_train.py ├── codraw_data.py ├── datagen.py ├── episode.py ├── eval_automatic.py ├── eval_run_bots.py ├── eval_transcripts.py ├── example.eval_server_common.py ├── exp28_scenenn.py ├── interactivity.py ├── model.py ├── nkfb_util.py ├── packer.py ├── saved_models.py └── transcripts-eval-v1.json /.gitignore: -------------------------------------------------------------------------------- 1 | # This file contains redis configuration/passwords 2 | eval_server_common.py 3 | 4 | 5 | ### https://raw.github.com/github/gitignore/HEAD/Python.gitignore 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | 113 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to codraw-models 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 
4 | 5 | ## Our Development Process 6 | Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis. 7 | 8 | 9 | ## Pull Requests 10 | We actively welcome your pull requests. 11 | 12 | 1. Fork the repo and create your branch from `master`. 13 | 2. If you've added code that should be tested, add tests. 14 | 3. If you've changed APIs, update the documentation. 15 | 4. Ensure that our pre-trained models can still be loaded and that evaluating those models gives the expected results. 16 | 5. Make sure your code lints. 17 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 18 | 19 | ## Contributor License Agreement ("CLA") 20 | In order to accept your pull request, we need you to submit a CLA. You only need 21 | to do this once to work on any of Facebook's open source projects. 22 | 23 | Complete your CLA here: 24 | 25 | ## Issues 26 | We use GitHub issues to track public bugs. Please ensure your description is 27 | clear and has sufficient instructions to be able to reproduce the issue. 28 | 29 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 30 | disclosure of security bugs. In those cases, please go through the process 31 | outlined on that page and do not file a public issue. 32 | 33 | ## License 34 | By contributing to codraw-models, you agree that your contributions will be 35 | licensed under the LICENSE file in the root directory of this source tree. 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. 
This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | Section 1 -- Definitions. 71 | 72 | a. Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. 
Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. 
The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. 
a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 
314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. To the extent possible, if any provision of this Public License is 365 | deemed unenforceable, it shall be automatically reformed to the 366 | minimum extent necessary to make it enforceable. If the provision 367 | cannot be reformed, it shall be severed from this Public License 368 | without affecting the enforceability of the remaining terms and 369 | conditions. 370 | 371 | c. No term or condition of this Public License will be waived and no 372 | failure to comply consented to unless expressly agreed to by the 373 | Licensor. 374 | 375 | d. Nothing in this Public License constitutes or may be interpreted 376 | as a limitation upon, or waiver of, any privileges and immunities 377 | that apply to the Licensor or You, including from the legal 378 | processes of any jurisdiction or authority. 379 | 380 | ======================================================================= 381 | 382 | Creative Commons is not a party to its public 383 | licenses. Notwithstanding, Creative Commons may elect to apply one of 384 | its public licenses to material it publishes and in those instances 385 | will be considered the “Licensor.” The text of the Creative Commons 386 | public licenses is dedicated to the public domain under the CC0 Public 387 | Domain Dedication. 
Except for the limited purpose of indicating that 388 | material is shared under a Creative Commons public license or as 389 | otherwise permitted by the Creative Commons policies published at 390 | creativecommons.org/policies, Creative Commons does not authorize the 391 | use of the trademark "Creative Commons" or any other trademark or logo 392 | of Creative Commons without its prior written consent including, 393 | without limitation, in connection with any unauthorized modifications 394 | to any of its public licenses or any other arrangements, 395 | understandings, or agreements concerning use of licensed material. For 396 | the avoidance of doubt, this paragraph does not form part of the 397 | public licenses. 398 | 399 | Creative Commons may be contacted at creativecommons.org. 400 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CoDraw Models 2 | 3 | This repository contains models for the Collaborative Drawing (CoDraw) task. 4 | 5 | ## Installation 6 | 7 | Dependencies: 8 | * Python 3.6 or later 9 | * PyTorch 0.4.0 10 | * **IMPORTANT**: Our pre-trained models are **not** compatible with PyTorch 0.4.1. Please use version 0.4.0 exactly. 11 | * `Pillow` 5.1.0 (or compatible) 12 | * `editdistance` 0.4 (or compatible) 13 | 14 | You will need to clone both this repository and the [CoDraw dataset repository](https://github.com/facebookresearch/CoDraw) into side-by-side folders. The README for the dataset repository contains additional instructions for downloading required dataset files. 15 | 16 | The following relative paths should be reachable from within this repository: 17 | ```console 18 | $ ls -d ../CoDraw/Pngs 19 | ../CoDraw/Pngs 20 | $ ls -d ../CoDraw/dataset/CoDraw_1_0.json 21 | ../CoDraw/dataset/CoDraw_1_0.json 22 | ``` 23 | 24 | ### Pre-trained models 25 | 26 | Pre-trained models can be downloaded from this link: [models.tar.gz](https://github.com/facebookresearch/codraw-models/releases/download/models/models.tar.gz). The archive contains a `models/` folder that should be placed at the root of this repository. 27 | 28 | ## Usage 29 | 30 | ### Automated evaluation 31 | 32 | After downloading our pre-trained models, run `python eval_automatic.py` to calculate the machine-machine and script-based scene similarity numbers that we report in our paper. 33 | 34 | ### Training new models 35 | 36 | Before training any models, please make sure that the `models` and `rl_models` folders exist within this repo: `mkdir -p models rl_models`. 37 | 38 | Each of the following commands trains a subset of the models we report in the paper: 39 | * `python baseline1_train.py` 40 | * `python baseline2_train.py` 41 | * `python baseline3_train.py` 42 | * `python baseline4_train.py` 43 | 44 | Trained models are loaded by the function `load_baseline1` in `baseline1_models.py` and its counterparts in `baseline2_models.py`, `baseline3_models.py`, `baseline4_models.py`. Note that all of these functions use hard-coded paths that match our pre-trained model release; you will probably need to change these paths if you train your own models. Also note that the training process of some of the later models relies on the existence of earlier ones. 45 | 46 | ### Evaluating playing with humans 47 | 48 | The transcripts for the human-machine evaluation reported in our paper are in the `transcripts-eval-v1.json` file in this repository. 
To compute the scene similarity scores we report in our paper, update the `TRANSCRIPTS_PATH` variable in `eval_transcripts.py` and then run `python eval_transcripts.py`. 49 | 50 | ## Reference 51 | 52 | If you find this code useful in your research, we'd really appreciate it if you could cite the following paper: 53 | 54 | ``` 55 | @article{CoDraw, 56 | author = {Kim, Jin-Hwa and Kitaev, Nikita and Chen, Xinlei and Rohrbach, Marcus and Tian, Yuandong and Batra, Dhruv and Parikh, Devi}, 57 | journal = {arXiv preprint arXiv:1712.05558}, 58 | title = {{CoDraw: Collaborative Drawing as a Testbed for Grounded Goal-driven Communication}}, 59 | url = {http://arxiv.org/abs/1712.05558}, 60 | year = {2019} 61 | } 62 | ``` 63 | 64 | ## License 65 | 66 | This repository is licensed under Creative Commons Attribution-NonCommercial 4.0 International Public License, as found in the LICENSE file. 67 | -------------------------------------------------------------------------------- /abs_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | 9 | def scene_similarity_orig(pred, target): 10 | """ 11 | DEPRECATED: use scene_similarity instead! 12 | 13 | This is a re-implementation of the original CoDraw similarity metric, as per 14 | https://arxiv.org/abs/1712.05558v1 15 | """ 16 | idx1 = set(x.idx for x in target) 17 | idx2 = set(x.idx for x in pred) 18 | iou = len(idx1 & idx2) / len(idx1 | idx2) 19 | 20 | common_idxs = list(idx1 & idx2) 21 | match1 = [[x for x in target if x.idx == idx][0] for idx in common_idxs] 22 | match2 = [[x for x in pred if x.idx == idx][0] for idx in common_idxs] 23 | 24 | num = np.zeros(7) 25 | denom = np.zeros(7) 26 | 27 | num[0] = 1 28 | 29 | for c1, c2 in zip(match1, match2): 30 | if c1.idx not in c1.HUMAN_IDXS: 31 | num[1] += int(c1.flip != c2.flip) 32 | denom[1] += 1 33 | else: 34 | num[2] += int(c1.subtype != c2.subtype or c1.flip != c2.flip) 35 | denom[2] += 1 36 | 37 | num[3] += int(c1.depth != c2.depth) 38 | num[4] += np.sqrt((c1.normed_x - c2.normed_x) ** 2 + (c1.normed_y - c2.normed_y) ** 2) 39 | denom[3] += 1 40 | denom[4] += 1 41 | 42 | for idx_i in range(len(match1)): 43 | for idx_j in range(len(match1)): 44 | c1i, c1j = match1[idx_i], match1[idx_j] 45 | c2i, c2j = match2[idx_i], match2[idx_j] 46 | 47 | # NOTE(nikita): the metric, as originally defined, pairs up objects 48 | # with themselves, and also yields misleadingly high results for 49 | # models that place multiple clipart at the exact same location 50 | # (e.g. a model that places all clipart in the center of the canvas 51 | # will receive zero relative-position penalty) 52 | num[5] += int((c1i.x - c1j.x) * (c2i.x - c2j.x) < 0) 53 | num[6] += int((c1i.y - c1j.y) * (c2i.y - c2j.y) < 0) 54 | denom[5] += 1 55 | denom[6] += 1 56 | 57 | denom = np.maximum(denom, 1) 58 | 59 | score_components = iou * (num / denom) 60 | score_weights = np.array([5,-1,-1,-1,-1,-0.5,-0.5]) 61 | 62 | return score_components @ score_weights 63 | 64 | def scene_similarity_v1(pred, target): 65 | """ 66 | DEPRECATED: use scene_similarity instead! 67 | 68 | The similarity metric used for initial experiments prior to June 8, 2018. 
69 | Both this metric and scene_similarity_orig have corner cases where adding a 70 | new, correct clipart to the scene can actually cause the similarity score 71 | to decrease. 72 | """ 73 | idx1 = set(x.idx for x in target) 74 | idx2 = set(x.idx for x in pred) 75 | iou = len(idx1 & idx2) / len(idx1 | idx2) 76 | 77 | common_idxs = list(idx1 & idx2) 78 | match1 = [[x for x in target if x.idx == idx][0] for idx in common_idxs] 79 | match2 = [[x for x in pred if x.idx == idx][0] for idx in common_idxs] 80 | 81 | num = np.zeros(7) 82 | denom = np.zeros(7) 83 | 84 | num[0] = 1 85 | 86 | for c1, c2 in zip(match1, match2): 87 | if c1.idx not in c1.HUMAN_IDXS: 88 | num[1] += int(c1.flip != c2.flip) 89 | denom[1] += 1 90 | else: 91 | num[2] += int(c1.subtype != c2.subtype or c1.flip != c2.flip) 92 | denom[2] += 1 93 | 94 | num[3] += int(c1.depth != c2.depth) 95 | num[4] += np.sqrt((c1.normed_x - c2.normed_x) ** 2 + (c1.normed_y - c2.normed_y) ** 2) 96 | denom[3] += 1 97 | denom[4] += 1 98 | 99 | for idx_i in range(len(match1)): 100 | for idx_j in range(idx_i, len(match1)): 101 | if idx_i == idx_j: 102 | continue 103 | c1i, c1j = match1[idx_i], match1[idx_j] 104 | c2i, c2j = match2[idx_i], match2[idx_j] 105 | 106 | # TODO(nikita): this doesn't correctly handle the case if two 107 | # cliparts have *exactly* the same x/y coordinates in the target 108 | num[5] += int((c1i.x - c1j.x) * (c2i.x - c2j.x) <= 0) 109 | num[6] += int((c1i.y - c1j.y) * (c2i.y - c2j.y) <= 0) 110 | denom[5] += 1 111 | denom[6] += 1 112 | 113 | denom = np.maximum(denom, 1) 114 | 115 | score_components = iou * (num / denom) 116 | score_weights = np.array([5,-1,-1,-1,-1,-0.5,-0.5]) 117 | 118 | return score_components @ score_weights 119 | 120 | 121 | def scene_similarity_v2(pred, target): 122 | """ 123 | DEPRECATED: use scene_similarity instead! 124 | 125 | This version of the scene similarity metric should be monotonic, in the 126 | sense that adding correct clipart should always increase the score, adding 127 | incorrect clipart should decrease it, and removing incorrect clipart should 128 | increase it. 
129 | 130 | This version jointly scores subtype/flip/depth for humans, which was later 131 | replaced with a more fine-grained scoring 132 | """ 133 | idx1 = set(x.idx for x in target) 134 | idx2 = set(x.idx for x in pred) 135 | iou = len(idx1 & idx2) / len(idx1 | idx2) 136 | 137 | intersection_size = len(idx1 & idx2) 138 | union_size = len(idx1 | idx2) 139 | 140 | common_idxs = list(idx1 & idx2) 141 | match1 = [[x for x in target if x.idx == idx][0] for idx in common_idxs] 142 | match2 = [[x for x in pred if x.idx == idx][0] for idx in common_idxs] 143 | 144 | num = np.zeros(7) 145 | denom = np.zeros(7) 146 | 147 | num[0] = intersection_size 148 | 149 | for c1, c2 in zip(match1, match2): 150 | if c1.idx not in c1.HUMAN_IDXS: 151 | num[1] += int(c1.flip != c2.flip) 152 | else: 153 | num[2] += int(c1.subtype != c2.subtype or c1.flip != c2.flip) 154 | num[3] += int(c1.depth != c2.depth) 155 | num[4] += np.sqrt((c1.normed_x - c2.normed_x) ** 2 + (c1.normed_y - c2.normed_y) ** 2) 156 | 157 | denom[:5] = union_size 158 | 159 | for idx_i in range(len(match1)): 160 | for idx_j in range(idx_i, len(match1)): 161 | if idx_i == idx_j: 162 | continue 163 | c1i, c1j = match1[idx_i], match1[idx_j] 164 | c2i, c2j = match2[idx_i], match2[idx_j] 165 | 166 | # TODO(nikita): this doesn't correctly handle the case if two 167 | # cliparts have *exactly* the same x/y coordinates in the target 168 | num[5] += int((c1i.x - c1j.x) * (c2i.x - c2j.x) <= 0) 169 | num[6] += int((c1i.y - c1j.y) * (c2i.y - c2j.y) <= 0) 170 | 171 | denom[5:] = union_size * (intersection_size - 1) 172 | 173 | denom = np.maximum(denom, 1) 174 | 175 | score_components = num / denom 176 | score_weights = np.array([5,-1,-1,-1,-1,-1,-1]) 177 | 178 | return score_components @ score_weights 179 | 180 | 181 | def scene_similarity(pred, target): 182 | """ 183 | This version of the scene similarity metric should be monotonic, in the 184 | sense that adding correct clipart should always increase the score, adding 185 | incorrect clipart should decrease it, and removing incorrect clipart should 186 | increase it. It also breaks out the different components of Mike/Jenny: 187 | flip, expression, and pose; as well as capping distance error at 1. 
188 | """ 189 | idx1 = set(x.idx for x in target) 190 | idx2 = set(x.idx for x in pred) 191 | iou = len(idx1 & idx2) / len(idx1 | idx2) 192 | 193 | intersection_size = len(idx1 & idx2) 194 | union_size = len(idx1 | idx2) 195 | 196 | common_idxs = list(idx1 & idx2) 197 | match1 = [[x for x in target if x.idx == idx][0] for idx in common_idxs] 198 | match2 = [[x for x in pred if x.idx == idx][0] for idx in common_idxs] 199 | 200 | num = np.zeros(8) 201 | denom = np.zeros(8) 202 | 203 | num[0] = intersection_size 204 | 205 | for c1, c2 in zip(match1, match2): 206 | num[1] += int(c1.flip != c2.flip) 207 | if c1.idx in c1.HUMAN_IDXS: 208 | num[2] += int(c1.expression != c2.expression) 209 | num[3] += int(c1.pose != c2.pose) 210 | num[4] += int(c1.depth != c2.depth) 211 | num[5] += min(1.0, np.sqrt((c1.normed_x - c2.normed_x) ** 2 + (c1.normed_y - c2.normed_y) ** 2)) 212 | 213 | denom[:6] = union_size 214 | 215 | for idx_i in range(len(match1)): 216 | for idx_j in range(idx_i, len(match1)): 217 | if idx_i == idx_j: 218 | continue 219 | c1i, c1j = match1[idx_i], match1[idx_j] 220 | c2i, c2j = match2[idx_i], match2[idx_j] 221 | 222 | # TODO(nikita): this doesn't correctly handle the case if two 223 | # cliparts have *exactly* the same x/y coordinates in the target 224 | num[6] += int((c1i.x - c1j.x) * (c2i.x - c2j.x) <= 0) 225 | num[7] += int((c1i.y - c1j.y) * (c2i.y - c2j.y) <= 0) 226 | 227 | denom[6:] = union_size * (intersection_size - 1) 228 | 229 | denom = np.maximum(denom, 1) 230 | 231 | score_components = num / denom 232 | score_weights = np.array([5,-1,-0.5,-0.5,-1,-1,-1,-1]) 233 | 234 | return score_components @ score_weights 235 | 236 | def clipart_similarity_v1(a, b): 237 | """ 238 | DEPRECATED: use clipart_similarity instead! 239 | 240 | The original clipart similarity metric, before subtype was split into 241 | pose/expression 242 | """ 243 | if a.idx != b.idx: 244 | return 0 245 | 246 | score = 5 247 | score -= int(a.subtype != b.subtype or a.flip != b.flip) 248 | score -= int(a.depth != b.depth) 249 | score -= np.sqrt((a.normed_x - b.normed_x) ** 2 + (a.normed_y - b.normed_y) ** 2) 250 | return score 251 | 252 | def clipart_similarity(a, b): 253 | """ 254 | This version of the metric splits out subtype into pose/expression, and caps 255 | distance error at 1. 256 | """ 257 | if a.idx != b.idx: 258 | return 0 259 | 260 | score = 5 261 | score -= int(a.flip != b.flip) 262 | score -= 0.5 * int(a.expression != b.expression) 263 | score -= 0.5 * int(a.pose != b.pose) 264 | score -= int(a.depth != b.depth) 265 | score -= min(1.0, np.sqrt((a.normed_x - b.normed_x) ** 2 + (a.normed_y - b.normed_y) ** 2)) 266 | return score 267 | -------------------------------------------------------------------------------- /abs_render.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from pathlib import Path 8 | from IPython.display import SVG, display 9 | from PIL import Image 10 | from binascii import b2a_base64 11 | 12 | PNGS_PATH = (Path(__file__).parent / '../CoDraw/Pngs').resolve() 13 | EMBED_PNGS_PATH = '../../CoDraw/Pngs' 14 | DEPTH_SCALE = [1.0, 0.7, 0.49] 15 | IMAGE_NAMES = [ 16 | 's_0s.png', 17 | 's_1s.png', 18 | 's_2s.png', 19 | 's_3s.png', 20 | 's_4s.png', 21 | 's_5s.png', 22 | 's_6s.png', 23 | 's_7s.png', 24 | 'p_0s.png', 25 | 'p_1s.png', 26 | 'p_2s.png', 27 | 'p_3s.png', 28 | 'p_4s.png', 29 | 'p_5s.png', 30 | 'p_6s.png', 31 | 'p_7s.png', 32 | 'p_8s.png', 33 | 'p_9s.png', 34 | 'hb0_0s.png', 35 | 'hb0_1s.png', 36 | 'hb0_2s.png', 37 | 'hb0_3s.png', 38 | 'hb0_4s.png', 39 | 'hb0_5s.png', 40 | 'hb0_6s.png', 41 | 'hb0_7s.png', 42 | 'hb0_8s.png', 43 | 'hb0_9s.png', 44 | 'hb0_10s.png', 45 | 'hb0_11s.png', 46 | 'hb0_12s.png', 47 | 'hb0_13s.png', 48 | 'hb0_14s.png', 49 | 'hb0_15s.png', 50 | 'hb0_16s.png', 51 | 'hb0_17s.png', 52 | 'hb0_18s.png', 53 | 'hb0_19s.png', 54 | 'hb0_20s.png', 55 | 'hb0_21s.png', 56 | 'hb0_22s.png', 57 | 'hb0_23s.png', 58 | 'hb0_24s.png', 59 | 'hb0_25s.png', 60 | 'hb0_26s.png', 61 | 'hb0_27s.png', 62 | 'hb0_28s.png', 63 | 'hb0_29s.png', 64 | 'hb0_30s.png', 65 | 'hb0_31s.png', 66 | 'hb0_32s.png', 67 | 'hb0_33s.png', 68 | 'hb0_34s.png', 69 | 'hb1_0s.png', 70 | 'hb1_1s.png', 71 | 'hb1_2s.png', 72 | 'hb1_3s.png', 73 | 'hb1_4s.png', 74 | 'hb1_5s.png', 75 | 'hb1_6s.png', 76 | 'hb1_7s.png', 77 | 'hb1_8s.png', 78 | 'hb1_9s.png', 79 | 'hb1_10s.png', 80 | 'hb1_11s.png', 81 | 'hb1_12s.png', 82 | 'hb1_13s.png', 83 | 'hb1_14s.png', 84 | 'hb1_15s.png', 85 | 'hb1_16s.png', 86 | 'hb1_17s.png', 87 | 'hb1_18s.png', 88 | 'hb1_19s.png', 89 | 'hb1_20s.png', 90 | 'hb1_21s.png', 91 | 'hb1_22s.png', 92 | 'hb1_23s.png', 93 | 'hb1_24s.png', 94 | 'hb1_25s.png', 95 | 'hb1_26s.png', 96 | 'hb1_27s.png', 97 | 'hb1_28s.png', 98 | 'hb1_29s.png', 99 | 'hb1_30s.png', 100 | 'hb1_31s.png', 101 | 'hb1_32s.png', 102 | 'hb1_33s.png', 103 | 'hb1_34s.png', 104 | 'a_0s.png', 105 | 'a_1s.png', 106 | 'a_2s.png', 107 | 'a_3s.png', 108 | 'a_4s.png', 109 | 'a_5s.png', 110 | 'c_0s.png', 111 | 'c_1s.png', 112 | 'c_2s.png', 113 | 'c_3s.png', 114 | 'c_4s.png', 115 | 'c_5s.png', 116 | 'c_6s.png', 117 | 'c_7s.png', 118 | 'c_8s.png', 119 | 'c_9s.png', 120 | 'e_0s.png', 121 | 'e_1s.png', 122 | 'e_2s.png', 123 | 'e_3s.png', 124 | 'e_4s.png', 125 | 'e_5s.png', 126 | 'e_6s.png', 127 | 't_0s.png', 128 | 't_1s.png', 129 | 't_2s.png', 130 | 't_3s.png', 131 | 't_4s.png', 132 | 't_5s.png', 133 | 't_6s.png', 134 | 't_7s.png', 135 | 't_8s.png', 136 | 't_9s.png', 137 | 't_10s.png', 138 | 't_11s.png', 139 | 't_12s.png', 140 | 't_13s.png', 141 | 't_14s.png', 142 | ] 143 | 144 | def get_image_name(clipart): 145 | if clipart.idx < 18: 146 | return IMAGE_NAMES[clipart.idx] 147 | elif clipart.idx < 18 + 2: 148 | return IMAGE_NAMES[18 + (clipart.idx - 18) * 35 + clipart.subtype] 149 | else: 150 | return IMAGE_NAMES[clipart.idx + 34*2] 151 | 152 | 153 | def snippet_from_clipart(clipart, inline_images=True): 154 | img_name = get_image_name(clipart) 155 | img_path = PNGS_PATH / img_name 156 | img_pil = Image.open(img_path) 157 | width, height = img_pil.width, img_pil.height 158 | if inline_images: 159 | data = b2a_base64(img_path.read_bytes()).decode('ascii') 160 | 161 | scale = DEPTH_SCALE[clipart.depth] 162 | width = width * scale 163 | height = height * scale 164 | 165 | flip = -1 if bool(clipart.flip) else 1 166 | x = clipart.x - width / 2.0 167 | y = clipart.y - height / 2.0 168 | 169 | flipped_sub_x = 
(-width) if clipart.flip else 0 170 | 171 | if inline_images: 172 | href = f"data:image/png;base64,{data}" 173 | else: 174 | href = f"{EMBED_PNGS_PATH}/{img_name}" 175 | 176 | return f""" 177 | 178 | 180 | 181 | """ 182 | 183 | def svg_from_cliparts(cliparts, color=None, label=None, inline_images=True, scale=1.0): 184 | img_path = PNGS_PATH / 'background.png' 185 | if inline_images: 186 | data = b2a_base64(img_path.read_bytes()).decode('ascii') 187 | href = f"data:image/png;base64,{data}" 188 | else: 189 | href = f"{EMBED_PNGS_PATH}/background.png" 190 | svg = f""" 191 | 192 | 193 | """ 194 | if color: 195 | svg += f""" 196 | 197 | """ 198 | 199 | # Sun (idx=3) is always in the back; this hack is also in Abs.js 200 | # All sky objects (idx < 8) are behind any non-sky objects 201 | # Past that, objects are sorted by depth and then by index 202 | for clipart in sorted(cliparts, key=lambda c: c.render_order_key): 203 | svg += snippet_from_clipart(clipart, inline_images=inline_images) 204 | 205 | if label: 206 | svg += f"""{label}""" 207 | 208 | svg += "" 209 | return svg 210 | 211 | def display_cliparts(cliparts, color=None, label=None, scale=1.0): 212 | display(SVG(svg_from_cliparts(cliparts, color, label, scale=scale))) 213 | -------------------------------------------------------------------------------- /abs_util_orig.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | Abstract Scene (abs) utilities copied from the original CoDraw codebase 9 | """ 10 | 11 | import math 12 | import torch 13 | from torch.autograd import Variable 14 | import math 15 | 16 | class AbsUtil: 17 | """AbsUtil ported from AbsUtil.js""" 18 | 19 | # Various variables setting up the appearence of the interface 20 | CANVAS_WIDTH = 500 21 | CANVAS_HEIGHT = 400 22 | NOT_USED = -10000 23 | 24 | numClipArts = 58 25 | numTypes = 8 26 | numProps = 6 27 | numClasses = [58,35,3,2,1,1] 28 | Null = 0 29 | 30 | def __init__(self, str): 31 | # Each object type has its own prefix, the ordering of the object types affects the 32 | # order in which they are rendered. That is the "t" type (toys) will be rendered on top 33 | # of the "hb0" (boy) category assuming they have the same depth. 34 | self.prefix = ['s','p','hb0','hb1','a','c','e','t'] 35 | 36 | # Total number of clipart for each type 37 | self.typeTotalCt = [8,10,35,35,6,10,7,15] 38 | 39 | # Total number of clipart to be randomly selected for each type 40 | # The sum should equal numClipart 41 | self.typeCt = [3,4,5,5,2,3,2,4] 42 | 43 | self.str = str 44 | self.obj = self.preprocess(str) 45 | 46 | # Preprocess given CSV into 7Val format, which is 47 | # 1. clipartIdx integer [0-57] 48 | # ~~2. clipartType integer [0-7]~~ 49 | # 3. clipartSubType integer [0-34] 50 | # 4. depth integer [0-2] 51 | # 5. flip integer [0-1] 52 | # 6. x-position float [1-500] 53 | # 7. 
y-position float [1-400] 54 | def preprocess(self, str, verbose=False): 55 | idx = 1; 56 | val = []; 57 | if not str or len(str) < 1: 58 | return None 59 | results = str.split(',') 60 | numClipArts = int(results[0]) 61 | for i in range(numClipArts): 62 | v = list() 63 | idx = idx + 1 # png filename 64 | idx = idx + 1 # clip art local index 65 | _clipArtObjectIdx = int(results[idx]); idx = idx + 1 66 | _clipArtTypeIdx = int(results[idx]); idx = idx + 1 67 | 68 | # This code was originally used to read the dataset from Python 69 | _clipArtX = int(round(float(results[idx]))); idx = idx + 1 70 | _clipArtY = int(round(float(results[idx]))); idx = idx + 1 71 | 72 | # The javascript code, however, used parseInt instead. This has 73 | # slightly different rounding behavior, which can be recreated by 74 | # using the following Python code instead: 75 | # _clipArtX = float(results[idx]); idx = idx + 1 76 | # _clipArtY = float(results[idx]); idx = idx + 1 77 | # _clipArtX = int(math.floor(_clipArtX)) if _clipArtX >= 0 else -int(math.floor(-_clipArtX)) 78 | # _clipArtY = int(math.floor(_clipArtY)) if _clipArtY >= 0 else -int(math.floor(-_clipArtY)) 79 | 80 | _clipArtZ = int(results[idx]); idx = idx + 1 81 | _clipArtFlip = int(results[idx]); idx = idx + 1 82 | 83 | if not verbose and (_clipArtX==AbsUtil.NOT_USED or _clipArtY==AbsUtil.NOT_USED): 84 | continue 85 | 86 | v.append(self.getClipArtIdx(_clipArtObjectIdx, _clipArtTypeIdx)) 87 | # v.append(_clipArtTypeIdx); # remove this redundant feature 88 | v.append(_clipArtObjectIdx if (_clipArtTypeIdx==2 or _clipArtTypeIdx==3) else 0) 89 | v.append(_clipArtZ) 90 | v.append(_clipArtFlip) 91 | v.append(_clipArtX) 92 | v.append(_clipArtY) 93 | val.append(v) 94 | return val 95 | 96 | def asTensor(self): 97 | if None==self.obj: 98 | return None 99 | # notice that position (x & y) is rounded as LongTensor 100 | t = torch.LongTensor(AbsUtil.numClipArts, 6).fill_(AbsUtil.Null) 101 | # clipartIdx & clipartSubType are starting with 1 102 | t[:,:2].add_(-1) 103 | for v in self.obj: 104 | clipartIdx = v[0] 105 | t[clipartIdx].copy_(torch.LongTensor(v)) 106 | t[:,:2].add_(1) 107 | return t 108 | 109 | def __repr__(self): 110 | return self.obj.__repr__() 111 | 112 | def getClipArtIdx(self, clipArtObjectIdx, clipArtTypeIdx): 113 | typeTotalPos = [0,8,18,19,20,26,36,43] 114 | offset = 0 if (clipArtTypeIdx==2 or clipArtTypeIdx==3) else clipArtObjectIdx 115 | return typeTotalPos[clipArtTypeIdx] + offset 116 | 117 | # Static methods ############################################################# 118 | 119 | # Sample clipart from idx(abs_d - abs_b)>0 120 | # @param abs_b Tensor(bx58x6) 121 | # @param abs_d Tensor(bx58x6) 122 | # @output Tensor(bx6) 123 | # @output Tensor(bx58) 124 | @staticmethod 125 | def sample_abs_c(abs_b, abs_d): 126 | # using Tensors directly 127 | abs_b = abs_b.data 128 | abs_d = abs_d.data 129 | # bx58 130 | abs_c_mask = (abs_d - abs_b).abs().sum(2)!=0 # updated cliparts 131 | # bx58x6 132 | mask = abs_c_mask.unsqueeze(2).expand_as(abs_d) 133 | # collapsed x 6 134 | abs_c = abs_d[mask.byte()].view(-1, abs_d.size(-1)) 135 | return abs_c, abs_c_mask 136 | 137 | # Get abs_c mask, if `r_mask` is given, masked over it. 
138 | # @param abs_b (long, bx58x6): latest drawn scene before prev teller's message 139 | # @param abs_d (long, bx58x6): latest drawn scene before next teller's message 140 | # @param r_mask (byte, optional, b) 141 | # #output c_mask (byte, b): batch mask whether drawn scene is changed or not 142 | @staticmethod 143 | def get_c_mask(abs_b, abs_d, r_mask=None): 144 | if Variable==type(r_mask): 145 | r_mask = r_mask.data 146 | _, abs_c_mask = AbsUtil.sample_abs_c(abs_b, abs_d) # _, bx58 147 | c_mask = abs_c_mask.sum(1).byte()>0 148 | if r_mask is not None: 149 | c_mask = c_mask.mul(r_mask) 150 | return c_mask 151 | -------------------------------------------------------------------------------- /attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | Multi-headed attention implementation 9 | """ 10 | 11 | #%% 12 | 13 | import numpy as np 14 | 15 | import torch 16 | import torch.cuda 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | 20 | from nkfb_util import logsumexp, cuda_if_available 21 | 22 | #%% 23 | 24 | class AttentionSeqToMasked(nn.Module): 25 | def __init__(self, 26 | d_pre_q, d_pre_k, d_pre_v, 27 | d_qk, d_v, num_heads, 28 | attn_dropout): 29 | super().__init__() 30 | 31 | self.d_qk = d_qk 32 | self.d_v = d_v 33 | self.num_heads = num_heads 34 | 35 | self.q_proj = nn.Linear(d_pre_q, self.num_heads * self.d_qk) 36 | self.k_proj = nn.Linear(d_pre_k, self.num_heads * self.d_qk) 37 | self.v_proj = nn.Linear(d_pre_v, self.num_heads * self.d_v) 38 | self.attn_dropout = nn.Dropout(attn_dropout) 39 | 40 | self.d_out = self.num_heads * self.d_v 41 | 42 | def split_heads(self, tensor): 43 | """ 44 | [...dims, a, num_heads x b] -> [...dims, num_heads, a, b] 45 | """ 46 | return tensor.view(*tensor.shape[:-1], self.num_heads, -1).transpose(-3, -2) 47 | 48 | def join_heads(self, tensor): 49 | """ 50 | [...dims, num_heads, a, b] -> [...dims, a, num_heads x b] 51 | """ 52 | res = tensor.transpose(-3, -2).contiguous() 53 | return res.view(*res.shape[:-2], -1) 54 | 55 | def precompute_kv(self, pre_ks, pre_vs): 56 | assert not self.training 57 | ks = self.split_heads(self.k_proj(pre_ks)) 58 | vs = self.split_heads(self.v_proj(pre_vs)) 59 | return ks, vs 60 | 61 | def forward(self, pre_qs=None, pre_ks=None, pre_vs=None, ks=None, vs=None, k_mask=None): 62 | if isinstance(pre_qs, nn.utils.rnn.PackedSequence): 63 | pre_qs, lengths = nn.utils.rnn.pad_packed_sequence(pre_qs, batch_first=True) 64 | else: 65 | lengths = None 66 | qs = self.split_heads(self.q_proj(pre_qs)) 67 | if ks is None: 68 | ks = self.split_heads(self.k_proj(pre_ks)) 69 | if vs is None: 70 | vs = self.split_heads(self.v_proj(pre_vs)) 71 | 72 | attn_logits = torch.matmul(qs, ks.transpose(-2, -1)) / np.sqrt(self.d_qk) 73 | 74 | if k_mask is not None: 75 | # k_mask is [batch, pre_ks.shape[1]] mask signalling which values 76 | # are valid attention targets 77 | attn_logits = torch.where( 78 | k_mask[:, None, None, :], 79 | attn_logits, 80 | torch.full_like(attn_logits, float('-inf')) 81 | ) 82 | attn_probs = F.softmax(attn_logits, dim=-1) 83 | attn_probs = self.attn_dropout(attn_probs) 84 | 85 | res = self.join_heads(torch.matmul(attn_probs, vs)) 86 | if lengths is not None: 87 | res = nn.utils.rnn.pack_padded_sequence(res, lengths, batch_first=True) 88 
| return res 89 | -------------------------------------------------------------------------------- /baseline1_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | import numpy as np 13 | from pathlib import Path 14 | import editdistance 15 | 16 | import torch 17 | import torch.cuda 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | from nkfb_util import logsumexp, cuda_if_available 22 | 23 | import codraw_data 24 | from codraw_data import AbstractScene, Clipart 25 | import abs_render 26 | from abs_metric import scene_similarity, clipart_similarity 27 | from episode import Episode, respond_to, response_partial 28 | 29 | from datagen import NearestNeighborData, MessageSimilarityData, BOWtoClipartData, ClipartToSeqData, BOWplusCanvasToMultiData 30 | from model import Model, select_clipart_to_tell, drawer_observe_canvas, make_fns, eval_fns 31 | from model import scripted_tell, scripted_tell_before_peek, scripted_tell_after_peek, draw_nothing 32 | from baseline1_models import load_baseline1 33 | 34 | # %% 35 | 36 | models = load_baseline1() 37 | 38 | 39 | 40 | # %% 41 | 42 | tellers = [ 43 | ('teller_nn', (models['teller_nn_a'], models['teller_nn_b'])), 44 | ('teller_c2seq', (models['teller_c2seq_a'], models['teller_c2seq_b'])), 45 | ] 46 | 47 | drawers = [ 48 | ('drawer_nn', (models['drawer_nn_a'], models['drawer_nn_b'])), 49 | ('drawer_sim', (models['drawer_sim_a'], models['drawer_sim_b'])), 50 | ('drawer_bow2c', (models['drawer_bow2c_a'], models['drawer_bow2c_b'])), 51 | ('drawer_bow2bce', (models['drawer_bow2bce_a'], models['drawer_bow2bce_b'])), 52 | ('drawer_bowcanvas2bce', (models['drawer_bowcanvas2bce_a'], models['drawer_bowcanvas2bce_b'])), 53 | ] 54 | 55 | # %% 56 | 57 | limit = None 58 | print("Drawer evaluations against script") 59 | for drawer_name, drawer_pair in drawers: 60 | for split in ('a', 'b'): 61 | sims = eval_fns(make_fns(split, scripted_tell, drawer_pair), limit=limit) 62 | print(f"{drawer_name}_{split}", sims.mean()) 63 | 64 | # %% 65 | 66 | limit = None 67 | print("Drawer evaluations against script before peek") 68 | for drawer_name, drawer_pair in drawers: 69 | for split in ('a', 'b'): 70 | sims = eval_fns(make_fns(split, scripted_tell_before_peek, drawer_pair), limit=limit) 71 | print(f"{drawer_name}_{split}", sims.mean()) 72 | 73 | # %% 74 | 75 | limit = None 76 | print("Drawer evaluations against script after peek") 77 | 78 | sims = eval_fns(make_fns('', scripted_tell_after_peek, draw_nothing), limit=limit) 79 | print("draw_nothing", sims.mean()) 80 | 81 | for drawer_name, drawer_pair in drawers: 82 | for split in ('a', 'b'): 83 | sims = eval_fns(make_fns(split, scripted_tell_after_peek, drawer_pair), limit=limit) 84 | print(f"{drawer_name}_{split}", sims.mean()) 85 | 86 | # %% 87 | 88 | limit = None 89 | print("Teller/Drawer pair evaluations") 90 | for teller_name, teller_pair in tellers: 91 | for drawer_name, drawer_pair in drawers: 92 | for splits in ('aa', 'ab', 'ba', 'bb'): 93 | sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 94 | print(f"{teller_name}_{splits[0]} {drawer_name}_{splits[1]}", sims.mean()) 95 | 
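# %%

# A minimal usage sketch (not part of the original evaluation script): score a
# single teller/drawer pairing outside the loops above. As in those loops, the
# first character of the split string selects the teller variant ('a' or 'b')
# and the second selects the drawer variant; eval_fns returns per-episode scene
# similarity scores, aggregated here with .mean() exactly as above.
single_pair_sims = eval_fns(
    make_fns('ab',
             (models['teller_c2seq_a'], models['teller_c2seq_b']),
             (models['drawer_bowcanvas2bce_a'], models['drawer_bowcanvas2bce_b'])),
    limit=limit)
print('teller_c2seq_a drawer_bowcanvas2bce_b', single_pair_sims.mean())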
-------------------------------------------------------------------------------- /baseline1_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | assert __name__ == "__main__", "Training script should not be imported!" 13 | 14 | #%% 15 | 16 | import numpy as np 17 | from pathlib import Path 18 | import editdistance 19 | 20 | import torch 21 | import torch.cuda 22 | import torch.nn as nn 23 | import torch.nn.functional as F 24 | 25 | from nkfb_util import logsumexp, cuda_if_available 26 | 27 | import codraw_data 28 | from codraw_data import AbstractScene, Clipart 29 | import abs_render 30 | from abs_metric import scene_similarity, clipart_similarity 31 | from episode import Episode, respond_to, response_partial 32 | 33 | from datagen import NearestNeighborData, MessageSimilarityData, BOWtoClipartData, ClipartToSeqData, BOWplusCanvasToMultiData 34 | from model import Model, select_clipart_to_tell, drawer_observe_canvas, make_fns, eval_fns, scripted_tell 35 | from baseline1_models import NearestNeighborTeller, CharNeighborDrawer 36 | from baseline1_models import BOWNeighborDrawer, BOWtoClipartDrawer, ClipartToSeqTeller 37 | from baseline1_models import BOWtoMultiBCEDrawer, BOWplusCanvasDrawer 38 | 39 | #%% 40 | 41 | data_nn_a = NearestNeighborData('a') 42 | data_nn_b = NearestNeighborData('b') 43 | 44 | teller_nn_a = NearestNeighborTeller(data_nn_a) 45 | teller_nn_b = NearestNeighborTeller(data_nn_b) 46 | drawer_nn_a = CharNeighborDrawer(data_nn_a) 47 | drawer_nn_b = CharNeighborDrawer(data_nn_b) 48 | 49 | #%% 50 | 51 | data_sim_a = MessageSimilarityData('a') 52 | data_sim_b = MessageSimilarityData('b') 53 | 54 | drawer_sim_a = BOWNeighborDrawer(data_sim_a) 55 | drawer_sim_b = BOWNeighborDrawer(data_sim_b) 56 | 57 | optimizer_sim_a = torch.optim.Adam(drawer_sim_a.parameters()) 58 | optimizer_sim_b = torch.optim.Adam(drawer_sim_b.parameters()) 59 | 60 | #%% 61 | 62 | for epoch in range(500): 63 | drawer_sim_a.train() 64 | for num, ex in enumerate(drawer_sim_a.datagen.get_examples_batch()): 65 | optimizer_sim_a.zero_grad() 66 | loss = drawer_sim_a.forward(ex) 67 | loss.backward() 68 | optimizer_sim_a.step() 69 | 70 | print(f'Done epoch {epoch} loss {float(loss)}') 71 | if epoch % 25 == 0: 72 | drawer_sim_a.prepare_for_inference() 73 | for splits in ('aa', 'ba'): 74 | sims = eval_fns(make_fns(splits, (teller_nn_a, teller_nn_b), (drawer_sim_a, drawer_sim_b)), limit=100) 75 | print(splits, sims.mean()) 76 | drawer_sim_a.prepare_for_inference() 77 | 78 | # %% 79 | 80 | for epoch in range(500): 81 | drawer_sim_b.train() 82 | for num, ex in enumerate(drawer_sim_b.datagen.get_examples_batch()): 83 | optimizer_sim_b.zero_grad() 84 | loss = drawer_sim_b.forward(ex) 85 | loss.backward() 86 | optimizer_sim_b.step() 87 | 88 | print(f'Done epoch {epoch} loss {float(loss)}') 89 | if epoch % 25 == 0: 90 | drawer_sim_b.prepare_for_inference() 91 | for splits in ('ab', 'bb'): 92 | sims = eval_fns(make_fns(splits, (teller_nn_a, teller_nn_b), (drawer_sim_a, drawer_sim_b)), limit=100) 93 | print(splits, sims.mean()) 94 | drawer_sim_b.prepare_for_inference() 95 | 96 | #%% 97 | 98 | data_bow2c_a = BOWtoClipartData('a') 99 | data_bow2c_b = 
BOWtoClipartData('b') 100 | 101 | drawer_bow2c_a = BOWtoClipartDrawer(data_bow2c_a) 102 | drawer_bow2c_b = BOWtoClipartDrawer(data_bow2c_b) 103 | 104 | optimizer_bow2c_a = torch.optim.Adam(drawer_bow2c_a.parameters()) 105 | optimizer_bow2c_b = torch.optim.Adam(drawer_bow2c_b.parameters()) 106 | 107 | # %% 108 | 109 | for epoch in range(20): 110 | drawer_bow2c_a.train() 111 | for num, ex in enumerate(drawer_bow2c_a.datagen.get_examples_batch()): 112 | optimizer_bow2c_a.zero_grad() 113 | loss = drawer_bow2c_a.forward(ex) 114 | loss.backward() 115 | optimizer_bow2c_a.step() 116 | 117 | print(f'Done epoch {epoch} loss {float(loss)}') 118 | if epoch % 5 == 0: 119 | for splits in ('aa', 'ba'): 120 | sims = eval_fns(make_fns(splits, (teller_nn_a, teller_nn_b), (drawer_bow2c_a, drawer_bow2c_b)), limit=100) 121 | print(splits, sims.mean()) 122 | 123 | #%% 124 | 125 | for epoch in range(20): 126 | drawer_bow2c_b.train() 127 | for num, ex in enumerate(drawer_bow2c_b.datagen.get_examples_batch()): 128 | optimizer_bow2c_b.zero_grad() 129 | loss = drawer_bow2c_b.forward(ex) 130 | loss.backward() 131 | optimizer_bow2c_b.step() 132 | 133 | print(f'Done epoch {epoch} loss {float(loss)}') 134 | if epoch % 5 == 0: 135 | for splits in ('ab', 'bb'): 136 | sims = eval_fns(make_fns(splits, (teller_nn_a, teller_nn_b), (drawer_bow2c_a, drawer_bow2c_b)), limit=100) 137 | print(splits, sims.mean()) 138 | #%% 139 | 140 | data_c2seq_a = ClipartToSeqData('a') 141 | data_c2seq_b = ClipartToSeqData('b') 142 | 143 | teller_c2seq_a = ClipartToSeqTeller(data_c2seq_a) 144 | teller_c2seq_b = ClipartToSeqTeller(data_c2seq_b) 145 | 146 | optimizer_c2seq_a = torch.optim.Adam(teller_c2seq_a.parameters()) 147 | optimizer_c2seq_b = torch.optim.Adam(teller_c2seq_b.parameters()) 148 | 149 | #%% 150 | 151 | for epoch in range(80): 152 | teller_c2seq_a.train() 153 | for num, ex in enumerate(teller_c2seq_a.datagen.get_examples_batch()): 154 | optimizer_c2seq_a.zero_grad() 155 | loss = teller_c2seq_a(ex) 156 | loss.backward() 157 | optimizer_c2seq_a.step() 158 | 159 | print(f'Done epoch {epoch} loss {float(loss)}') 160 | if epoch % 5 == 0: 161 | for splits in ('aa', 'ab'): 162 | sims = eval_fns(make_fns(splits, (teller_c2seq_a, teller_c2seq_b), (drawer_bow2c_a, drawer_bow2c_b)), limit=100) 163 | print(splits, sims.mean()) 164 | 165 | if epoch % 50 == 49: 166 | optimizer_c2seq_a.param_groups[0]['lr'] *= 0.5 167 | print("Learning rate reduced to", optimizer_c2seq_a.param_groups[0]['lr']) 168 | 169 | #%% 170 | 171 | for epoch in range(80): 172 | teller_c2seq_b.train() 173 | for num, ex in enumerate(teller_c2seq_b.datagen.get_examples_batch()): 174 | optimizer_c2seq_b.zero_grad() 175 | loss = teller_c2seq_b(ex) 176 | loss.backward() 177 | optimizer_c2seq_b.step() 178 | 179 | print(f'Done epoch {epoch} loss {float(loss)}') 180 | if epoch % 5 == 0: 181 | for splits in ('ba', 'bb'): 182 | sims = eval_fns(make_fns(splits, (teller_c2seq_a, teller_c2seq_b), (drawer_bow2c_a, drawer_bow2c_b)), limit=100) 183 | print(splits, sims.mean()) 184 | 185 | if epoch % 50 == 49: 186 | optimizer_c2seq_b.param_groups[0]['lr'] *= 0.5 187 | print("Learning rate reduced to", optimizer_c2seq_b.param_groups[0]['lr']) 188 | 189 | #%% 190 | 191 | data_bowcanvas_a = BOWplusCanvasToMultiData('a') 192 | data_bowcanvas_b = BOWplusCanvasToMultiData('b') 193 | 194 | drawer_bow2bce_a = BOWtoMultiBCEDrawer(data_bowcanvas_a) 195 | drawer_bow2bce_b = BOWtoMultiBCEDrawer(data_bowcanvas_b) 196 | 197 | optimizer_bow2bce_a = torch.optim.Adam(drawer_bow2bce_a.parameters()) 198 | 
optimizer_bow2bce_b = torch.optim.Adam(drawer_bow2bce_b.parameters()) 199 | 200 | #%% 201 | 202 | for epoch in range(5): 203 | drawer_bow2bce_a.train() 204 | for num, ex in enumerate(drawer_bow2bce_a.datagen.get_examples_batch()): 205 | optimizer_bow2bce_a.zero_grad() 206 | loss = drawer_bow2bce_a.forward(ex) 207 | loss.backward() 208 | optimizer_bow2bce_a.step() 209 | 210 | print(f'Done epoch {epoch} loss {float(loss)}') 211 | if epoch % 1 == 0: 212 | for split in ('a',): 213 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_bow2bce_a, drawer_bow2bce_b)), limit=100) 214 | print(split, sims.mean()) 215 | 216 | #%% 217 | 218 | for epoch in range(5): 219 | drawer_bow2bce_b.train() 220 | for num, ex in enumerate(drawer_bow2bce_b.datagen.get_examples_batch()): 221 | optimizer_bow2bce_b.zero_grad() 222 | loss = drawer_bow2bce_b.forward(ex) 223 | loss.backward() 224 | optimizer_bow2bce_b.step() 225 | 226 | print(f'Done epoch {epoch} loss {float(loss)}') 227 | if epoch % 1 == 0: 228 | for split in ('b',): 229 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_bow2bce_a, drawer_bow2bce_b)), limit=100) 230 | print(split, sims.mean()) 231 | 232 | #%% 233 | 234 | drawer_bowcanvas2bce_a = BOWplusCanvasDrawer(data_bowcanvas_a) 235 | drawer_bowcanvas2bce_b = BOWplusCanvasDrawer(data_bowcanvas_b) 236 | 237 | optimizer_bowcanvas2bce_a = torch.optim.Adam(drawer_bowcanvas2bce_a.parameters()) 238 | optimizer_bowcanvas2bce_b = torch.optim.Adam(drawer_bowcanvas2bce_b.parameters()) 239 | 240 | #%% 241 | 242 | for epoch in range(15): 243 | drawer_bowcanvas2bce_a.train() 244 | for num, ex in enumerate(drawer_bowcanvas2bce_a.datagen.get_examples_batch()): 245 | optimizer_bowcanvas2bce_a.zero_grad() 246 | loss = drawer_bowcanvas2bce_a.forward(ex) 247 | loss.backward() 248 | optimizer_bowcanvas2bce_a.step() 249 | 250 | print(f'Done epoch {epoch} loss {float(loss)}') 251 | if epoch % 1 == 0: 252 | for split in ('a',): 253 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_bowcanvas2bce_a, drawer_bowcanvas2bce_b)), limit=100) 254 | print(split, sims.mean()) 255 | 256 | #%% 257 | 258 | for epoch in range(15): 259 | drawer_bowcanvas2bce_b.train() 260 | for num, ex in enumerate(drawer_bowcanvas2bce_b.datagen.get_examples_batch()): 261 | optimizer_bowcanvas2bce_b.zero_grad() 262 | loss = drawer_bowcanvas2bce_b.forward(ex) 263 | loss.backward() 264 | optimizer_bowcanvas2bce_b.step() 265 | 266 | print(f'Done epoch {epoch} loss {float(loss)}') 267 | if epoch % 1 == 0: 268 | for split in ('b',): 269 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_bowcanvas2bce_a, drawer_bowcanvas2bce_b)), limit=100) 270 | print(split, sims.mean()) 271 | 272 | #%% 273 | 274 | baseline1_specs = dict( 275 | teller_nn_a = teller_nn_a.spec, 276 | teller_nn_b = teller_nn_b.spec, 277 | drawer_nn_a = drawer_nn_a.spec, 278 | drawer_nn_b = drawer_nn_b.spec, 279 | 280 | drawer_sim_a = drawer_sim_a.spec, 281 | drawer_sim_b = drawer_sim_b.spec, 282 | 283 | drawer_bow2c_a = drawer_bow2c_a.spec, 284 | drawer_bow2c_b = drawer_bow2c_b.spec, 285 | 286 | teller_c2seq_a = teller_c2seq_a.spec, 287 | teller_c2seq_b = teller_c2seq_b.spec, 288 | 289 | drawer_bow2bce_a = drawer_bow2bce_a.spec, 290 | drawer_bow2bce_b = drawer_bow2bce_b.spec, 291 | 292 | drawer_bowcanvas2bce_a = drawer_bowcanvas2bce_a.spec, 293 | drawer_bowcanvas2bce_b = drawer_bowcanvas2bce_b.spec, 294 | ) 295 | 296 | #%% 297 | 298 | torch.save(baseline1_specs, Path('models/baseline1.pt')) 299 | 
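# Sketch of how the specs saved above are meant to be read back. The actual
# loader is load_baseline1 in baseline1_models.py (not part of this file);
# the pattern below simply mirrors load_baseline2/load_baseline3 found later
# in this repo and is shown here only as an illustration:
#
#   from nkfb_util import torch_load
#   specs = torch_load(Path('models/baseline1.pt'))
#   models = {k: globals()[spec['class']](spec=spec) for k, spec in specs.items()}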
-------------------------------------------------------------------------------- /baseline2_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | import numpy as np 13 | from pathlib import Path 14 | import editdistance 15 | 16 | import torch 17 | import torch.cuda 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | from nkfb_util import logsumexp, cuda_if_available 22 | 23 | import codraw_data 24 | from codraw_data import AbstractScene, Clipart 25 | import abs_render 26 | from abs_metric import scene_similarity, clipart_similarity 27 | from episode import Episode, Transcriber, respond_to, response_partial 28 | 29 | from baseline1_models import load_baseline1 30 | from baseline2_models import load_baseline2 31 | import model 32 | from model import make_fns, eval_fns 33 | 34 | # %% 35 | 36 | compontent_evaluator = model.ComponentEvaluator.get() 37 | 38 | # %% 39 | 40 | models_baseline1 = load_baseline1() 41 | models_baseline2 = load_baseline2() 42 | 43 | # %% 44 | 45 | tellers = [ 46 | ('teller_nn', (models_baseline1['teller_nn_a'], models_baseline1['teller_nn_b'])), 47 | # ('teller_c2seq', (models_baseline1['teller_c2seq_a'], models_baseline1['teller_c2seq_b'])), 48 | ('teller_pragmaticnn', (models_baseline2['teller_pragmaticnn_a'], models_baseline2['teller_pragmaticnn_b'])), 49 | ] 50 | 51 | drawers = [ 52 | # ('drawer_nn', (models_baseline1['drawer_nn_a'], models_baseline1['drawer_nn_b'])), 53 | # ('drawer_sim', (models_baseline1['drawer_sim_a'], models_baseline1['drawer_sim_b'])), 54 | # ('drawer_bow2c', (models_baseline1['drawer_bow2c_a'], models_baseline1['drawer_bow2c_b'])), 55 | ('drawer_bow2bce', (models_baseline1['drawer_bow2bce_a'], models_baseline1['drawer_bow2bce_b'])), 56 | ('drawer_bowcanvas2bce', (models_baseline1['drawer_bowcanvas2bce_a'], models_baseline1['drawer_bowcanvas2bce_b'])), 57 | ('drawer_lstmaddonly', (models_baseline2['drawer_lstmaddonly_a'], models_baseline2['drawer_lstmaddonly_b'])), 58 | ] 59 | 60 | # %% 61 | print() 62 | 63 | human_sims = np.array([ 64 | scene_similarity(human_scene, true_scene) 65 | for true_scene, human_scene in codraw_data.get_truth_and_human_scenes('dev') 66 | ]) 67 | 68 | print(f"Human scene similarity: mean={human_sims.mean():.6f} std={human_sims.std():.6f} median={np.median(human_sims):.6f}") 69 | 70 | # %% 71 | print() 72 | print() 73 | # %% 74 | 75 | limit = None 76 | print("Teller \t Drawer \t Scene similarity") 77 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 78 | for teller_name, teller_pair in tellers: 79 | for drawer_name, drawer_pair in drawers: 80 | for splits in splits_group: 81 | sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 82 | teller_caption = f"{teller_name}_{splits[0]}" 83 | drawer_caption = f"{drawer_name}_{splits[1]}" 84 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", sims.mean()) 85 | print() 86 | 87 | # %% 88 | print() 89 | print() 90 | # %% 91 | 92 | limit = None 93 | print("Drawer evaluations against script") 94 | print("Drawer \t Scene similarity") 95 | for drawer_name, drawer_pair in drawers: 96 | for split in ('a', 'b'): 97 | sims = eval_fns(make_fns(split, 
model.scripted_tell, drawer_pair), limit=limit) 98 | drawer_caption = f"{drawer_name}_{split}" 99 | print(f"{drawer_caption:17s}\t", sims.mean()) 100 | 101 | # %% 102 | print() 103 | print() 104 | # %% 105 | 106 | limit = None 107 | print("Teller \t Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 108 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 109 | for teller_name, teller_pair in tellers: 110 | for drawer_name, drawer_pair in drawers: 111 | for splits in splits_group: 112 | components = compontent_evaluator.eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 113 | teller_caption = f"{teller_name}_{splits[0]}" 114 | drawer_caption = f"{drawer_name}_{splits[1]}" 115 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 116 | print() 117 | 118 | # %% 119 | print() 120 | print() 121 | # %% 122 | 123 | limit = None 124 | print("Drawer evaluations against script") 125 | print("Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 126 | for drawer_name, drawer_pair in drawers: 127 | for split in ('a', 'b'): 128 | components = compontent_evaluator.eval_fns(make_fns(split, model.scripted_tell, drawer_pair), limit=limit) 129 | drawer_caption = f"{drawer_name}_{split}" 130 | print(f"{drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 131 | 132 | # %% 133 | # %% 134 | # %% 135 | # %% 136 | # %% 137 | # %% 138 | -------------------------------------------------------------------------------- /baseline2_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | #%% 8 | 9 | import numpy as np 10 | from pathlib import Path 11 | import heapq 12 | 13 | import torch 14 | import torch.cuda 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | 18 | from nkfb_util import logsumexp, cuda_if_available, torch_load 19 | 20 | import codraw_data 21 | from codraw_data import AbstractScene, Clipart 22 | import abs_render 23 | from abs_metric import scene_similarity, clipart_similarity 24 | from episode import Episode, respond_to, response_partial 25 | 26 | from datagen import BOWAddUpdateData, NearestNeighborData 27 | from model import Model, select_clipart_to_tell, drawer_observe_canvas, make_fns, eval_fns 28 | from model import scripted_tell, scripted_tell_before_peek, scripted_tell_after_peek 29 | 30 | # %% 31 | 32 | class BaseAddOnlyDrawer(Model, torch.nn.Module): 33 | datagen_cls = BOWAddUpdateData 34 | def init_full(self, d_hidden): 35 | # Helps overcome class imbalance (most cliparts are not drawn most of 36 | # the time) 37 | self.positive_scaling_coeff = 3. 38 | # Sigmoid is used to prevent drawing cliparts far off the canvas 39 | self.sigmoid_coeff = 2. 40 | # Scaling coefficient so that the sigmoid doesn't always saturate 41 | self.vals_coeff = 1. / 5. 
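# Worked example of the squashing these coefficients are used for in forward()
# and draw(): output = sigmoid_coeff * sigmoid(vals_coeff * raw). With
# sigmoid_coeff = 2 and vals_coeff = 1/5, a raw score of 0 maps to 1.0, large
# positive scores approach 2.0 and large negative scores approach 0.0, so the
# predicted normed_x / normed_y values stay in (0, 2) while the small
# vals_coeff keeps the sigmoid out of its saturated region early in training.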
42 | 43 | dg = self.datagen 44 | 45 | self.canvas_binary_to_hidden = nn.Sequential( 46 | nn.Dropout(0.2), 47 | nn.Linear(dg.NUM_BINARY, d_hidden, bias=False), 48 | ) 49 | self.canvas_numerical_to_hidden = nn.Sequential( 50 | nn.Linear(dg.NUM_INDEX * dg.NUM_NUMERICAL, d_hidden, bias=False), 51 | ) 52 | 53 | d_out = dg.NUM_INDEX * (dg.NUM_ALL + 1) 54 | self.hidden_to_clipart = nn.Sequential( 55 | nn.Dropout(0.4), 56 | nn.ReLU(), 57 | nn.Linear(d_hidden, d_out), 58 | ) 59 | 60 | def lang_to_hidden(self, msg_idxs, offsets=None): 61 | # Offsets is None only when batch_size is 1 62 | raise NotImplementedError("Subclasses should override this") 63 | 64 | def forward(self, example_batch): 65 | dg = self.datagen 66 | 67 | hidden_feats = ( 68 | self.lang_to_hidden(example_batch['msg_idxs'], example_batch['offsets']) 69 | + self.canvas_binary_to_hidden(example_batch['canvas_binary'].float()) 70 | + self.canvas_numerical_to_hidden(example_batch['canvas_numerical']) 71 | ) 72 | 73 | clipart_scores = self.hidden_to_clipart(hidden_feats).view(-1, dg.NUM_INDEX, dg.NUM_ALL + 1) 74 | 75 | correct_categorical = example_batch['clipart_categorical'] 76 | correct_numerical = example_batch['clipart_numerical'] 77 | correct_mask = example_batch['clipart_added_mask'] 78 | 79 | clipart_idx_scores = clipart_scores[:,:,0] 80 | idx_losses = F.binary_cross_entropy_with_logits(clipart_idx_scores, correct_mask.to(torch.float), reduce=False) 81 | idx_losses = torch.where(correct_mask, self.positive_scaling_coeff * idx_losses, idx_losses) 82 | per_example_idx_loss = idx_losses.sum(1) 83 | 84 | flat_scores = clipart_scores[:,:,1:].view((-1, dg.NUM_ALL)) 85 | 86 | (logits_subtype, logits_depth, logits_flip, vals_numerical) = torch.split(flat_scores, [dg.NUM_SUBTYPES, dg.NUM_DEPTH, dg.NUM_FLIP, dg.NUM_NUMERICAL], dim=1) 87 | vals_numerical = self.sigmoid_coeff * F.sigmoid(self.vals_coeff * vals_numerical) 88 | 89 | subtype_losses = F.cross_entropy(logits_subtype, correct_categorical[:,:,0].view((-1,)), reduce=False).view_as(correct_categorical[:,:,0]) 90 | depth_losses = F.cross_entropy(logits_depth, correct_categorical[:,:,1].view((-1,)), reduce=False).view_as(correct_categorical[:,:,1]) 91 | flip_losses = F.cross_entropy(logits_flip, correct_categorical[:,:,2].view((-1,)), reduce=False).view_as(correct_categorical[:,:,2]) 92 | vals_losses = F.mse_loss(vals_numerical, correct_numerical.view((-1, dg.NUM_NUMERICAL)), reduce=False).view_as(correct_numerical).sum(-1) 93 | all_losses = torch.stack([subtype_losses, depth_losses, flip_losses, vals_losses], -1).sum(-1) 94 | per_example_loss = torch.where(correct_mask, all_losses, all_losses.new_zeros(1)).sum(-1) 95 | 96 | loss = per_example_idx_loss.mean() + per_example_loss.mean() 97 | 98 | return loss 99 | 100 | @respond_to(codraw_data.ObserveCanvas) 101 | def draw(self, episode): 102 | dg = self.datagen 103 | 104 | msg = episode.get_last(codraw_data.TellGroup).msg 105 | # assert msg != "" 106 | words = [self.datagen.vocabulary_dict.get(word, None) for word in msg.split()] 107 | words = [word for word in words if word is not None] 108 | if not words: 109 | episode.append(codraw_data.DrawGroup([])) 110 | episode.append(codraw_data.ReplyGroup("ok")) 111 | return 112 | msg_idxs = torch.tensor(words).to(cuda_if_available) 113 | 114 | canvas_context = episode.get_last(codraw_data.ObserveCanvas).scene 115 | 116 | canvas_binary = np.zeros((dg.NUM_INDEX, 1 + dg.NUM_DEPTH + dg.NUM_FLIP), dtype=bool) 117 | canvas_pose = np.zeros((2, dg.NUM_SUBTYPES), dtype=bool) 118 | canvas_numerical = 
np.zeros((dg.NUM_INDEX, dg.NUM_NUMERICAL)) 119 | for clipart in canvas_context: 120 | if clipart.idx in Clipart.HUMAN_IDXS: 121 | canvas_pose[clipart.human_idx, clipart.subtype] = True 122 | 123 | canvas_binary[clipart.idx, 0] = True 124 | canvas_binary[clipart.idx, 1 + clipart.depth] = True 125 | canvas_binary[clipart.idx, 1 + dg.NUM_DEPTH + clipart.flip] = True 126 | canvas_numerical[clipart.idx, 0] = clipart.normed_x 127 | canvas_numerical[clipart.idx, 1] = clipart.normed_y 128 | 129 | canvas_binary = np.concatenate([canvas_binary.reshape((-1,)), canvas_pose.reshape((-1,))]) 130 | canvas_numerical = canvas_numerical.reshape((-1,)) 131 | 132 | canvas_binary = torch.tensor(canvas_binary.astype(np.uint8), dtype=torch.uint8)[None,:].to(cuda_if_available) 133 | canvas_numerical = torch.tensor(canvas_numerical, dtype=torch.float)[None,:].to(cuda_if_available) 134 | 135 | hidden_feats = ( 136 | self.lang_to_hidden(msg_idxs[None,:], None) 137 | + self.canvas_binary_to_hidden(canvas_binary.float()) 138 | + self.canvas_numerical_to_hidden(canvas_numerical) 139 | ) 140 | 141 | clipart_scores = self.hidden_to_clipart(hidden_feats).view(-1, dg.NUM_INDEX, (dg.NUM_ALL + 1)) 142 | 143 | cliparts = [] 144 | prior_idxs = set([c.idx for c in canvas_context]) 145 | 146 | flat_scores = clipart_scores[:,:,1:].view((-1, dg.NUM_ALL)) 147 | (logits_subtype, logits_depth, logits_flip, vals_numerical) = torch.split(flat_scores, [dg.NUM_SUBTYPES, dg.NUM_DEPTH, dg.NUM_FLIP, dg.NUM_NUMERICAL], dim=1) 148 | vals_numerical = self.sigmoid_coeff * F.sigmoid(self.vals_coeff * vals_numerical) 149 | vals_numerical = vals_numerical.cpu().detach().numpy() 150 | 151 | clipart_idx_scores = clipart_scores[0,:,0].cpu().detach().numpy() 152 | 153 | for idx in np.where(clipart_idx_scores > 0)[0]: 154 | if idx in prior_idxs: 155 | continue 156 | nx, ny = vals_numerical[idx,:] 157 | clipart = Clipart(idx, int(logits_subtype[idx,:].argmax()), int(logits_depth[idx,:].argmax()), int(logits_flip[idx,:].argmax()), normed_x=nx, normed_y=ny) 158 | cliparts.append(clipart) 159 | episode.append(codraw_data.DrawGroup(cliparts)) 160 | episode.append(codraw_data.ReplyGroup("ok")) 161 | 162 | def get_action_fns(self): 163 | return [drawer_observe_canvas, self.draw] 164 | 165 | # %% 166 | 167 | class BOWAddOnlyDrawer(BaseAddOnlyDrawer): 168 | def init_full(self, d_embeddings=512, d_hidden=512): 169 | self._args = dict( 170 | d_embeddings=d_embeddings, 171 | d_hidden=d_hidden, 172 | ) 173 | super().init_full(d_hidden) 174 | 175 | self.d_embeddings = d_embeddings 176 | self.word_embs = torch.nn.EmbeddingBag(len(self.datagen.vocabulary_dict), d_embeddings) 177 | self.lang_to_hidden_module = nn.Linear(d_embeddings, d_hidden) 178 | 179 | self.to(cuda_if_available) 180 | 181 | def lang_to_hidden(self, msg_idxs, offsets=None): 182 | bow_feats = self.word_embs(msg_idxs, offsets) 183 | return self.lang_to_hidden_module(bow_feats) 184 | # %% 185 | 186 | 187 | class LSTMAddOnlyDrawer(BaseAddOnlyDrawer): 188 | def init_full(self, d_embeddings=256, d_hidden=512, d_lstm=256, num_lstm_layers=1, pre_lstm_dropout=0.4, lstm_dropout=0.0): 189 | self._args = dict( 190 | d_embeddings=d_embeddings, 191 | d_hidden=d_hidden, 192 | d_lstm=256, 193 | num_lstm_layers=num_lstm_layers, 194 | pre_lstm_dropout=pre_lstm_dropout, 195 | lstm_dropout=lstm_dropout, 196 | ) 197 | super().init_full(d_hidden) 198 | 199 | self.d_embeddings = d_embeddings 200 | self.word_embs = torch.nn.Embedding(len(self.datagen.vocabulary_dict), d_embeddings) 201 | self.pre_lstm_dropout = 
nn.Dropout(pre_lstm_dropout) 202 | self.lstm = nn.LSTM(d_embeddings, d_lstm, bidirectional=True, num_layers=num_lstm_layers, dropout=lstm_dropout) 203 | # self.post_lstm_project = nn.Linear(d_lstm * 2 * num_lstm_layers, d_hidden) 204 | # self.post_lstm_project = lambda x: x #nn.Linear(d_lstm * 2 * num_lstm_layers, d_hidden) 205 | self.post_lstm_project = lambda x: x[:,:d_hidden] 206 | self.to(cuda_if_available) 207 | 208 | def lang_to_hidden(self, msg_idxs, offsets=None): 209 | # global dump 210 | # dump = msg_idxs, offsets 211 | # assert False 212 | # bow_feats = self.word_embs(msg_idxs, offsets) 213 | # return self.lang_to_hidden_module(bow_feats) 214 | 215 | if offsets is not None: 216 | start = offsets.cpu() 217 | end = torch.cat([start[1:], torch.tensor([msg_idxs.shape[-1]])]) 218 | undo_sorting = np.zeros(start.shape[0], dtype=int) 219 | undo_sorting[(start - end).numpy().argsort()] = np.arange(start.shape[0], dtype=int) 220 | words_packed = nn.utils.rnn.pack_sequence(sorted([msg_idxs[i:j] for i, j in list(zip(start.numpy(), end.numpy()))], key=lambda x: -x.shape[0])) 221 | else: 222 | words_packed = nn.utils.rnn.pack_sequence([msg_idxs[0,:]]) 223 | undo_sorting = np.array([0], dtype=int) 224 | word_vecs = embedded = nn.utils.rnn.PackedSequence( 225 | self.pre_lstm_dropout(self.word_embs(words_packed.data)), 226 | words_packed.batch_sizes) 227 | 228 | _, (h_final, c_final) = self.lstm(word_vecs) 229 | 230 | # sentence_reps = h_final[-2:,:,:].permute(1, 2, 0).contiguous().view(undo_sorting.size, -1) 231 | sentence_reps = c_final[-2:,:,:].permute(1, 2, 0).contiguous().view(undo_sorting.size, -1) 232 | sentence_reps = self.post_lstm_project(sentence_reps) 233 | 234 | if offsets is not None: 235 | sentence_reps = sentence_reps[undo_sorting] 236 | return sentence_reps 237 | 238 | # %% 239 | 240 | class PragmaticNearestNeighborTeller(Model): 241 | datagen_cls = NearestNeighborData 242 | 243 | def init_full(self, drawer_model=None, num_candidates=10): 244 | self.drawer_model = drawer_model 245 | self.num_candidates = num_candidates 246 | 247 | def set_drawer_model(self, drawer_model): 248 | self.drawer_model = drawer_model 249 | 250 | def get_spec(self): 251 | return dict(num_candidates=self.num_candidates) 252 | 253 | @respond_to(codraw_data.SelectClipart) 254 | def tell(self, episode): 255 | clipart = episode.get_last(codraw_data.SelectClipart).clipart 256 | candidate_cliparts = heapq.nlargest(self.num_candidates, self.datagen.clipart_to_msg, key=lambda cand_clipart: clipart_similarity(cand_clipart, clipart)) 257 | # global dump 258 | # dump = candidate_cliparts, episode 259 | # assert False 260 | 261 | candidate_msgs = [self.datagen.clipart_to_msg[cand_clipart] for cand_clipart in candidate_cliparts] 262 | 263 | expected_context = [event.clipart for event in episode if isinstance(event, codraw_data.SelectClipart)][:-1] 264 | 265 | candidate_responses = [self.drawer_model.just_draw(msg, expected_context) for msg in candidate_msgs] 266 | 267 | best_idx = np.argmax([scene_similarity(response_scene, [clipart]) for response_scene in candidate_responses]) 268 | 269 | best_msg = candidate_msgs[best_idx] 270 | 271 | episode.append(codraw_data.TellGroup(best_msg)) 272 | 273 | def get_action_fns(self): 274 | return [select_clipart_to_tell, self.tell] 275 | 276 | # %% 277 | 278 | def load_baseline2(): 279 | baseline2_specs = torch_load(Path('models/lstmaddonly_may31.pt')) 280 | 281 | models = {} 282 | for k, spec in baseline2_specs.items(): 283 | print(k) 284 | models[k] = 
globals()[spec['class']](spec=spec) 285 | 286 | # TODO(nikita): serialize these models to disk 287 | data_nn_a = NearestNeighborData('a') 288 | data_nn_b = NearestNeighborData('b') 289 | print('teller_pragmaticnn_a') 290 | models['teller_pragmaticnn_a'] = PragmaticNearestNeighborTeller(data_nn_a, drawer_model=models['drawer_lstmaddonly_a']) 291 | print('teller_pragmaticnn_b') 292 | models['teller_pragmaticnn_b'] = PragmaticNearestNeighborTeller(data_nn_b, drawer_model=models['drawer_lstmaddonly_b']) 293 | 294 | return models 295 | -------------------------------------------------------------------------------- /baseline2_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | assert __name__ == "__main__", "Training script should not be imported!" 13 | 14 | #%% 15 | 16 | import numpy as np 17 | from pathlib import Path 18 | import editdistance 19 | 20 | import torch 21 | import torch.cuda 22 | import torch.nn as nn 23 | import torch.nn.functional as F 24 | 25 | from nkfb_util import logsumexp, cuda_if_available 26 | 27 | import codraw_data 28 | from codraw_data import AbstractScene, Clipart 29 | import abs_render 30 | from abs_metric import scene_similarity, clipart_similarity 31 | from episode import Episode, respond_to, response_partial 32 | 33 | from datagen import BOWAddUpdateData 34 | from baseline2_models import BOWAddOnlyDrawer, LSTMAddOnlyDrawer 35 | import model 36 | from model import make_fns, eval_fns 37 | from model import scripted_tell, scripted_tell_before_peek, scripted_tell_after_peek 38 | 39 | # %% 40 | 41 | data_bowaddupdate_a = BOWAddUpdateData('a') 42 | data_bowaddupdate_b = BOWAddUpdateData('b') 43 | 44 | # %% 45 | 46 | # drawer_bowaddonly_a = BOWAddOnlyDrawer(data_bowaddupdate_a) 47 | # drawer_bowaddonly_b = BOWAddOnlyDrawer(data_bowaddupdate_b) 48 | # 49 | # optimizer_bowaddonly_a = torch.optim.Adam(drawer_bowaddonly_a.parameters()) 50 | # optimizer_bowaddonly_b = torch.optim.Adam(drawer_bowaddonly_b.parameters()) 51 | 52 | #%% 53 | 54 | # for epoch in range(15): 55 | # drawer_bowaddonly_a.train() 56 | # for num, ex in enumerate(drawer_bowaddonly_a.datagen.get_examples_batch()): 57 | # optimizer_bowaddonly_a.zero_grad() 58 | # loss = drawer_bowaddonly_a.forward(ex) 59 | # loss.backward() 60 | # optimizer_bowaddonly_a.step() 61 | # 62 | # print(f'Done epoch {epoch} loss {float(loss)}') 63 | # if epoch % 1 == 0: 64 | # for split in ('a',): 65 | # sims = eval_fns(make_fns(split, scripted_tell, (drawer_bowaddonly_a, drawer_bowaddonly_b)), limit=100) 66 | # print(split, sims.mean()) 67 | # 68 | # sims = eval_fns(make_fns(split, scripted_tell_before_peek, (drawer_bowaddonly_a, drawer_bowaddonly_b)), limit=100) 69 | # print(split, 'before', sims.mean()) 70 | # 71 | # sims = eval_fns(make_fns(split, scripted_tell_after_peek, (drawer_bowaddonly_a, drawer_bowaddonly_b)), limit=100) 72 | # print(split, 'after', sims.mean()) 73 | # %% 74 | 75 | drawer_lstmaddonly_a = LSTMAddOnlyDrawer(data_bowaddupdate_a) 76 | drawer_lstmaddonly_b = LSTMAddOnlyDrawer(data_bowaddupdate_b) 77 | 78 | optimizer_lstmaddonly_a = torch.optim.Adam(drawer_lstmaddonly_a.parameters()) 79 | optimizer_lstmaddonly_b = 
torch.optim.Adam(drawer_lstmaddonly_b.parameters()) 80 | 81 | #%% 82 | 83 | for epoch in range(15): 84 | drawer_lstmaddonly_a.train() 85 | for num, ex in enumerate(drawer_lstmaddonly_a.datagen.get_examples_batch()): 86 | optimizer_lstmaddonly_a.zero_grad() 87 | loss = drawer_lstmaddonly_a.forward(ex) 88 | loss.backward() 89 | optimizer_lstmaddonly_a.step() 90 | 91 | print(f'Done epoch {epoch} loss {float(loss)}') 92 | if epoch % 1 == 0: 93 | for split in ('a',): 94 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 95 | print(split, sims.mean()) 96 | 97 | sims = eval_fns(make_fns(split, scripted_tell_before_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 98 | print(split, 'before', sims.mean()) 99 | 100 | sims = eval_fns(make_fns(split, scripted_tell_after_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 101 | print(split, 'after', sims.mean()) 102 | #%% 103 | 104 | for epoch in range(15): 105 | drawer_lstmaddonly_b.train() 106 | for num, ex in enumerate(drawer_lstmaddonly_b.datagen.get_examples_batch()): 107 | optimizer_lstmaddonly_b.zero_grad() 108 | loss = drawer_lstmaddonly_b.forward(ex) 109 | loss.backward() 110 | optimizer_lstmaddonly_b.step() 111 | 112 | print(f'Done epoch {epoch} loss {float(loss)}') 113 | if epoch % 1 == 0: 114 | for split in ('b',): 115 | sims = eval_fns(make_fns(split, scripted_tell, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 116 | print(split, sims.mean()) 117 | 118 | sims = eval_fns(make_fns(split, scripted_tell_before_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 119 | print(split, 'before', sims.mean()) 120 | 121 | sims = eval_fns(make_fns(split, scripted_tell_after_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) 122 | print(split, 'after', sims.mean()) 123 | 124 | # %% 125 | 126 | lstmaddonly_specs = dict( 127 | drawer_lstmaddonly_a = drawer_lstmaddonly_a.spec, 128 | drawer_lstmaddonly_b = drawer_lstmaddonly_b.spec, 129 | ) 130 | 131 | #%% 132 | 133 | torch.save(lstmaddonly_specs, Path('models/lstmaddonly.pt')) 134 | -------------------------------------------------------------------------------- /baseline3_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | import numpy as np 13 | from pathlib import Path 14 | import editdistance 15 | 16 | import torch 17 | import torch.cuda 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | from nkfb_util import logsumexp, cuda_if_available 22 | 23 | import codraw_data 24 | from codraw_data import AbstractScene, Clipart 25 | import abs_render 26 | from abs_metric import scene_similarity, clipart_similarity 27 | from episode import Episode, Transcriber, respond_to, response_partial 28 | 29 | from baseline1_models import load_baseline1 30 | from baseline2_models import load_baseline2 31 | from baseline3_models import load_baseline3 32 | import model 33 | from model import make_fns, eval_fns 34 | 35 | # %% 36 | 37 | compontent_evaluator = model.ComponentEvaluator.get() 38 | 39 | # %% 40 | 41 | models_baseline1 = load_baseline1() 42 | models_baseline2 = load_baseline2() 43 | models_baseline3 = load_baseline3() 44 | 45 | # %% 46 | 47 | tellers = [ 48 | # ('teller_nn', (models_baseline1['teller_nn_a'], models_baseline1['teller_nn_b'])), 49 | # ('teller_c2seq', (models_baseline1['teller_c2seq_a'], models_baseline1['teller_c2seq_b'])), 50 | # ('teller_pragmaticnn', (models_baseline2['teller_pragmaticnn_a'], models_baseline2['teller_pragmaticnn_b'])), 51 | ('teller_scene2seq', (models_baseline3['teller_scene2seq_a'], models_baseline3['teller_scene2seq_b'])), 52 | ('teller_scene2seq_aux', (models_baseline3['teller_scene2seq_aux_a'], models_baseline3['teller_scene2seq_aux_b'])), 53 | ('teller_scene2seq_aux2', (models_baseline3['teller_scene2seq_aux2_a'], models_baseline3['teller_scene2seq_aux2_b'])), 54 | ] 55 | 56 | drawers = [ 57 | # ('drawer_nn', (models_baseline1['drawer_nn_a'], models_baseline1['drawer_nn_b'])), 58 | # ('drawer_sim', (models_baseline1['drawer_sim_a'], models_baseline1['drawer_sim_b'])), 59 | # ('drawer_bow2c', (models_baseline1['drawer_bow2c_a'], models_baseline1['drawer_bow2c_b'])), 60 | # ('drawer_bow2bce', (models_baseline1['drawer_bow2bce_a'], models_baseline1['drawer_bow2bce_b'])), 61 | # ('drawer_bowcanvas2bce', (models_baseline1['drawer_bowcanvas2bce_a'], models_baseline1['drawer_bowcanvas2bce_b'])), 62 | ('drawer_lstmaddonly', (models_baseline2['drawer_lstmaddonly_a'], models_baseline2['drawer_lstmaddonly_b'])), 63 | ] 64 | 65 | # %% 66 | print() 67 | 68 | human_sims = np.array([ 69 | scene_similarity(human_scene, true_scene) 70 | for true_scene, human_scene in codraw_data.get_truth_and_human_scenes('dev') 71 | ]) 72 | 73 | print(f"Human scene similarity: mean={human_sims.mean():.6f} std={human_sims.std():.6f} median={np.median(human_sims):.6f}") 74 | 75 | # %% 76 | print() 77 | print() 78 | # %% 79 | 80 | limit = None 81 | print("Teller \t Drawer \t Scene similarity") 82 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 83 | for teller_name, teller_pair in tellers: 84 | for drawer_name, drawer_pair in drawers: 85 | for splits in splits_group: 86 | sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 87 | teller_caption = f"{teller_name}_{splits[0]}" 88 | drawer_caption = f"{drawer_name}_{splits[1]}" 89 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", sims.mean()) 90 | print() 91 | 92 | # %% 93 | print() 94 | print() 95 | # %% 96 | 97 | limit = None 98 | print("Drawer evaluations against script") 99 | print("Drawer \t Scene similarity") 100 | for drawer_name, drawer_pair in drawers: 101 | for split in ('a', 
'b'): 102 | sims = eval_fns(make_fns(split, model.scripted_tell, drawer_pair), limit=limit) 103 | drawer_caption = f"{drawer_name}_{split}" 104 | print(f"{drawer_caption:17s}\t", sims.mean()) 105 | 106 | # %% 107 | print() 108 | print() 109 | # %% 110 | 111 | limit = None 112 | print("Teller \t Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 113 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 114 | for teller_name, teller_pair in tellers: 115 | for drawer_name, drawer_pair in drawers: 116 | for splits in splits_group: 117 | components = compontent_evaluator.eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 118 | teller_caption = f"{teller_name}_{splits[0]}" 119 | drawer_caption = f"{drawer_name}_{splits[1]}" 120 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 121 | print() 122 | 123 | # %% 124 | print() 125 | print() 126 | # %% 127 | 128 | limit = None 129 | print("Drawer evaluations against script") 130 | print("Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 131 | for drawer_name, drawer_pair in drawers: 132 | for split in ('a', 'b'): 133 | components = compontent_evaluator.eval_fns(make_fns(split, model.scripted_tell, drawer_pair), limit=limit) 134 | drawer_caption = f"{drawer_name}_{split}" 135 | print(f"{drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 136 | -------------------------------------------------------------------------------- /baseline3_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | #%% 8 | 9 | import numpy as np 10 | from pathlib import Path 11 | 12 | import torch 13 | import torch.cuda 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | from nkfb_util import logsumexp, cuda_if_available, torch_load 18 | from attention import AttentionSeqToMasked 19 | 20 | import codraw_data 21 | from codraw_data import AbstractScene, Clipart 22 | import abs_render 23 | from abs_metric import scene_similarity, clipart_similarity 24 | from episode import Episode, Transcriber, respond_to 25 | 26 | from datagen import SceneToSeqData 27 | from model import make_fns, eval_fns 28 | from model import Model 29 | 30 | # %% 31 | 32 | class SceneToSeqTeller(Model, torch.nn.Module): 33 | datagen_cls = SceneToSeqData 34 | 35 | def init_full(self, 36 | d_word_emb=256, 37 | d_tag_emb=128, num_heads=4, d_qkv=128, 38 | pre_attn_tag_dropout=0.2, attn_dropout=0.1, 39 | d_lstm=1024, num_lstm_layers=1, 40 | pre_lstm_emb_dropout=0.5, 41 | pre_lstm_scene_dropout=0.15, 42 | lstm_dropout=0.0, 43 | post_lstm_dropout=0.3, 44 | label_smoothing=0.05, 45 | prediction_loss_scale=5., 46 | d_clipart_state_hidden=1024, 47 | predict_for_full_library=True, 48 | ): 49 | self._args = dict( 50 | d_word_emb=d_word_emb, 51 | d_tag_emb=d_tag_emb, num_heads=num_heads, d_qkv=d_qkv, 52 | pre_attn_tag_dropout=pre_attn_tag_dropout, 53 | attn_dropout=attn_dropout, 54 | d_lstm=d_lstm, num_lstm_layers=num_lstm_layers, pre_lstm_emb_dropout=pre_lstm_emb_dropout, 55 | pre_lstm_scene_dropout=pre_lstm_scene_dropout, 56 | lstm_dropout=lstm_dropout, 57 | post_lstm_dropout=post_lstm_dropout, 58 | label_smoothing=label_smoothing, 59 | prediction_loss_scale=prediction_loss_scale, 60 | d_clipart_state_hidden=d_clipart_state_hidden, 61 | predict_for_full_library=predict_for_full_library, 62 | ) 63 | dg = self.datagen 64 | 65 | self.tag_embs = nn.Embedding(dg.NUM_TAGS, d_tag_emb) 66 | self.d_clipart_tags = d_tag_emb * dg.NUM_TAGS_PER_INDEX 67 | 68 | self.pre_attn_tag_dropout = nn.Dropout(pre_attn_tag_dropout) 69 | 70 | self.attn_prelstm = AttentionSeqToMasked( 71 | d_pre_q=d_word_emb, 72 | d_pre_k=self.d_clipart_tags, 73 | d_pre_v=self.d_clipart_tags, 74 | d_qk=d_qkv, d_v=d_qkv, 75 | num_heads=num_heads, 76 | attn_dropout=attn_dropout) 77 | 78 | self.attn = AttentionSeqToMasked( 79 | d_pre_q=d_lstm, 80 | d_pre_k=self.d_clipart_tags, 81 | d_pre_v=self.d_clipart_tags, 82 | d_qk=d_qkv, d_v=d_qkv, 83 | num_heads=num_heads, 84 | attn_dropout=attn_dropout) 85 | 86 | self.word_embs = nn.Embedding(len(self.datagen.vocabulary_dict), d_word_emb) 87 | 88 | self.pre_lstm_emb_dropout = nn.Dropout(pre_lstm_emb_dropout) 89 | self.pre_lstm_scene_dropout = nn.Dropout(pre_lstm_scene_dropout) 90 | self.lstm = nn.LSTM(d_word_emb + self.attn_prelstm.d_out, d_lstm, num_layers=num_lstm_layers, dropout=lstm_dropout) 91 | self.post_lstm_dropout = nn.Dropout(post_lstm_dropout) 92 | self.word_project = nn.Linear(d_lstm + self.attn.d_out, len(self.datagen.vocabulary_dict)) 93 | 94 | self.label_smoothing = label_smoothing 95 | 96 | # Possible auxiliary loss for predicting clipart state 97 | self.prediction_loss_scale = prediction_loss_scale 98 | self.predict_for_full_library = predict_for_full_library 99 | if prediction_loss_scale > 0: 100 | if predict_for_full_library: 101 | d_clipart_state_in = d_lstm + dg.NUM_INDEX 102 | else: 103 | d_clipart_state_in = d_lstm 104 | self.clipart_state_predictor = nn.Sequential( 105 | nn.Linear(d_clipart_state_in, d_clipart_state_hidden), 106 | nn.ReLU(), 107 | nn.Linear(d_clipart_state_hidden, dg.NUM_INDEX * 
dg.NUM_CLIPART_STATES), 108 | ) 109 | else: 110 | self.clipart_state_predictor = None 111 | 112 | self.to(cuda_if_available) 113 | 114 | self.inference_method = 'greedy' 115 | self.sampling_temperature = 1.0 116 | self.max_rounds = 50 # This is only changed for human eval 117 | 118 | def get_spec(self): 119 | return self._args 120 | 121 | def print_hparams(self): 122 | print("Hyperparameters:") 123 | for k, v in self._args.items(): 124 | print(k, '=', v) 125 | print() 126 | 127 | def forward(self, example_batch, return_loss=True, return_nll_count=False): 128 | dg = self.datagen 129 | 130 | b_clipart_tags = self.tag_embs(example_batch['b_scene_tags']).view(-1, dg.NUM_INDEX, self.d_clipart_tags) 131 | 132 | if not (return_loss or return_nll_count): 133 | ks_prelstm, vs_prelstm = self.attn_prelstm.precompute_kv(b_clipart_tags, b_clipart_tags) 134 | ks, vs = self.attn.precompute_kv(b_clipart_tags, b_clipart_tags) 135 | return example_batch['b_scene_mask'], ks_prelstm, vs_prelstm, ks, vs 136 | 137 | packer = example_batch['packer'] 138 | ob_clipart_tags = packer.ob_from_b(b_clipart_tags) 139 | ob_clipart_tags = self.pre_attn_tag_dropout(ob_clipart_tags) 140 | ob_scene_mask = packer.ob_from_b(example_batch['b_scene_mask']) 141 | 142 | brw_teller_tokens_in = example_batch['brw_teller_tokens_in'] 143 | if self.training: 144 | word_dropout_probs = 1. / (1. + example_batch['brw_teller_counts_in']) 145 | brw_word_dropout_mask = torch.rand_like(word_dropout_probs) < word_dropout_probs 146 | brw_teller_tokens_in = torch.where(brw_word_dropout_mask, torch.full_like(brw_teller_tokens_in, dg.unk_index), brw_teller_tokens_in) 147 | 148 | brw_embs = self.pre_lstm_emb_dropout(self.word_embs(brw_teller_tokens_in)) 149 | orwb_embs = packer.orwb_from_brw_pack(brw_embs) 150 | 151 | orwb_attended_values_prelstm = self.attn_prelstm(orwb_embs, ob_clipart_tags, ob_clipart_tags, k_mask=ob_scene_mask) 152 | orwb_lstm_in = nn.utils.rnn.PackedSequence(torch.cat([ 153 | orwb_embs.data, 154 | orwb_attended_values_prelstm.data, 155 | ], -1), orwb_embs.batch_sizes) 156 | 157 | orwb_lstm_out, _ = self.lstm(orwb_lstm_in) 158 | orwb_lstm_out = nn.utils.rnn.PackedSequence(self.post_lstm_dropout(orwb_lstm_out.data), orwb_lstm_out.batch_sizes) 159 | 160 | orwb_attended_values = self.attn(orwb_lstm_out, ob_clipart_tags, ob_clipart_tags, k_mask=ob_scene_mask) 161 | 162 | brw_pre_project = torch.cat([ 163 | packer.brw_from_orwb_unpack(orwb_lstm_out), 164 | packer.brw_from_orwb_unpack(orwb_attended_values), 165 | ], -1) 166 | 167 | brw_word_logits = self.word_project(brw_pre_project) 168 | brw_word_losses = F.cross_entropy(brw_word_logits, example_batch['brw_teller_tokens_out'], reduce=False) 169 | 170 | if self.prediction_loss_scale > 0: 171 | brw_starts_round = (example_batch['brw_teller_tokens_in'] == dg.vocabulary_dict['']) 172 | if self.predict_for_full_library: 173 | br_clipart_state_predictor_in = torch.cat([ 174 | packer.brw_from_orwb_unpack(orwb_lstm_out)[brw_starts_round], 175 | packer.br_from_b_expand(example_batch['b_scene_mask']).to(torch.float), 176 | ], -1) 177 | else: 178 | br_clipart_state_predictor_in = packer.brw_from_orwb_unpack(orwb_lstm_out)[brw_starts_round] 179 | bri_clipart_state_logits = self.clipart_state_predictor(br_clipart_state_predictor_in).view(-1, dg.NUM_CLIPART_STATES) 180 | bri_clipart_state_losses = F.cross_entropy(bri_clipart_state_logits, example_batch['br_drawer_clipart_state'].view(-1), reduce=False) 181 | if self.predict_for_full_library: 182 | br_clipart_state_losses = 
bri_clipart_state_losses.view(-1, dg.NUM_INDEX).sum(-1) 183 | else: 184 | br_clipart_state_losses = torch.where( 185 | packer.br_from_b_expand(example_batch['b_scene_mask']), 186 | bri_clipart_state_losses.view(-1, dg.NUM_INDEX), 187 | torch.zeros_like(bri_clipart_state_losses.view(-1, dg.NUM_INDEX))).sum(-1) 188 | 189 | if return_loss: 190 | # Label smoothing 191 | eps = (self.label_smoothing / brw_word_logits.shape[-1]) 192 | brw_word_losses = (1. - self.label_smoothing) * brw_word_losses + eps * (-F.log_softmax(brw_word_logits, dim=-1).sum(dim=-1)) 193 | 194 | # TODO(nikita): Packer should implement some reduction operations 195 | per_example_word_losses = nn.utils.rnn.pad_packed_sequence(packer.orwb_from_brw_pack(brw_word_losses))[0].sum(0) 196 | word_loss = per_example_word_losses.mean() 197 | 198 | if self.prediction_loss_scale > 0: 199 | per_example_prediction_losses = nn.utils.rnn.pad_packed_sequence(packer.srb_from_br_pack(br_clipart_state_losses))[0].sum(0) 200 | prediction_loss = per_example_prediction_losses.mean() 201 | 202 | return self.prediction_loss_scale * prediction_loss + word_loss 203 | else: 204 | return word_loss 205 | 206 | if return_nll_count: 207 | # TODO(nikita): the model uses multiple tokens to signal the end of 208 | # the last utterance, followed by the end of the conversation. These 209 | # extra actions make perplexity not quite the same as models that 210 | # do stop tokens differently 211 | brw_non_unk_mask = example_batch['brw_teller_tokens_out'] != dg.unk_index 212 | brw_nll = torch.where(brw_non_unk_mask, brw_word_losses, torch.zeros_like(brw_word_losses)) 213 | nll = float(brw_nll.sum()) 214 | count = int(brw_non_unk_mask.long().sum()) 215 | return nll, count 216 | 217 | assert False, "unreachable" 218 | 219 | @respond_to(codraw_data.ObserveTruth) 220 | @respond_to(codraw_data.ReplyGroup) 221 | def tell(self, episode): 222 | if not hasattr(episode, 'to_tell'): 223 | self.prepare(episode) 224 | 225 | if episode.to_tell: 226 | events = episode.to_tell.pop(0) 227 | episode.extend(events) 228 | 229 | def prepare(self, episode): 230 | true_scene = episode.get_last(codraw_data.ObserveTruth).scene 231 | 232 | example_batch = self.datagen.tensors_from_episode(episode) 233 | b_scene_mask, ks_prelstm, vs_prelstm, ks, vs = self.forward(example_batch, return_loss=False) 234 | 235 | to_tell = [] 236 | 237 | lstm_state = None # carried across conversation rounds! 
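# Note: lstm_state is created once here and then threaded through every
# self.lstm(lstm_in, lstm_state) call in the loop below, so each new utterance
# is generated conditioned on the recurrent state accumulated over all earlier
# rounds of the conversation, not just on the current round's tokens.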
238 | 239 | for round in range(self.max_rounds): 240 | tokens = [self.datagen.vocabulary_dict['']] 241 | events_this_round = [] 242 | # Longest utterance in all of CoDraw is 39 words 243 | # Humans have a 140-char limit, but this is not easy to enforce with 244 | # word-level tokenization 245 | for wordnum in range(50): 246 | token_emb = self.word_embs(torch.tensor(tokens[-1], dtype=torch.long).to(cuda_if_available))[None,None,:] 247 | attended_values_prelstm = self.attn_prelstm(token_emb, ks=ks_prelstm, vs=vs_prelstm, k_mask=b_scene_mask) 248 | lstm_in = torch.cat([token_emb, attended_values_prelstm], -1) 249 | lstm_out, lstm_state = self.lstm(lstm_in, lstm_state) 250 | attended_values = self.attn(lstm_out, ks=ks, vs=vs, k_mask=b_scene_mask) 251 | pre_project = torch.cat([lstm_out, attended_values], -1) 252 | 253 | if tokens[-1] == self.datagen.vocabulary_dict[''] and self.prediction_loss_scale > 0: 254 | assert not events_this_round 255 | if self.predict_for_full_library: 256 | clipart_state_predictor_in = torch.cat([ 257 | lstm_out, 258 | b_scene_mask.to(torch.float)[None,:,:], 259 | ], -1) 260 | else: 261 | clipart_state_predictor_in = lstm_out 262 | clipart_state_logits = self.clipart_state_predictor(clipart_state_predictor_in).view(self.datagen.NUM_INDEX, self.datagen.NUM_CLIPART_STATES) 263 | clipart_state_selected = clipart_state_logits.argmax(dim=-1) 264 | undrawn = AbstractScene([c for c in true_scene if clipart_state_selected[c.idx] == self.datagen.CLIPART_STATE_UNDRAWN]) 265 | intention = codraw_data.TellerIntention(drawn=None, undrawn=undrawn, draw_next=None) 266 | events_this_round.append(intention) 267 | 268 | word_logits = self.word_project(pre_project[0,0,:]) 269 | word_logits[self.datagen.vocabulary_dict['']] = -float('inf') 270 | if round == 0 and wordnum == 0: 271 | word_logits[self.datagen.vocabulary_dict['']] = -float('inf') 272 | 273 | if self.inference_method == 'greedy': 274 | next_token = int(word_logits.argmax()) 275 | elif self.inference_method == 'sample': 276 | next_token = int(torch.multinomial(F.softmax(word_logits / self.sampling_temperature, dim=-1)[None, :], 1).item()) 277 | else: 278 | raise ValueError(f"Invalid inference_method: {self.inference_method}") 279 | 280 | assert next_token != self.datagen.vocabulary_dict[''] 281 | tokens.append(next_token) 282 | if next_token == self.datagen.vocabulary_dict['']: 283 | break 284 | elif next_token == self.datagen.vocabulary_dict['']: 285 | break 286 | 287 | if tokens[-1] == self.datagen.vocabulary_dict['']: 288 | break 289 | 290 | msg = " ".join([self.datagen.vocabulary[i] for i in tokens[1:-1]]) 291 | events_this_round.append(codraw_data.TellGroup(msg)) 292 | to_tell.append(events_this_round) 293 | 294 | episode.to_tell = to_tell 295 | 296 | def get_action_fns(self): 297 | return [self.tell] 298 | 299 | def calc_split_loss(self, split='dev'): 300 | """ 301 | Calculates teller loss on a full split 302 | """ 303 | datagen_spec = {**self.datagen.spec} 304 | datagen_spec['split'] = split 305 | datagen_dev = self.datagen_cls(spec=datagen_spec) 306 | 307 | assert datagen_dev.vocabulary == self.datagen.vocabulary 308 | 309 | losses = [] 310 | count = 0 311 | with torch.no_grad(): 312 | self.eval() 313 | for ex in datagen_dev.get_examples_unshuffled_batch(batch_size=128): 314 | batch_size = ex['b_scene_mask'].shape[0] 315 | loss = self.forward(ex) 316 | loss = float(loss) * batch_size 317 | losses.append(loss) 318 | count += batch_size 319 | 320 | return np.array(losses).sum() / count 321 | 322 | # %% 323 | 324 | def 
load_baseline3(): 325 | baseline3_specs = torch_load(Path('models/scene2seq_july11.pt')) 326 | 327 | models = {} 328 | for k, spec in baseline3_specs.items(): 329 | print(k) 330 | models[k] = globals()[spec['class']](spec=spec) 331 | models[k].eval() 332 | 333 | return models 334 | -------------------------------------------------------------------------------- /baseline3_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | assert __name__ == "__main__", "Training script should not be imported!" 13 | 14 | #%% 15 | 16 | import numpy as np 17 | from pathlib import Path 18 | 19 | import torch 20 | import torch.cuda 21 | import torch.nn as nn 22 | import torch.nn.functional as F 23 | 24 | from nkfb_util import logsumexp, cuda_if_available 25 | 26 | import codraw_data 27 | from codraw_data import AbstractScene, Clipart 28 | import abs_render 29 | from abs_metric import scene_similarity, clipart_similarity 30 | from episode import Episode, Transcriber, respond_to 31 | 32 | import model 33 | from model import make_fns, eval_fns 34 | from model import Model 35 | from baseline2_models import load_baseline2 36 | 37 | from datagen import SceneToSeqData 38 | from baseline3_models import SceneToSeqTeller 39 | 40 | 41 | # %% 42 | 43 | # scenes_and_scripts_dev = codraw_data.get_scenes_and_scripts('dev') 44 | 45 | # transcribe = Transcriber( 46 | # 'baseline3_train.py' if INTERACTIVE else __file__, 47 | # scenes_and_scripts=scenes_and_scripts_dev[::110], 48 | # scenes_description="scenes_and_scripts_dev[::110]") 49 | 50 | # %% 51 | 52 | models_baseline2 = load_baseline2() 53 | 54 | # %% 55 | 56 | drawer_lstmaddonly_a = models_baseline2['drawer_lstmaddonly_a'] 57 | drawer_lstmaddonly_b = models_baseline2['drawer_lstmaddonly_b'] 58 | 59 | # %% 60 | 61 | data_scene2seq_a = SceneToSeqData('a') 62 | data_scene2seq_b = SceneToSeqData('b') 63 | 64 | # %% 65 | 66 | def train_teller(split, teller_pair, num_epochs=50, limit=100): 67 | splits_pair = split + 'a', split + 'b' 68 | if split == 'a': 69 | teller = teller_pair[0] 70 | elif split == 'b': 71 | teller = teller_pair[1] 72 | else: 73 | assert False 74 | 75 | optimizer = torch.optim.Adam(teller.parameters()) 76 | 77 | print('perplexity-dev', model.calc_perplexity(teller)) 78 | print('perplexity-a', model.calc_perplexity(teller, 'a')) 79 | 80 | print('avg-loss-dev', teller.calc_split_loss()) 81 | print('avg-loss-a', teller.calc_split_loss('a')) 82 | 83 | for epoch in range(num_epochs): 84 | teller.train() 85 | for num, ex in enumerate(teller.datagen.get_examples_batch()): 86 | optimizer.zero_grad() 87 | loss = teller(ex) 88 | loss.backward() 89 | optimizer.step() 90 | 91 | print(f'Done epoch {epoch} loss {float(loss)}') 92 | if epoch % 5 == 0: 93 | del ex, loss # clean up memory 94 | print('perplexity-dev', model.calc_perplexity(teller)) 95 | print('perplexity-a', model.calc_perplexity(teller, 'a')) 96 | print('avg-loss-dev', teller.calc_split_loss()) 97 | print('avg-loss-a', teller.calc_split_loss('a')) 98 | for splits in splits_pair: 99 | sims = eval_fns(make_fns(splits, teller_pair, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=limit) 100 | print(splits, sims.mean()) 101 | 102 | 
# %% 103 | 104 | teller_scene2seq_a = SceneToSeqTeller(data_scene2seq_a, prediction_loss_scale=0) 105 | teller_scene2seq_b = SceneToSeqTeller(data_scene2seq_b, prediction_loss_scale=0) 106 | 107 | train_teller('a', (teller_scene2seq_a, teller_scene2seq_b)) 108 | train_teller('b', (teller_scene2seq_a, teller_scene2seq_b)) 109 | 110 | # %% scene2seq with intermediate supervision for all clipart ids 111 | 112 | teller_scene2seq_aux_a = SceneToSeqTeller(data_scene2seq_a) 113 | teller_scene2seq_aux_b = SceneToSeqTeller(data_scene2seq_b) 114 | 115 | train_teller('a', (teller_scene2seq_aux_a, teller_scene2seq_aux_b)) 116 | train_teller('b', (teller_scene2seq_aux_a, teller_scene2seq_aux_b)) 117 | 118 | # %% scene2seq with intermediate supervision only for present cliparts 119 | 120 | teller_scene2seq_aux2_a = SceneToSeqTeller(data_scene2seq_a, predict_for_full_library=False, prediction_loss_scale=6.) 121 | teller_scene2seq_aux2_b = SceneToSeqTeller(data_scene2seq_b, predict_for_full_library=False, prediction_loss_scale=6.) 122 | 123 | train_teller('a', (teller_scene2seq_aux2_a, teller_scene2seq_aux2_b), num_epochs=40) 124 | train_teller('b', (teller_scene2seq_aux2_a, teller_scene2seq_aux2_b), num_epochs=40) 125 | 126 | # %% 127 | 128 | scene2seq_specs = dict( 129 | teller_scene2seq_a = teller_scene2seq_a.spec, 130 | teller_scene2seq_b = teller_scene2seq_b.spec, 131 | teller_scene2seq_aux_a = teller_scene2seq_aux_a.spec, 132 | teller_scene2seq_aux_b = teller_scene2seq_aux_b.spec, 133 | teller_scene2seq_aux2_a = teller_scene2seq_aux2_a.spec, 134 | teller_scene2seq_aux2_b = teller_scene2seq_aux2_b.spec, 135 | ) 136 | 137 | # %% 138 | 139 | print() 140 | print() 141 | print("Saving models") 142 | torch.save(scene2seq_specs, Path('models/scene2seq.pt')) 143 | 144 | # %% 145 | 146 | print() 147 | 148 | print("Final evaluation on full dev set (scene2seq)") 149 | for splits in ('aa', 'ab', 'ba', 'bb'): 150 | sims = eval_fns(make_fns(splits, (teller_scene2seq_a, teller_scene2seq_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 151 | print(splits, sims.mean()) 152 | 153 | print("Final evaluation on full dev set (scene2seq_aux)") 154 | for splits in ('aa', 'ab', 'ba', 'bb'): 155 | sims = eval_fns(make_fns(splits, (teller_scene2seq_aux_a, teller_scene2seq_aux_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 156 | print(splits, sims.mean()) 157 | 158 | print("Final evaluation on full dev set (scene2seq_aux2)") 159 | for splits in ('aa', 'ab', 'ba', 'bb'): 160 | sims = eval_fns(make_fns(splits, (teller_scene2seq_aux2_a, teller_scene2seq_aux2_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 161 | print(splits, sims.mean()) 162 | -------------------------------------------------------------------------------- /baseline4_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | import numpy as np 13 | from pathlib import Path 14 | import editdistance 15 | 16 | import torch 17 | import torch.cuda 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | from nkfb_util import logsumexp, cuda_if_available 22 | 23 | import codraw_data 24 | from codraw_data import AbstractScene, Clipart 25 | import abs_render 26 | from abs_metric import scene_similarity, clipart_similarity 27 | from episode import Episode, Transcriber, respond_to, response_partial 28 | 29 | from saved_models import load_models, make_pairs 30 | from eval_automatic import print_eval 31 | 32 | # %% 33 | 34 | models = load_models(1, 2, 3, 4) 35 | 36 | # HACK while the model is still training 37 | models['teller_rl_b'] = models['teller_scene2seq_aux2_b'] 38 | 39 | # %% 40 | 41 | tellers = make_pairs(models, 42 | # 'teller_nn', 43 | # 'teller_pragmaticnn', 44 | # 'teller_scene2seq', 45 | # 'teller_scene2seq_aux', 46 | # 'teller_scene2seq_aux2', 47 | 'teller_rl', 48 | ) 49 | 50 | drawers = make_pairs(models, 51 | # 'drawer_nn', 52 | # 'drawer_sim', 53 | # 'drawer_bow2c', 54 | # 'drawer_bow2bce', 55 | # 'drawer_bowcanvas2bce', 56 | 'drawer_lstmaddonly', 57 | ) 58 | 59 | # %% 60 | 61 | print() 62 | print_eval(do_human=True) 63 | 64 | # %% 65 | 66 | print() 67 | print() 68 | print_eval(tellers, drawers, limit=None, do_pairwise=True) 69 | 70 | # %% 71 | 72 | print() 73 | print() 74 | print_eval(tellers, drawers, limit=None, do_script=True, do_components_pairwise=True, do_components_script=True) 75 | -------------------------------------------------------------------------------- /baseline4_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | #%% 8 | 9 | import numpy as np 10 | from pathlib import Path 11 | 12 | import torch 13 | import torch.cuda 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | from nkfb_util import logsumexp, cuda_if_available, torch_load 18 | from attention import AttentionSeqToMasked 19 | 20 | import codraw_data 21 | from codraw_data import AbstractScene, Clipart 22 | import abs_render 23 | from abs_metric import scene_similarity, clipart_similarity 24 | from episode import Episode, Transcriber, respond_to 25 | 26 | from model import make_fns, eval_fns 27 | from model import Model 28 | 29 | from baseline3_models import SceneToSeqTeller 30 | 31 | # %% 32 | 33 | def process_episode(episode, 34 | brw_rewards, brw_discounted_rewards, 35 | utterance_penalty, 36 | gamma, 37 | uninformative_penalty, 38 | ): 39 | scene_sims = None 40 | for event in episode: 41 | if isinstance(event, codraw_data.ObserveTruth): 42 | drawn_scene = [] 43 | true_scene = event.scene 44 | scene_sims = [] 45 | reward_idxs = [] 46 | yield event 47 | elif isinstance(event, codraw_data.TellGroup): 48 | if reward_idxs: 49 | base_idx = reward_idxs[-1] + 1 50 | else: 51 | base_idx = 0 52 | offset = len(event.msg.split()) 53 | if offset >= 50: 54 | offset = 50 - 1 55 | reward_idxs.append(base_idx + offset) 56 | yield event 57 | elif isinstance(event, (codraw_data.ObserveCanvas, codraw_data.ReplyGroup)): 58 | yield event 59 | elif isinstance(event, codraw_data.DrawGroup): 60 | assert drawn_scene is not None 61 | drawn_scene = [c for c in drawn_scene if c.idx not in [c2.idx for c2 in event.cliparts]] 62 | drawn_scene.extend(event.cliparts) 63 | scene_sims.append(scene_similarity(drawn_scene, true_scene)) 64 | yield codraw_data.SetDrawing(drawn_scene) 65 | elif isinstance(event, codraw_data.SetDrawing): 66 | scene_sims.append(scene_similarity(event.scene, true_scene)) 67 | yield event 68 | 69 | if scene_sims is not None: 70 | rewards = np.array(scene_sims) - np.array([0] + scene_sims[:-1]) 71 | rewards = np.where(rewards > 0, rewards, -uninformative_penalty) 72 | 73 | if len(rewards) >= 50: 74 | rewards = np.array(list(rewards - utterance_penalty)) 75 | else: 76 | rewards = np.array(list(rewards - utterance_penalty) + [0]) 77 | if reward_idxs: 78 | reward_idxs.append(reward_idxs[-1] + 1) 79 | else: 80 | reward_idxs.append(0) 81 | 82 | new_brw_rewards = np.zeros(reward_idxs[-1] + 1) 83 | new_brw_rewards[np.array(reward_idxs)] = rewards 84 | brw_rewards.extend(list(new_brw_rewards)) 85 | brw_discounted_rewards.extend(list(discount_rewards(new_brw_rewards, gamma))) 86 | 87 | def discount_rewards(r, gamma=0.99): 88 | """ take 1D float array of rewards and compute discounted reward """ 89 | # https://gist.github.com/karpathy/a4166c7fe253700972fcbc77e4ea32c5 90 | r = np.asarray(r) 91 | discounted_r = np.zeros_like(r) 92 | running_add = 0 93 | for t in reversed(range(0, r.size)): 94 | running_add = running_add * gamma + r[t] 95 | discounted_r[t] = running_add 96 | 97 | return discounted_r 98 | 99 | def examples_from_episodes(episodes, dg, utterance_penalty, gamma, uninformative_penalty): 100 | brw_rewards = [] 101 | brw_discounted_rewards = [] 102 | episodes = [list(process_episode(episode, 103 | brw_rewards, brw_discounted_rewards, 104 | utterance_penalty, 105 | gamma, 106 | uninformative_penalty, 107 | )) 108 | for episode in episodes] 109 | example_batch = dg.tensors_from_episodes(episodes + [[codraw_data.ObserveTruth([])]]) 110 | example_batch['brw_rewards'] = torch.tensor(brw_rewards, dtype=torch.float, 
device=cuda_if_available) 111 | example_batch['brw_discounted_rewards'] = torch.tensor(brw_discounted_rewards, dtype=torch.float, device=cuda_if_available) 112 | return example_batch 113 | 114 | # %% 115 | 116 | def collect_episodes(fns, 117 | dg, 118 | scenes=codraw_data.get_scenes('dev'), 119 | batch_size=16, 120 | utterance_penalty=0.25, 121 | gamma=0.99, 122 | uninformative_penalty=0.3 123 | ): 124 | with torch.no_grad(): 125 | episodes = [] 126 | for scene in np.random.choice(scenes, batch_size): 127 | ep = Episode.run(scene, fns) 128 | episodes.append(ep) 129 | 130 | example_batch = examples_from_episodes( 131 | episodes, 132 | dg=dg, 133 | utterance_penalty=utterance_penalty, 134 | gamma=gamma, 135 | uninformative_penalty=uninformative_penalty, 136 | ) 137 | return episodes, example_batch 138 | 139 | # %% 140 | 141 | class RLSceneToSeqTeller(SceneToSeqTeller): 142 | def disable_dropout(self): 143 | for module in self.modules(): 144 | if isinstance(module, nn.Dropout): 145 | module.p = 0 146 | 147 | def calc_rl_loss(self, example_batch): 148 | dg = self.datagen 149 | 150 | b_clipart_tags = self.tag_embs(example_batch['b_scene_tags']).view(-1, dg.NUM_INDEX, self.d_clipart_tags) 151 | 152 | packer = example_batch['packer'] 153 | ob_clipart_tags = packer.ob_from_b(b_clipart_tags) 154 | ob_clipart_tags = self.pre_attn_tag_dropout(ob_clipart_tags) 155 | ob_scene_mask = packer.ob_from_b(example_batch['b_scene_mask']) 156 | 157 | brw_teller_tokens_in = example_batch['brw_teller_tokens_in'] 158 | 159 | brw_embs = self.pre_lstm_emb_dropout(self.word_embs(brw_teller_tokens_in)) 160 | orwb_embs = packer.orwb_from_brw_pack(brw_embs) 161 | 162 | orwb_attended_values_prelstm = self.attn_prelstm(orwb_embs, ob_clipart_tags, ob_clipart_tags, k_mask=ob_scene_mask) 163 | orwb_lstm_in = nn.utils.rnn.PackedSequence(torch.cat([ 164 | orwb_embs.data, 165 | orwb_attended_values_prelstm.data, 166 | ], -1), orwb_embs.batch_sizes) 167 | 168 | orwb_lstm_out, _ = self.lstm(orwb_lstm_in) 169 | orwb_lstm_out = nn.utils.rnn.PackedSequence(self.post_lstm_dropout(orwb_lstm_out.data), orwb_lstm_out.batch_sizes) 170 | 171 | orwb_attended_values = self.attn(orwb_lstm_out, ob_clipart_tags, ob_clipart_tags, k_mask=ob_scene_mask) 172 | 173 | brw_pre_project = torch.cat([ 174 | packer.brw_from_orwb_unpack(orwb_lstm_out), 175 | packer.brw_from_orwb_unpack(orwb_attended_values), 176 | ], -1) 177 | 178 | brw_word_logits = self.word_project(brw_pre_project) 179 | brw_word_losses = F.cross_entropy(brw_word_logits, example_batch['brw_teller_tokens_out'], reduce=False) 180 | 181 | b_word_losses = nn.utils.rnn.pad_packed_sequence(packer.orwb_from_brw_pack(brw_word_losses))[0].sum(0) 182 | print('mean nll', float(b_word_losses.mean())) 183 | 184 | # Discounting occurs at every word 185 | # brw_discounted_rewards = example_batch['brw_discounted_rewards'][:brw_word_losses.shape[0]] 186 | # XXX(nikita): clipping here seems wrong. Make sure there are no more crashes! 187 | brw_discounted_rewards = example_batch['brw_discounted_rewards'] 188 | # TODO(nikita): what is the right baseline? 
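# For reference, one way to read the loss computed below: brw_word_losses holds
# per-word negative log-likelihoods nll_t = -log pi(w_t | context), and
# brw_discounted_rewards holds discounted returns R_t (minus a baseline).
# Minimizing mean_t[(R_t - baseline) * nll_t] is the standard REINFORCE
# surrogate: words followed by above-baseline reward are made more likely and
# the rest less likely. The baseline used below is a fixed constant (a simple
# variance-reduction choice, not a learned value function).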
189 | baseline = 0.8 190 | brw_discounted_rewards = brw_discounted_rewards - baseline 191 | 192 | brw_rl_losses = brw_word_losses * brw_discounted_rewards 193 | 194 | rl_loss = brw_rl_losses.mean() 195 | 196 | return rl_loss 197 | 198 | 199 | # %% 200 | 201 | def load_baseline4(): 202 | models = {} 203 | 204 | rl_spec_a = torch_load('models/rl_nodict_aug2.pt') 205 | models['teller_rl_a'] = RLSceneToSeqTeller(spec=rl_spec_a) 206 | models['teller_rl_b'] = None 207 | 208 | models['teller_rl_a'].eval() 209 | 210 | return models 211 | -------------------------------------------------------------------------------- /baseline4_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | assert __name__ == "__main__", "Training script should not be imported!" 13 | 14 | #%% 15 | 16 | import numpy as np 17 | from pathlib import Path 18 | 19 | import torch 20 | import torch.cuda 21 | import torch.nn as nn 22 | import torch.nn.functional as F 23 | 24 | from nkfb_util import logsumexp, cuda_if_available, torch_load 25 | from attention import AttentionSeqToMasked 26 | 27 | import codraw_data 28 | from codraw_data import AbstractScene, Clipart 29 | import abs_render 30 | from abs_metric import scene_similarity, clipart_similarity 31 | from episode import Episode, Transcriber, respond_to 32 | 33 | from model import make_fns, eval_fns 34 | from model import Model 35 | 36 | from baseline2_models import load_baseline2 37 | from baseline3_models import load_baseline3 38 | from baseline4_models import RLSceneToSeqTeller, collect_episodes 39 | 40 | # %% 41 | 42 | models_baseline2 = load_baseline2() 43 | models_baseline3 = load_baseline3() 44 | 45 | # %% 46 | 47 | drawer_lstmaddonly_a, drawer_lstmaddonly_b = models_baseline2['drawer_lstmaddonly_a'], models_baseline2['drawer_lstmaddonly_b'] 48 | 49 | teller_scene2seq_aux2_a, teller_scene2seq_aux2_b = models_baseline3['teller_scene2seq_aux2_a'], models_baseline3['teller_scene2seq_aux2_b'] 50 | 51 | # %% 52 | 53 | def train_teller(split, teller_pair, scenes, 54 | utterance_penalty=0.1, 55 | gamma=0.999, 56 | uninformative_penalty=0.3, 57 | batch_size=16, 58 | num_batches=12500, 59 | eval_every=2000, 60 | lr=0.00007, 61 | limit=100, 62 | base_name="scene2seq_rl", 63 | ): 64 | print("Training hyperparameters:") 65 | for param in ['utterance_penalty', 66 | 'gamma', 67 | 'uninformative_penalty', 68 | 'batch_size', 69 | 'num_batches', 70 | 'lr', 71 | 'limit', 72 | ]: 73 | print(param, '=', locals()[param]) 74 | 75 | drawer_pair = drawer_lstmaddonly_a, drawer_lstmaddonly_b 76 | 77 | splits_pair = split + 'a', split + 'b' 78 | if split == 'a': 79 | teller = teller_pair[0] 80 | elif split == 'b': 81 | teller = teller_pair[1] 82 | else: 83 | assert False 84 | 85 | teller.disable_dropout() 86 | fns = make_fns(split + split, teller_pair, drawer_pair) 87 | optimizer = torch.optim.Adam(teller.parameters(), lr=lr) 88 | 89 | def validate(): 90 | for inference_method in ['greedy', 'sample']: 91 | teller.inference_method = inference_method 92 | for splits in splits_pair: 93 | sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) 94 | print(splits, f'[{inference_method}]', sims.mean()) 95 | 96 | 
validate() 97 | 98 | teller.inference_method = 'sample' 99 | for batch_num in range(num_batches): 100 | optimizer.zero_grad() 101 | teller.eval() 102 | episodes, ex = collect_episodes( 103 | fns, 104 | teller.datagen, 105 | scenes=scenes, 106 | batch_size=batch_size, 107 | utterance_penalty=utterance_penalty, 108 | gamma=gamma, 109 | uninformative_penalty=uninformative_penalty, 110 | ) 111 | 112 | teller.train() 113 | loss = teller.calc_rl_loss(ex) 114 | loss.backward() 115 | # grad_norm = nn.utils.clip_grad_norm_(teller.parameters(), float('inf')) 116 | # XXX(nikita): clip gradients in an attempt to stabilize. Need to see if 117 | # there's an underlying bug, though. 118 | grad_norm = nn.utils.clip_grad_norm_(teller.parameters(), 1.5) 119 | optimizer.step() 120 | 121 | mean_reward = float(ex['brw_rewards'].sum().item() / ex['b_scene_mask'].shape[0]) 122 | mean_len = np.mean([ 123 | len([event for event in episode if isinstance(event, codraw_data.TellGroup)]) 124 | for episode in episodes]) 125 | sims = np.array([episode.scene_similarity() for episode in episodes]) 126 | mean_sim = sims.mean() 127 | std_sim = sims.std() 128 | print(f'batch {batch_num} mean-reward {mean_reward} loss {float(loss)} grad {float(grad_norm)} mean-len {mean_len} mean-sim {mean_sim} std-sim {std_sim}') 129 | 130 | if batch_num % 5 == 0: 131 | for event in episodes[-1]: 132 | if isinstance(event, codraw_data.TellGroup): 133 | print(' >', event.msg) 134 | 135 | if batch_num % 50 == 0: 136 | del episodes, ex, loss # clean up memory 137 | validate() 138 | 139 | if batch_num > 0 and batch_num % eval_every == 0: 140 | teller.eval() 141 | print("Printing representative sampled dialogs") 142 | teller.inference_method = 'sample' 143 | episodes, ex = collect_episodes(fns, teller.datagen, scenes=scenes[:1], batch_size=5) 144 | for episode in episodes: 145 | for event in episode: 146 | if isinstance(event, codraw_data.TellGroup): 147 | print(' >', event.msg) 148 | print('similarity', episode.scene_similarity()) 149 | print('-----') 150 | 151 | print("Evaluating on the full dev set") 152 | for inference_method in ['greedy', 'sample']: 153 | teller.inference_method = inference_method 154 | for splits in splits_pair: 155 | sims = eval_fns(make_fns(splits, (teller_rl_a, teller_rl_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 156 | print(splits, f'[{inference_method}]', sims.mean()) 157 | 158 | if base_name is not None: 159 | print("Serializing teller to disk") 160 | torch.save(teller.spec, Path(f'rl_models/{base_name}_{split}_{batch_num}.pt')) 161 | 162 | # %% 163 | 164 | # Change this to train a different teller 165 | TELLER_SPLIT = 'a' 166 | # TELLER_SPLIT = 'b' 167 | 168 | # Reduce entropy: the uncertainty in the pre-trained model isn't ideal for 169 | # starting RL. It may be possible to adjust label smoothing in the pre-training, 170 | # but for now just reweigh the linear layer prior to the softmax 171 | SOFTMAX_RESCALE = 3. 
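# Illustrative sketch (this helper is added for clarity only and is never
# called by the script): scaling the weight and bias of the final linear layer
# by a constant c multiplies every logit by c, which is equivalent to sampling
# at temperature 1/c. With c > 1 the softmax becomes sharper (lower entropy)
# while the argmax is unchanged.
def _softmax_rescale_demo(c=SOFTMAX_RESCALE):
    logits = torch.tensor([2.0, 1.0, 0.5])
    p_before = F.softmax(logits, dim=0)      # roughly [0.63, 0.23, 0.14]
    p_after = F.softmax(c * logits, dim=0)   # roughly [0.94, 0.05, 0.01] for c = 3
    return p_before, p_after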
172 | 173 | # %% 174 | 175 | teller_rl_a, teller_rl_b = None, None 176 | if TELLER_SPLIT == 'a': 177 | teller_rl_a = RLSceneToSeqTeller(spec=teller_scene2seq_aux2_a.spec) 178 | teller_rl_a.word_project.weight.data *= SOFTMAX_RESCALE 179 | teller_rl_a.word_project.bias.data *= SOFTMAX_RESCALE 180 | else: 181 | teller_rl_b = RLSceneToSeqTeller(spec=teller_scene2seq_aux2_b.spec) 182 | teller_rl_b.word_project.weight.data *= SOFTMAX_RESCALE 183 | teller_rl_b.word_project.bias.data *= SOFTMAX_RESCALE 184 | 185 | # %% 186 | 187 | print(f"Info: training on partition {TELLER_SPLIT}") 188 | scenes = np.asarray(codraw_data.get_scenes(TELLER_SPLIT)) 189 | 190 | train_teller( 191 | TELLER_SPLIT, 192 | (teller_rl_a, teller_rl_b), 193 | scenes, 194 | utterance_penalty=0.0, 195 | gamma=0.995, 196 | uninformative_penalty=0.3, 197 | batch_size=16, 198 | num_batches=60000, 199 | eval_every=2000, 200 | lr=0.00003, 201 | limit=100, 202 | base_name="b5_utt0_lr3_clip15", 203 | ) 204 | -------------------------------------------------------------------------------- /codraw_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | An event-based view of the CoDraw dataset 9 | """ 10 | 11 | #%% 12 | import numpy as np 13 | 14 | from pathlib import Path 15 | import json 16 | from enum import Enum 17 | from collections import namedtuple 18 | import inspect 19 | 20 | import abs_util_orig 21 | import abs_render 22 | 23 | #%% 24 | 25 | if INTERACTIVE: 26 | DATASET_PATH = Path('../CoDraw/dataset/CoDraw_1_0.json') 27 | else: 28 | DATASET_PATH = Path(__file__).parent / '../CoDraw/dataset/CoDraw_1_0.json' 29 | 30 | assert DATASET_PATH.exists() 31 | 32 | #%% clipart wrappers, with better docs than abs_util_orig.py 33 | 34 | ClipartBase = namedtuple('Clipart', 35 | ['idx', 'subtype', 'depth', 'flip', 'x', 'y']) 36 | # idx: integer [0-57] 37 | # subtype: integer [0-34] 38 | # depth: integer [0-2] 39 | # flip: integer [0-1] 40 | # x: float [1-500] 41 | # y: float [1-400] 42 | 43 | class Clipart(ClipartBase): 44 | __slots__ = () 45 | NUM_IDX = 58 46 | NUM_SUBTYPE = 35 47 | NUM_DEPTH = 3 48 | NUM_FLIP = 2 49 | CANVAS_WIDTH = 500.0 50 | CANVAS_HEIGHT = 400.0 51 | 52 | NUM_EXPRESSION = 5 53 | NUM_POSE = 7 54 | assert NUM_SUBTYPE == (NUM_EXPRESSION * NUM_POSE) 55 | 56 | HUMAN_IDXS = (18, 19) 57 | 58 | def __new__(cls, idx, subtype, depth, flip, x=None, y=None, normed_x=None, normed_y=None): 59 | if normed_x is not None: 60 | if x is not None: 61 | raise ValueError("The arguments x and normed_x are mutually exclusive") 62 | x = normed_x * cls.CANVAS_WIDTH 63 | elif x is None: 64 | raise ValueError("Either x or normed_x is required") 65 | if normed_y is not None: 66 | if y is not None: 67 | raise ValueError("The arguments y and normed_y are mutually exclusive") 68 | y = normed_y * cls.CANVAS_HEIGHT 69 | elif y is None: 70 | raise ValueError("Either y or normed_y is required") 71 | 72 | return ClipartBase.__new__(cls, idx, subtype, depth, flip, x, y) 73 | 74 | @property 75 | def normed_x(self): 76 | return self.x / self.CANVAS_WIDTH 77 | 78 | @property 79 | def normed_y(self): 80 | return self.y / self.CANVAS_HEIGHT 81 | 82 | @property 83 | def expression(self): 84 | """ 85 | Facial expression 86 | """ 87 | return self.subtype % self.NUM_EXPRESSION 88 | 89 | @property 90 | 
def pose(self): 91 | """ 92 | Body pose 93 | """ 94 | return self.subtype // self.NUM_EXPRESSION 95 | 96 | @property 97 | def human_idx(self): 98 | if self.idx not in self.HUMAN_IDXS: 99 | raise ValueError("Cannot get human_idx of non-human clipart") 100 | return self.idx - self.HUMAN_IDXS[0] 101 | 102 | @property 103 | def render_order_key(self): 104 | """ 105 | Key that can be used to sort cliparts by the order in which they are 106 | rendered. 107 | """ 108 | # Sun (idx=3) is always in the back; this is also in Abs.js 109 | # All sky objects (idx < 8) are behind any non-sky objects 110 | # Past that, objects are sorted by depth and then by index 111 | return (self.idx != 3, self.idx >= 8, -self.depth, self.idx) 112 | 113 | def _repr_svg_(self): 114 | return abs_render.svg_from_cliparts([self]) 115 | 116 | class AbstractScene(list): 117 | """ 118 | Abstract scene representation that only encodes objects which are present, 119 | and never a library of available objects that are not in the scene 120 | """ 121 | def __init__(self, string_or_iterable): 122 | if isinstance(string_or_iterable, str): 123 | abs = abs_util_orig.AbsUtil(string_or_iterable) 124 | if abs.obj is None: 125 | super().__init__() 126 | else: 127 | super().__init__(Clipart(*c) for c in abs.obj) 128 | else: 129 | super().__init__(string_or_iterable) 130 | 131 | def __repr__(self): 132 | return "" 133 | 134 | def __str__(self): 135 | return super().__repr__() 136 | 137 | def _repr_svg_(self): 138 | return abs_render.svg_from_cliparts(self) 139 | 140 | def stringify(self): 141 | scene_str = "" 142 | scene_str += f"{len(self)}," 143 | for i, clipart in enumerate(self): 144 | img_name = abs_render.get_image_name(clipart) 145 | prefix, num = img_name[:-5].split('_') 146 | prefix = ['s', 'p', 'hb0', 'hb1', 'a', 'c', 'e', 't'].index(prefix) 147 | num = int(num) 148 | 149 | scene_str += f"{img_name}," 150 | scene_str += f"{i}," 151 | scene_str += f"{num}," 152 | scene_str += f"{prefix}," 153 | scene_str += f"{clipart.x}," 154 | scene_str += f"{clipart.y}," 155 | scene_str += f"{clipart.depth}," 156 | scene_str += f"{clipart.flip}," 157 | return scene_str 158 | 159 | 160 | #%% Data loading helper for a particular split 161 | 162 | def data_for_splits(split_or_splits): 163 | if isinstance(split_or_splits, str): 164 | splits = [split_or_splits] 165 | else: 166 | splits = split_or_splits 167 | 168 | data_all = json.loads(DATASET_PATH.read_text())['data'] 169 | keys_train = sorted([k for k in data_all.keys() if k.startswith('train')]) 170 | keys_dev = sorted([k for k in data_all.keys() if k.startswith('val')]) 171 | keys_test = sorted([k for k in data_all.keys() if k.startswith('test')]) 172 | keys_all = sorted(data_all.keys()) 173 | 174 | 175 | half_train_len = len(keys_train) // 2 176 | keys_from_split = { 177 | 'train_a': keys_train[:half_train_len], 178 | 'a': keys_train[:half_train_len], 179 | 'train_b': keys_train[half_train_len:], 180 | 'b': keys_train[half_train_len:], 181 | 'train_full': keys_train, 182 | 'dev': keys_dev, 183 | 'test': keys_test, 184 | 'all': keys_all, 185 | } 186 | 187 | res = [] 188 | for split in splits: 189 | data_split = {k: data_all[k] for k in keys_from_split[split]} 190 | res.append(data_split) 191 | 192 | return res 193 | 194 | def cached_split_wrapper(fn): 195 | """ 196 | Modifies the function to accept a split or list of splits instead of a 197 | a raw data dictionary for a single split, and caches results so they don't 198 | have to be recalculated. 
199 | """ 200 | fn.split_to_results = {} 201 | def deco(split_or_splits): 202 | if isinstance(split_or_splits, str): 203 | splits = [split_or_splits] 204 | else: 205 | splits = split_or_splits 206 | 207 | uncached_splits = [split for split in splits if split not in fn.split_to_results] 208 | uncached_splits_data = data_for_splits(uncached_splits) 209 | for split, data in zip(uncached_splits, uncached_splits_data): 210 | result = fn(data) 211 | if inspect.isgenerator(result): 212 | result = list(result) 213 | fn.split_to_results[split] = result 214 | 215 | if isinstance(split_or_splits, str): 216 | return fn.split_to_results[split_or_splits] 217 | else: 218 | return [fn.split_to_results[split] for split in split_or_splits] 219 | return deco 220 | 221 | #%% An event-based view of the CoDraw dataset 222 | 223 | # TODO(nikita): Agent class and actor/observer are currently doing nothing. 224 | # Is there a need for them? 225 | 226 | class Agent(Enum): 227 | TELLER = 0 228 | DRAWER = 1 229 | 230 | class Event: 231 | def __init__(self, actor=None, observer=None): 232 | self.actor = actor 233 | self.observer = observer 234 | 235 | class ObserveTruth(Event): 236 | def __init__(self, scene): 237 | super().__init__(observer=Agent.TELLER) 238 | self.scene = scene 239 | 240 | def __repr__(self): 241 | return f"{type(self).__name__}()" 242 | 243 | class SelectClipart(Event): 244 | def __init__(self, clipart): 245 | super().__init__(actor=Agent.TELLER, observer=None) 246 | self.clipart = clipart 247 | 248 | def __repr__(self): 249 | return f"{type(self).__name__}(clipart={self.clipart})" 250 | 251 | class TellerIntention(Event): 252 | def __init__(self, drawn=None, undrawn=None, draw_next=None): 253 | super().__init__(actor=Agent.TELLER, observer=None) 254 | self.drawn = drawn 255 | self.undrawn = undrawn 256 | self.draw_next = draw_next 257 | 258 | def __repr__(self): 259 | return f"{type(self).__name__}(drawn={self.drawn}, undrawn={self.undrawn}, draw_next={self.draw_next})" 260 | 261 | class TellGroup(Event): 262 | # group because each word is an action 263 | def __init__(self, msg): 264 | super().__init__(actor=Agent.TELLER, observer=Agent.DRAWER) 265 | self.msg = msg 266 | 267 | def __repr__(self): 268 | return f"{type(self).__name__}(msg={repr(self.msg)})" 269 | 270 | class Peek(Event): 271 | def __init__(self): 272 | super().__init__(actor=Agent.TELLER, observer=None) 273 | 274 | def __repr__(self): 275 | return f"{type(self).__name__}()" 276 | 277 | class TellerObserveCanvas(Event): 278 | def __init__(self, scene): 279 | super().__init__(observer=Agent.TELLER) 280 | if not isinstance(scene, AbstractScene): 281 | scene = AbstractScene(scene) 282 | self.scene = scene 283 | 284 | def __repr__(self): 285 | return f"{type(self).__name__}({self.scene})" 286 | 287 | class ObserveCanvas(Event): 288 | def __init__(self, scene): 289 | super().__init__(observer=Agent.DRAWER) 290 | if not isinstance(scene, AbstractScene): 291 | scene = AbstractScene(scene) 292 | self.scene = scene 293 | 294 | def __repr__(self): 295 | return f"{type(self).__name__}({self.scene})" 296 | 297 | class DrawClipart(Event): 298 | # Draws or moves a clipart 299 | # Since multiple copies of the same clipart are not allowed, duplicate draw 300 | # events with the same id will result in the removal of the older instance 301 | # of the clipart to make way for the new one. 
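# (Illustrative, mirroring Episode.reconstruct in episode.py:) applying a
# DrawClipart event to a canvas list amounts to
#     canvas = [c for c in canvas if c.idx != event.clipart.idx] + [event.clipart]
# i.e. a second draw with the same idx replaces the earlier instance.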
def __init__(self, clipart): 303 | super().__init__(actor=Agent.DRAWER, observer=None) 304 | self.clipart = clipart 305 | 306 | def __repr__(self): 307 | return f"{type(self).__name__}(clipart={self.clipart})" 308 | 309 | class DrawGroup(Event): 310 | # Draws or moves multiple (or no) cliparts at the same time 311 | # Since multiple copies of the same clipart are not allowed, duplicate draw 312 | # events with the same id will result in the removal of the older instance 313 | # of the clipart to make way for the new one. 314 | def __init__(self, cliparts): 315 | super().__init__(actor=Agent.DRAWER, observer=None) 316 | self.cliparts = cliparts 317 | 318 | def __repr__(self): 319 | return f"{type(self).__name__}(cliparts={self.cliparts})" 320 | 321 | class SetDrawing(Event): 322 | # Updates the drawer canvas to exactly match the scene argument 323 | # This was added for transcripts of humans performing the task because 324 | # neither DrawClipart nor DrawGroup has support for removing clipart. 325 | def __init__(self, scene): 326 | super().__init__(actor=Agent.DRAWER, observer=None) 327 | self.scene = scene 328 | 329 | def __repr__(self): 330 | return f"{type(self).__name__}({self.scene})" 331 | 332 | class ReplyGroup(Event): 333 | # group because each word is an action 334 | def __init__(self, msg): 335 | super().__init__(actor=Agent.DRAWER, observer=Agent.TELLER) 336 | self.msg = msg 337 | 338 | def __repr__(self): 339 | return f"{type(self).__name__}(msg={repr(self.msg)})" 340 | 341 | #%% 342 | 343 | def events_from_datum_place_one(datum): 344 | # TODO(nikita): this filtering keeps just over 25% of conversational rounds 345 | # What do I need to do to match the 37.6% number in the arxiv paper? 346 | # perhaps I should include the cases where a clipart is updated?
But that 347 | # only seems to bring me up to around 31% 348 | buffer = [] 349 | buffer.append(ObserveTruth(AbstractScene(datum['abs_t']))) 350 | 351 | for entry in datum['dialog']: 352 | abs_b = AbstractScene(entry['abs_b']) 353 | abs_d = AbstractScene(entry['abs_d']) 354 | 355 | strictly_additive = len(set(abs_b) - set(abs_d)) == 0 356 | added_cliparts = set(abs_d) - set(abs_b) 357 | if strictly_additive and len(added_cliparts) == 1 and entry['msg_t']: 358 | added_clipart = list(added_cliparts)[0] 359 | buffer.append(SelectClipart(added_clipart)) 360 | buffer.append(TellGroup(entry['msg_t'])) 361 | buffer.append(DrawClipart(added_clipart)) 362 | buffer.append(ReplyGroup(entry['msg_d'])) 363 | 364 | if isinstance(buffer[-1], ObserveTruth): 365 | return [] 366 | return buffer 367 | 368 | @cached_split_wrapper 369 | def get_place_one(data): 370 | for datum in data.values(): 371 | yield from events_from_datum_place_one(datum) 372 | 373 | #%% 374 | 375 | def events_from_datum_place_many(datum): 376 | buffer = [] 377 | buffer.append(ObserveTruth(AbstractScene(datum['abs_t']))) 378 | 379 | for entry in datum['dialog']: 380 | abs_b = AbstractScene(entry['abs_b']) 381 | abs_d = AbstractScene(entry['abs_d']) 382 | 383 | added_cliparts = set(abs_d) - set(abs_b) 384 | added_cliparts = sorted(added_cliparts, key=lambda c: c.render_order_key) 385 | 386 | buffer.append(TellGroup(entry['msg_t'])) 387 | buffer.append(DrawGroup(added_cliparts)) 388 | buffer.append(ReplyGroup(entry['msg_d'])) 389 | 390 | if isinstance(buffer[-1], ObserveTruth): 391 | return [] 392 | return buffer 393 | 394 | @cached_split_wrapper 395 | def get_place_many(data): 396 | for datum in data.values(): 397 | yield from events_from_datum_place_many(datum) 398 | 399 | #%% 400 | 401 | def events_from_datum_contextual_place_many(datum): 402 | buffer = [] 403 | buffer.append(ObserveTruth(AbstractScene(datum['abs_t']))) 404 | 405 | for entry in datum['dialog']: 406 | abs_b = AbstractScene(entry['abs_b']) 407 | abs_d = AbstractScene(entry['abs_d']) 408 | 409 | added_cliparts = set(abs_d) - set(abs_b) 410 | added_cliparts = sorted(added_cliparts, key=lambda c: c.render_order_key) 411 | 412 | buffer.append(TellGroup(entry['msg_t'])) 413 | buffer.append(ObserveCanvas(abs_b)) 414 | buffer.append(DrawGroup(added_cliparts)) 415 | buffer.append(ReplyGroup(entry['msg_d'])) 416 | 417 | if isinstance(buffer[-1], ObserveTruth): 418 | return [] 419 | return buffer 420 | 421 | @cached_split_wrapper 422 | def get_contextual_place_many(data): 423 | for datum in data.values(): 424 | yield from events_from_datum_contextual_place_many(datum) 425 | 426 | # %% 427 | 428 | def events_from_datum_set_clipart(datum): 429 | buffer = [] 430 | buffer.append(ObserveTruth(AbstractScene(datum['abs_t']))) 431 | 432 | for entry in datum['dialog']: 433 | abs_b = AbstractScene(entry['abs_b']) 434 | abs_d = AbstractScene(entry['abs_d']) 435 | 436 | buffer.append(TellGroup(entry['msg_t'])) 437 | buffer.append(ObserveCanvas(abs_b)) 438 | buffer.append(SetDrawing(abs_d)) 439 | buffer.append(ReplyGroup(entry['msg_d'])) 440 | 441 | if isinstance(buffer[-1], ObserveTruth): 442 | return [] 443 | return buffer 444 | 445 | @cached_split_wrapper 446 | def get_set_clipart(data): 447 | for datum in data.values(): 448 | yield from events_from_datum_set_clipart(datum) 449 | 450 | # %% 451 | 452 | def events_from_datum_set_clipart_pre_peek(datum): 453 | buffer = [] 454 | buffer.append(ObserveTruth(AbstractScene(datum['abs_t']))) 455 | 456 | for entry in datum['dialog']: 457 | if 
entry.get('peeked', False): 458 | # Note that Peek happens before TellGroup 459 | break 460 | 461 | abs_b = AbstractScene(entry['abs_b']) 462 | abs_d = AbstractScene(entry['abs_d']) 463 | 464 | buffer.append(TellGroup(entry['msg_t'])) 465 | buffer.append(ObserveCanvas(abs_b)) 466 | buffer.append(SetDrawing(abs_d)) 467 | buffer.append(ReplyGroup(entry['msg_d'])) 468 | 469 | if isinstance(buffer[-1], ObserveTruth): 470 | return [] 471 | return buffer 472 | 473 | @cached_split_wrapper 474 | def get_set_clipart_pre_peek(data): 475 | for datum in data.values(): 476 | yield from events_from_datum_set_clipart_pre_peek(datum) 477 | 478 | # %% 479 | 480 | @cached_split_wrapper 481 | def get_scenes(data): 482 | for datum in data.values(): 483 | yield AbstractScene(datum['abs_t']) 484 | 485 | # %% 486 | 487 | @cached_split_wrapper 488 | def get_scenes_and_scripts(data): 489 | for datum in data.values(): 490 | scene = AbstractScene(datum['abs_t']) 491 | script = [] 492 | for entry in datum['dialog']: 493 | if entry.get('peeked', False): 494 | script.append(Peek()) 495 | script.append(TellerObserveCanvas(AbstractScene(entry['abs_b']))) 496 | if entry['msg_t']: 497 | script.append(TellGroup(entry['msg_t'])) 498 | yield (scene, script) 499 | 500 | # %% 501 | 502 | @cached_split_wrapper 503 | def get_scenes_and_scripts_with_peek(data): 504 | for datum in data.values(): 505 | scene = AbstractScene(datum['abs_t']) 506 | script = [] 507 | have_peeked = False 508 | for entry in datum['dialog']: 509 | if entry.get('peeked', False): 510 | script.append(Peek()) 511 | script.append(TellerObserveCanvas(AbstractScene(entry['abs_b']))) 512 | have_peeked = True 513 | if entry['msg_t']: 514 | script.append(TellGroup(entry['msg_t'])) 515 | 516 | # Exclude events with no Peek action, or no messages sent afterwards 517 | if have_peeked and not isinstance(script[-1], TellerObserveCanvas): 518 | yield (scene, script) 519 | 520 | # %% 521 | 522 | @cached_split_wrapper 523 | def get_truth_and_human_scenes(data): 524 | for datum in data.values(): 525 | scene = AbstractScene(datum['abs_t']) 526 | scene_after = None 527 | for entry in datum['dialog']: 528 | scene_after = entry['abs_d'] 529 | assert scene_after is not None 530 | scene_after = AbstractScene(scene_after) 531 | yield (scene, scene_after) 532 | 533 | @cached_split_wrapper 534 | def get_truth_and_human_scenes_pre_peek(data): 535 | for datum in data.values(): 536 | scene = AbstractScene(datum['abs_t']) 537 | scene_after = None 538 | for entry in datum['dialog']: 539 | if entry.get('peeked', False): 540 | break 541 | scene_after = entry['abs_d'] 542 | assert scene_after is not None 543 | scene_after = AbstractScene(scene_after) 544 | yield (scene, scene_after) 545 | 546 | @cached_split_wrapper 547 | def get_truth_and_human_scenes_with_js_scores(data): 548 | for datum in data.values(): 549 | scene = AbstractScene(datum['abs_t']) 550 | scene_after = None 551 | score_after = None 552 | for entry in datum['dialog']: 553 | if entry.get('score', None) is not None: 554 | score_after = entry['score'] 555 | scene_after = entry['abs_d'] 556 | assert scene_after is not None 557 | assert score_after is not None 558 | scene_after = AbstractScene(scene_after) 559 | yield (scene, scene_after, score_after) 560 | -------------------------------------------------------------------------------- /episode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | try: 8 | from IPython.display import display 9 | except ImportError: 10 | assert not INTERACTIVE 11 | def display(*args, **kwargs): 12 | pass 13 | 14 | import functools 15 | from pathlib import Path 16 | import datetime 17 | 18 | import abs_render 19 | import codraw_data 20 | from abs_metric import scene_similarity 21 | 22 | class Episode(list): 23 | def get_last(self, event_type): 24 | for event in reversed(self): 25 | if isinstance(event, event_type): 26 | return event 27 | return None 28 | 29 | def reconstruct(self): 30 | reconstructed_scene = [] 31 | for event in self: 32 | if isinstance(event, codraw_data.DrawClipart): 33 | reconstructed_scene = [c for c in reconstructed_scene if c.idx != event.clipart.idx] 34 | reconstructed_scene.append(event.clipart) 35 | elif isinstance(event, codraw_data.DrawGroup): 36 | reconstructed_scene = [c for c in reconstructed_scene if c.idx not in [c2.idx for c2 in event.cliparts]] 37 | reconstructed_scene.extend(event.cliparts) 38 | return codraw_data.AbstractScene(reconstructed_scene) 39 | 40 | def display(self): 41 | scene = None 42 | for event in self: 43 | if isinstance(event, codraw_data.ObserveTruth): 44 | assert scene is None, "Multiple ObserveTruth events not allowed in an episode" 45 | scene = event.scene 46 | elif isinstance(event, codraw_data.SelectClipart): 47 | display(event.clipart) 48 | elif isinstance(event, codraw_data.DrawClipart): 49 | abs_render.display_cliparts([event.clipart], color='red', scale=0.75) 50 | elif isinstance(event, codraw_data.DrawGroup): 51 | abs_render.display_cliparts(event.cliparts, color='red', scale=0.75) 52 | elif isinstance(event, codraw_data.TellGroup): 53 | print("TELLER:", event.msg) 54 | elif isinstance(event, codraw_data.ReplyGroup): 55 | print("DRAWER:", event.msg) 56 | elif isinstance(event, codraw_data.TellerIntention): 57 | if event.drawn is not None: 58 | abs_render.display_cliparts(event.drawn, color='purple', label='drawn', scale=0.33) 59 | if event.draw_next is not None: 60 | abs_render.display_cliparts(event.draw_next, color='yellow', label='draw next', scale=0.33) 61 | if event.undrawn is not None: 62 | abs_render.display_cliparts(event.undrawn, color='cyan', label='undrawn', scale=0.33) 63 | print('===') 64 | reconstructed_scene = self.reconstruct() 65 | abs_render.display_cliparts(scene, label='ground truth', scale=0.75) 66 | abs_render.display_cliparts(reconstructed_scene, color='red', label='reconstructed', scale=0.75) 67 | print('Similarity =', scene_similarity(reconstructed_scene, scene)) 68 | 69 | def to_html(self): 70 | res = "" 71 | scene = None 72 | delayed_selected_clipart = "" 73 | for event in self: 74 | if isinstance(event, codraw_data.ObserveTruth): 75 | assert scene is None, "Multiple ObserveTruth events not allowed in an episode" 76 | scene = event.scene 77 | elif isinstance(event, codraw_data.SelectClipart): 78 | delayed_selected_clipart += abs_render.svg_from_cliparts([event.clipart], inline_images=False) 79 | elif isinstance(event, codraw_data.DrawClipart): 80 | res += delayed_selected_clipart 81 | delayed_selected_clipart = "" 82 | res += abs_render.svg_from_cliparts([event.clipart], color='red', inline_images=False) 83 | elif isinstance(event, codraw_data.DrawGroup): 84 | res += delayed_selected_clipart 85 | delayed_selected_clipart = "" 86 | res += abs_render.svg_from_cliparts(event.cliparts, color='red', inline_images=False) 87 | 
elif isinstance(event, codraw_data.TellGroup): 88 | res += f"<p>TELLER: {event.msg}</p>" 89 | elif isinstance(event, codraw_data.ReplyGroup): 90 | res += f"<p>DRAWER: {event.msg}</p>" 91 | elif isinstance(event, codraw_data.TellerIntention): 92 | if event.drawn is not None: 93 | res += abs_render.svg_from_cliparts(event.drawn, color='purple', label='drawn', scale=0.33) 94 | if event.draw_next is not None: 95 | res += abs_render.svg_from_cliparts(event.draw_next, color='yellow', label='draw next', scale=0.33) 96 | if event.undrawn is not None: 97 | res += abs_render.svg_from_cliparts(event.undrawn, color='cyan', label='undrawn', scale=0.33) 98 | 99 | res += f"<p>===</p>" 100 | reconstructed_scene = self.reconstruct() 101 | res += abs_render.svg_from_cliparts(scene, label='ground truth', inline_images=False) 102 | res += abs_render.svg_from_cliparts(reconstructed_scene, color='red', label='reconstructed', inline_images=False) 103 | res += f"<p>Similarity = {scene_similarity(reconstructed_scene, scene)}</p>
" 104 | return res 105 | 106 | def write_html(self, name_or_path): 107 | if isinstance(name_or_path, Path): 108 | path = name_or_path 109 | else: 110 | path = Path(f"./renders/{name_or_path}.html").resolve() 111 | assert not path.exists(), "File already exists!" 112 | assert path.parent.exists(), "Parent directory does not exist" 113 | path.write_text(self.to_html()) 114 | 115 | def get_true_scene(self): 116 | scene = None 117 | for event in self: 118 | if isinstance(event, codraw_data.ObserveTruth): 119 | assert scene is None, "Multiple ObserveTruth events not allowed in an episode" 120 | scene = event.scene 121 | assert scene is not None, "Episode has no ObserveTruth events" 122 | return scene 123 | 124 | def scene_similarity(self): 125 | return scene_similarity(self.reconstruct(), self.get_true_scene()) 126 | 127 | @classmethod 128 | def run(cls, scene, fns): 129 | episode = cls([codraw_data.ObserveTruth(scene)]) 130 | while True: 131 | for fn in fns: 132 | if type(episode[-1]) in fn._trigger_types: 133 | old_len = len(episode) 134 | fn(episode) 135 | if len(episode) == old_len: 136 | return episode 137 | break 138 | else: 139 | assert False, f"No response for event: {type(episode[-1]).__name__}" 140 | 141 | @classmethod 142 | def run_script(cls, scene_and_script, fns): 143 | scene, script = scene_and_script 144 | episode = cls([codraw_data.ObserveTruth(scene)]) 145 | episode.script = script 146 | episode.script_index = 0 147 | 148 | while True: 149 | for fn in fns: 150 | if type(episode[-1]) in fn._trigger_types: 151 | old_len = len(episode) 152 | fn(episode) 153 | if len(episode) == old_len: 154 | return episode 155 | break 156 | else: 157 | assert False, f"No response for event: {type(episode[-1]).__name__}" 158 | 159 | 160 | def respond_to(*event_types): 161 | types = set([(x if issubclass(x, codraw_data.Event) else None) for x in event_types]) 162 | assert None not in types, "Invalid event type in decorator" 163 | 164 | def deco(fn): 165 | if hasattr(fn, '_trigger_types'): 166 | fn._trigger_types |= types 167 | else: 168 | fn._trigger_types = types 169 | return fn 170 | return deco 171 | 172 | def response_partial(fn, *args, **kwargs): 173 | res = functools.partial(fn, *args, **kwargs) 174 | res._trigger_types = fn._trigger_types 175 | return res 176 | 177 | class Transcriber: 178 | def __init__(self, filename, scenes=None, scenes_description="", scenes_and_scripts=None): 179 | self.filename = filename 180 | if scenes is not None: 181 | self.scene_data = scenes 182 | self.use_script = False 183 | else: 184 | self.scene_data = scenes_and_scripts 185 | self.use_script = True 186 | 187 | self.scenes_description = scenes_description 188 | 189 | def __call__(self, name_or_path, description="", **partition_to_fns): 190 | if isinstance(name_or_path, Path): 191 | path = name_or_path 192 | else: 193 | path = Path(f"./renders/{name_or_path}.html").resolve() 194 | assert not path.exists(), "File already exists!" 195 | assert path.parent.exists(), "Parent directory does not exist" 196 | 197 | assert isinstance(description, str) 198 | 199 | res = "" 200 | res += f"

<p>Filename: {self.filename}</p>" 201 | res += f"<p>Scenes: {self.scenes_description}</p>" 202 | res += f"<p>Started: {datetime.datetime.now()}</p>" 203 | res += f"<p>Description: {description}</p>" 204 | for partition, fns in partition_to_fns.items(): 205 | res += f"<hr>" 206 | res += f"<p>Partition {partition}</p>" 207 | for i, scene_datum in enumerate(self.scene_data): 208 | res += f'<p>Scene {i} [here]</p>
' 209 | if not self.use_script: 210 | res += Episode.run(scene_datum, fns).to_html() 211 | else: 212 | res += Episode.run_script(scene_datum, fns).to_html() 213 | path.write_text(res) 214 | -------------------------------------------------------------------------------- /eval_automatic.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | 12 | import numpy as np 13 | from pathlib import Path 14 | import editdistance 15 | 16 | import torch 17 | import torch.cuda 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | from nkfb_util import logsumexp, cuda_if_available 22 | 23 | import codraw_data 24 | from codraw_data import AbstractScene, Clipart 25 | import abs_render 26 | from abs_metric import scene_similarity, clipart_similarity 27 | from episode import Episode, Transcriber, respond_to, response_partial 28 | 29 | import model 30 | from model import make_fns, eval_fns 31 | 32 | from saved_models import load_models, make_pairs 33 | 34 | # %% 35 | 36 | def print_human(limit=None, split='dev'): 37 | human_sims = np.array([ 38 | scene_similarity(human_scene, true_scene) 39 | for true_scene, human_scene in codraw_data.get_truth_and_human_scenes('test')[:limit] 40 | ]) 41 | 42 | print(f"Human scene similarity [{split}]: mean={human_sims.mean():.2f} std={human_sims.std():.2f} median={np.median(human_sims):.2f}") 43 | 44 | # %% 45 | 46 | def print_pairwise(tellers, drawers, teller_splits='ab', drawer_splits='ab', limit=None, split='dev'): 47 | print(f"Teller \t Drawer \t Scene similarity [{split}]") 48 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 49 | for teller_name, teller_pair in tellers: 50 | for drawer_name, drawer_pair in drawers: 51 | for splits in splits_group: 52 | if splits[0] not in teller_splits or splits[1] not in drawer_splits: 53 | continue 54 | sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit, split=split) 55 | teller_caption = f"{teller_name}_{splits[0]}" 56 | drawer_caption = f"{drawer_name}_{splits[1]}" 57 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t {sims.mean():.2f}") 58 | print() 59 | 60 | # %% 61 | 62 | def print_script(drawers, drawer_splits='ab', limit=None, split='dev'): 63 | print("Drawer evaluations against script") 64 | print(f"Drawer \t Scene similarity [{split}]") 65 | for drawer_name, drawer_pair in drawers: 66 | for drawer_split in drawer_splits: 67 | sims = eval_fns(make_fns(drawer_split, model.scripted_tell, drawer_pair), limit=limit, split=split) 68 | drawer_caption = f"{drawer_name}_{drawer_split}" 69 | print(f"{drawer_caption:17s}\t {sims.mean():.2f}") 70 | 71 | # %% 72 | 73 | component_evaluator = model.ComponentEvaluator.get() 74 | 75 | # %% 76 | 77 | def print_components_pairwise(tellers, drawers, teller_splits='ab', drawer_splits='ab', limit=None, split='dev'): 78 | print(f"Component evaluations [{split}]") 79 | print("Teller \t Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 80 | for splits_group in [('ab', 'ba'), ('aa', 'bb')]: 81 | for teller_name, teller_pair in tellers: 82 | for drawer_name, drawer_pair in drawers: 83 | for splits in splits_group: 84 | if splits[0] not in teller_splits or 
splits[1] not in drawer_splits: 85 | continue 86 | components = component_evaluator.eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit, split=split) 87 | teller_caption = f"{teller_name}_{splits[0]}" 88 | drawer_caption = f"{drawer_name}_{splits[1]}" 89 | print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 90 | print() 91 | 92 | def print_components_script(drawers, drawer_splits='ab', limit=None, split='dev'): 93 | print(f"Drawer evaluations against script [{split}]") 94 | print("Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 95 | for drawer_name, drawer_pair in drawers: 96 | for drawer_split in drawer_splits: 97 | components = component_evaluator.eval_fns(make_fns(drawer_split, model.scripted_tell, drawer_pair), limit=limit, split=split) 98 | drawer_caption = f"{drawer_name}_{drawer_split}" 99 | print(f"{drawer_caption:17s}\t", "\t".join(f"{num: .6f}" for num in components)) 100 | 101 | # %% 102 | 103 | def print_eval( 104 | tellers=None, drawers=None, 105 | teller_splits='ab', drawer_splits='ab', 106 | limit=None, 107 | split='dev', 108 | do_all=False, 109 | do_human=False, 110 | do_pairwise=False, 111 | do_script=False, 112 | do_components_pairwise=False, 113 | do_components_script=False, 114 | ): 115 | if do_all: 116 | do_human = True 117 | do_pairwise = True 118 | do_script = True 119 | do_components_pairwise = True 120 | do_components_script = True 121 | 122 | print() 123 | 124 | if do_human: 125 | print_human(limit=limit, split=split) 126 | print() 127 | print() 128 | 129 | if do_pairwise: 130 | print_pairwise(tellers, drawers, teller_splits=teller_splits, drawer_splits=drawer_splits, limit=limit, split=split) 131 | print() 132 | print() 133 | 134 | if do_script: 135 | print_script(drawers, drawer_splits=drawer_splits, limit=limit, split=split) 136 | print() 137 | print() 138 | 139 | if do_components_pairwise: 140 | print_components_pairwise(tellers, drawers, teller_splits=teller_splits, drawer_splits=drawer_splits, limit=limit, split=split) 141 | print() 142 | print() 143 | 144 | if do_components_script: 145 | print_components_script(drawers, drawer_splits=drawer_splits, limit=limit, split=split) 146 | print() 147 | print() 148 | 149 | # %% 150 | 151 | if __name__ == '__main__': 152 | models = load_models() 153 | 154 | # %% 155 | if __name__ == '__main__': 156 | tellers = make_pairs(models, 157 | 'teller_nn', 158 | # 'teller_pragmaticnn', 159 | 'teller_scene2seq', 160 | 'teller_scene2seq_aux2', 161 | 'teller_rl', 162 | ) 163 | 164 | drawers_for_script = make_pairs(models, 165 | 'drawer_nn', 166 | # 'drawer_bowcanvas2bce', 167 | 'drawer_lstmaddonly', 168 | ) 169 | 170 | drawers_for_pairwise = make_pairs(models, 171 | 'drawer_lstmaddonly', 172 | ) 173 | 174 | limit=None 175 | split='test' 176 | 177 | print_eval(limit=limit, split=split, do_human=True) 178 | print_eval(tellers, drawers_for_pairwise, teller_splits='a', drawer_splits='b', limit=limit, split=split, do_pairwise=True) 179 | print_eval(tellers, drawers_for_script, teller_splits='a', drawer_splits='b', limit=limit, split=split, do_script=True) 180 | 181 | # %% 182 | # %% 183 | # %% 184 | # %% 185 | # %% 186 | # %% 187 | -------------------------------------------------------------------------------- /eval_run_bots.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | #%% 8 | 9 | import json 10 | import time 11 | 12 | try: 13 | from eval_server_common import connect_to_redis 14 | except ImportError: 15 | print("HINT: copy example.eval_server_common.py to eval_server_common.py") 16 | raise 17 | import codraw_data 18 | import episode 19 | 20 | #%% 21 | 22 | FAREWELL_MSG = "that's it, thanks!" 23 | 24 | class Bot(): 25 | model_name = "model_generic" 26 | agent_type = None 27 | fns = None 28 | 29 | # TODO(nikita): peek action for bot drawers is not supported 30 | def __init__(self, id): 31 | self.id = id 32 | self.episode = episode.Episode() 33 | self.role = "question" if self.agent_type == codraw_data.Agent.DRAWER else "answer" 34 | self.handlers = { 35 | 'paired': self.on_paired, 36 | 'receive message': self.on_receive_message, 37 | 'server error': self.on_server_error, #TODO(nikita): Not emitted after I modified the server code 38 | 'disconnected partner': self.on_disconnected_partner, 39 | } 40 | self.disconnected = False 41 | 42 | self.num_messages_sent = 0 43 | 44 | def disconnect(self): 45 | if self.id in type(self).active_bots: 46 | assert type(self).active_bots[self.id] == self 47 | del type(self).active_bots[self.id] 48 | 49 | if not self.disconnected: 50 | self.disconnected = True 51 | self.emit('disconnect') 52 | 53 | def emit(self, event, **msg): 54 | obj = { 55 | 'botId': self.id, 56 | 'event': event, 57 | 'msg': msg, 58 | } 59 | self.redis.publish('visdial_server', json.dumps(obj)) 60 | 61 | def send_msg(self, msg): 62 | self.num_messages_sent += 1 63 | self.emit('chat message', msg=msg, role=self.role, seqId=self.num_messages_sent) 64 | print("Sent chat message:", msg) 65 | 66 | def send_scene_log(self, scene): 67 | self.emit('scene log', scene=scene.stringify(), role=self.role, seqId=self.num_messages_sent) 68 | 69 | # TODO(nikita): implement drawer bots, including "send_scene_log" which is sent by drawer 70 | # socket.emit('scene log', {scene: Abs.resultAMT(), hitId: hitId, assignmentId: assignmentId, workerId: workerId, role: workerRole, seqId: noOfMsg}); 71 | 72 | def run_model_actions(self, must_trigger=True): 73 | old_len = len(self.episode) 74 | terminated = self._run_model_actions() 75 | if terminated: 76 | print("No action taking. Disconnecting") 77 | if INTERACTIVE: 78 | display(self.episode.get_true_scene()) 79 | self.send_msg(FAREWELL_MSG) 80 | self.disconnect() 81 | return 82 | 83 | if must_trigger: 84 | if len(self.episode) == old_len: 85 | self.disconnect() 86 | assert False, f"No response for event: {type(self.episode[-1]).__name__}" 87 | 88 | msg_to_send = None 89 | do_send_scene_log = False 90 | for event in self.episode[old_len:]: 91 | # TODO(nikita): log latent actions, such as SelectClipart 92 | if isinstance(event, codraw_data.TellGroup): 93 | assert msg_to_send is None, "Multiple TellGroup events added in a single round!" 94 | msg_to_send = event.msg 95 | elif isinstance(event, codraw_data.ReplyGroup): 96 | assert msg_to_send is None, "Multiple ReplyGroup events added in a single round!" 
97 | msg_to_send = event.msg 98 | elif isinstance(event, (codraw_data.DrawClipart, codraw_data.DrawGroup)): 99 | do_send_scene_log = True 100 | 101 | if do_send_scene_log: 102 | assert self.agent_type == codraw_data.Agent.DRAWER 103 | self.send_scene_log(self.episode.reconstruct()) 104 | 105 | if self.agent_type == codraw_data.Agent.TELLER: 106 | assert msg_to_send is not None, "No message to send" 107 | # Empty message is a signal for the drawer to begin the conversation 108 | if msg_to_send == "" and len([x for x in self.episode if isinstance(x, codraw_data.TellGroup)]) == 1: 109 | msg_to_send = None 110 | print("Model expects the human drawer to start the conversation.") 111 | else: 112 | assert msg_to_send is not None or isinstance(self.episode[-1], codraw_data.ObserveTruth), "No message to send, and not the start" 113 | 114 | if msg_to_send is not None: 115 | self.send_msg(msg_to_send) 116 | 117 | def _run_model_actions(self): 118 | while True: 119 | for fn in self.fns: 120 | if type(self.episode[-1]) in fn._trigger_types: 121 | old_len = len(self.episode) 122 | fn(self.episode) 123 | if len(self.episode) == old_len: 124 | return True # terminated 125 | break 126 | else: 127 | # print('no trigger for', type(self.episode[-1])) 128 | return False 129 | 130 | def on_paired(self, partnerId=None, key=None, image_url=None, role=None, caption=None): 131 | if self.disconnected: 132 | print("[ERROR] Disconnected bot was paired!") 133 | return 134 | print("Paired wih human partner!") 135 | print("image_url:", image_url) 136 | print("partner role:", role) # Yes, the role sent in the message is for the partner 137 | assigned_role = "question" if role == "answer" else "answer" 138 | assert assigned_role == self.role, "Wrong role assigned to bot!" 139 | 140 | true_scene = codraw_data.AbstractScene(image_url) 141 | self.episode.append(codraw_data.ObserveTruth(true_scene)) 142 | self.run_model_actions(must_trigger=False) 143 | 144 | def on_receive_message(self, message=None, noOfMsg=None): 145 | if self.disconnected: 146 | print("[ERROR] Disconnected bot received a message!") 147 | return 148 | print(f"Got human message {noOfMsg}: {message}") 149 | assert message is not None 150 | 151 | if self.agent_type == codraw_data.Agent.TELLER: 152 | self.episode.append(codraw_data.ReplyGroup(message)) 153 | else: 154 | self.episode.append(codraw_data.TellGroup(message)) 155 | self.run_model_actions() 156 | 157 | def on_disconnected_partner(self, disable='_unused'): 158 | print("Partner disconnected from bot! Cleanining up the bot") 159 | self.disconnect() 160 | 161 | def on_server_error(self, errorMsg='[no errorMsg specified]'): 162 | print("Error from server:", errorMsg) 163 | self.disconnect() 164 | 165 | # %% 166 | 167 | 168 | def run_loop(classes): 169 | active_bots = {} 170 | channel_to_cls = {} 171 | 172 | for cls in classes: 173 | assert cls.agent_type in (codraw_data.Agent.TELLER, codraw_data.Agent.DRAWER), "Invalid agent_type for bot!" 
174 | 175 | channel = f'visdial_models.{cls.model_name}'.encode('utf-8') 176 | assert channel not in channel_to_cls, f"Duplicate model name {cls.model_name}" 177 | channel_to_cls[channel] = cls 178 | 179 | if not hasattr(cls, 'redis'): 180 | cls.redis = connect_to_redis() 181 | 182 | if not hasattr(cls, 'active_bots'): 183 | cls.active_bots = active_bots 184 | 185 | p = cls.redis.pubsub() 186 | 187 | for channel in channel_to_cls: 188 | p.subscribe(channel) 189 | 190 | for redis_msg in p.listen(): 191 | print("Got redis msg", redis_msg) 192 | if redis_msg['type'] != 'message': 193 | continue 194 | 195 | if redis_msg['channel'] not in channel_to_cls: 196 | print(f"WARNING: unrecognized channel {redis_msg['channel']}") 197 | continue 198 | 199 | data = json.loads(redis_msg['data']) 200 | 201 | id = data['botId'] 202 | event = data['event'] 203 | msg = data['msg'] 204 | 205 | if event == 'paired': 206 | active_bots[id] = channel_to_cls[redis_msg['channel']](id) 207 | 208 | if id in active_bots: 209 | handler = active_bots[id].handlers.get(event, None) 210 | if handler is None: 211 | print(f"No handler for event '{event}'") 212 | else: 213 | active_bots[id].handlers[event](**msg) 214 | 215 | # %% 216 | 217 | def make_script_teller_class(): 218 | import model 219 | 220 | class ScriptTellerBot(Bot): 221 | model_name = 'teller_script' 222 | agent_type = codraw_data.Agent.TELLER 223 | fns = [model.scripted_tell_before_peek] 224 | scene_to_script = {} 225 | 226 | def _run_model_actions(self): 227 | if not hasattr(self.episode, 'script'): 228 | script = self.scene_to_script[self.episode.get_last(codraw_data.ObserveTruth).scene.stringify()] 229 | self.episode.script = script 230 | self.episode.script_index = 0 231 | return super()._run_model_actions() 232 | 233 | for scene, script in codraw_data.get_scenes_and_scripts('all'): 234 | ScriptTellerBot.scene_to_script[scene.stringify()] = script 235 | 236 | return ScriptTellerBot 237 | 238 | # %% 239 | 240 | def model_to_bot_class(model_name, model, model_agent_type=codraw_data.Agent.TELLER): 241 | model_name_ = model_name 242 | class TheBot(Bot): 243 | model_name = model_name_ 244 | agent_type = model_agent_type 245 | fns = model.get_action_fns() 246 | 247 | TheBot.__name__ = type(model).__name__ + 'Bot' 248 | TheBot.__qualname__ = TheBot.__qualname__.replace('TheBot', TheBot.__name__) 249 | return TheBot 250 | 251 | # %% 252 | 253 | def run_model_pairs(tellers, drawers=[], include_script_teller=True): 254 | classes = [] 255 | 256 | if include_script_teller: 257 | classes.append(make_script_teller_class()) 258 | 259 | for teller_name, (a, b) in tellers: 260 | if a is not None: 261 | classes.append(model_to_bot_class(teller_name + '_a', a, codraw_data.Agent.TELLER)) 262 | if b is not None: 263 | classes.append(model_to_bot_class(teller_name + '_b', b, codraw_data.Agent.TELLER)) 264 | 265 | for drawer_name, (a, b) in drawers: 266 | if a is not None: 267 | classes.append(model_to_bot_class(drawer_name + '_a', a, codraw_data.Agent.DRAWER)) 268 | if b is not None: 269 | classes.append(model_to_bot_class(drawer_name + '_b', b, codraw_data.Agent.DRAWER)) 270 | 271 | run_loop(classes) 272 | 273 | #%% 274 | 275 | if __name__ == '__main__': 276 | from saved_models import load_models, make_pairs 277 | models = load_models() 278 | models['teller_scene2seq_a'].max_rounds = 20 279 | models['teller_scene2seq_aux2_a'].max_rounds = 20 280 | models['teller_rl_a'].max_rounds = 20 281 | # TODO(nikita): change max_rounds for partition-b tellers, too 282 | tellers = 
make_pairs(models, 283 | 'teller_nn', 284 | 'teller_pragmaticnn', 285 | 'teller_scene2seq', 286 | 'teller_scene2seq_aux2', 287 | 'teller_rl', 288 | ) 289 | 290 | drawers = make_pairs(models, 291 | 'drawer_nn', 292 | 'drawer_bowcanvas2bce', 293 | 'drawer_lstmaddonly', 294 | ) 295 | 296 | run_model_pairs(tellers, drawers) 297 | -------------------------------------------------------------------------------- /eval_transcripts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from interactivity import INTERACTIVE, try_magic, try_cd 8 | try_cd('~/dev/drawmodel/nkcodraw') 9 | 10 | #%% 11 | import json 12 | import numpy as np 13 | 14 | import codraw_data 15 | import model 16 | from abs_metric import scene_similarity 17 | from pathlib import Path 18 | 19 | #%% 20 | 21 | TRANSCRIPTS_PATH = Path('transcripts-eval-v1.json') 22 | TRANSCRIPTS_SPLIT = 'test' 23 | 24 | #%% 25 | 26 | transcripts = json.loads(TRANSCRIPTS_PATH.read_text()) 27 | 28 | #%% 29 | 30 | def get_transcript_results(transcripts): 31 | data = transcripts['data'] 32 | for datum in data.values(): 33 | model_name = datum['model_name'] 34 | scene = codraw_data.AbstractScene(datum['abs_t']) 35 | scene_after = None 36 | for entry in datum['dialog']: 37 | scene_after = entry['abs_d'] 38 | assert scene_after is not None 39 | scene_after = codraw_data.AbstractScene(scene_after) 40 | yield (model_name, scene, scene_after) 41 | 42 | #%% 43 | 44 | compontent_evaluator = model.ComponentEvaluator.get() 45 | 46 | #%% 47 | 48 | true_to_human = {} 49 | for true_scene, human_scene in codraw_data.get_truth_and_human_scenes(TRANSCRIPTS_SPLIT): 50 | true_to_human[tuple(true_scene)] = human_scene 51 | 52 | # %% 53 | 54 | model_to_sims = {} 55 | model_to_numer = {} 56 | model_to_denom = {} 57 | true_scenes_set = set() 58 | for model_name, true_scene, reconstructed_scene in get_transcript_results(transcripts): 59 | if model_name not in model_to_sims: 60 | model_to_sims[model_name] = [] 61 | if model_name not in model_to_numer: 62 | assert model_name not in model_to_denom 63 | model_to_numer[model_name] = [] 64 | model_to_denom[model_name] = [] 65 | model_to_sims[model_name].append(scene_similarity(reconstructed_scene, true_scene)) 66 | numer, denom = compontent_evaluator.eval_scene(reconstructed_scene, true_scene) 67 | model_to_numer[model_name].append(numer) 68 | model_to_denom[model_name].append(denom) 69 | true_scenes_set.add(tuple(true_scene)) 70 | 71 | #%% 72 | 73 | print("Model \t Scene similarity") 74 | for model_name, sims in model_to_sims.items(): 75 | print(f"{model_name:17s}\t {np.array(sims).mean():.2f}") 76 | 77 | sims = np.array([scene_similarity(true_to_human[scene], scene) for scene in true_scenes_set]) 78 | print(f"{'human':17s}\t {np.array(sims).mean():.2f}") 79 | 80 | #%% 81 | print() 82 | print() 83 | #%% 84 | 85 | print("Model \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only") 86 | for model_name in model_to_numer: 87 | numer = model_to_numer[model_name] 88 | denom = model_to_denom[model_name] 89 | components = np.array(numer).sum(0) / np.array(denom).sum(0) 90 | components = 1.0 - (components - compontent_evaluator.human_scores) / (compontent_evaluator.baseline_scores - compontent_evaluator.human_scores) 91 | print(f"{model_name:17s}\t", "\t".join(f"{num: 
.6f}" for num in components)) 92 | 93 | human_numer_denom = [compontent_evaluator.eval_scene(true_to_human[scene], scene) for scene in true_scenes_set] 94 | components = np.array([x[0] for x in human_numer_denom]).sum(0) / np.array([x[1] for x in human_numer_denom]).sum(0) 95 | components = 1.0 - (components - compontent_evaluator.human_scores) / (compontent_evaluator.baseline_scores - compontent_evaluator.human_scores) 96 | print(f"{'human':17s}\t", "\t".join(f"{num: .6f}" for num in components)) 97 | 98 | 99 | #%% 100 | -------------------------------------------------------------------------------- /example.eval_server_common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import redis 8 | 9 | REDIS_HOST = 'localhost' 10 | REDIS_PORT = 6379 11 | REDIS_PASSWORD = 'YOUR PASSWORD HERE' 12 | 13 | REDIS_CONNECTION = None 14 | 15 | def connect_to_redis(): 16 | global REDIS_CONNECTION 17 | if REDIS_CONNECTION is None: 18 | REDIS_CONNECTION = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=0) 19 | return REDIS_CONNECTION 20 | -------------------------------------------------------------------------------- /exp28_scenenn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | Scene-level nearest-neighbor teller 9 | """ 10 | 11 | from interactivity import INTERACTIVE, try_magic, try_cd 12 | try_cd('~/dev/drawmodel/nkcodraw') 13 | 14 | #%% 15 | 16 | import numpy as np 17 | from pathlib import Path 18 | 19 | import codraw_data 20 | from codraw_data import AbstractScene, Clipart 21 | import abs_render 22 | from abs_metric import scene_similarity, clipart_similarity 23 | from episode import Episode, Transcriber, respond_to 24 | 25 | import model 26 | from model import make_fns, eval_fns 27 | from model import Model 28 | 29 | from baseline2_models import load_baseline2 30 | 31 | # %% 32 | 33 | scenes_and_scripts_dev = codraw_data.get_scenes_and_scripts('dev') 34 | 35 | transcribe = Transcriber( 36 | 'exp28_scenenn.py' if INTERACTIVE else __file__, 37 | scenes_and_scripts=scenes_and_scripts_dev[::110], 38 | scenes_description="scenes_and_scripts_dev[::110]") 39 | 40 | # %% 41 | 42 | models_baseline2 = load_baseline2() 43 | 44 | # %% 45 | 46 | drawer_lstmaddonly_a = models_baseline2['drawer_lstmaddonly_a'] 47 | drawer_lstmaddonly_b = models_baseline2['drawer_lstmaddonly_b'] 48 | 49 | # %% 50 | 51 | from datagen import Datagen 52 | class SceneNearestNeighborData(Datagen): 53 | def init_full(self): 54 | self.build_dicts() 55 | 56 | def init_from_spec(self): 57 | self.build_dicts() 58 | 59 | def build_dicts(self): 60 | self.scene_to_msgs = {} 61 | 62 | # calculate events 63 | events = codraw_data.get_contextual_place_many(self.split) 64 | 65 | scene = None 66 | msgs = None 67 | 68 | it = iter(events) 69 | for event in it: 70 | if isinstance(event, codraw_data.ObserveTruth): 71 | if scene is not None and msgs is not None: 72 | self.scene_to_msgs[tuple(scene)] = msgs 73 | scene = event.scene 74 | msgs = [] 75 | elif isinstance(event, codraw_data.TellGroup): 76 | 
msgs.append(event.msg) 77 | 78 | if scene is not None and msgs is not None: 79 | self.scene_to_msgs[tuple(scene)] = msgs 80 | 81 | # %% 82 | 83 | class SceneNearestNeighborTeller(Model): 84 | datagen_cls = SceneNearestNeighborData 85 | 86 | def prepare(self, episode): 87 | scene = episode.get_last(codraw_data.ObserveTruth).scene 88 | best_similarity = -1 89 | best_msgs = [] 90 | best_scene_tuple = None 91 | for cand_scene_tuple in self.datagen.scene_to_msgs: 92 | cand_sim = scene_similarity(cand_scene_tuple, scene) 93 | if cand_sim > best_similarity: 94 | best_similarity = cand_sim 95 | best_msgs = self.datagen.scene_to_msgs[cand_scene_tuple] 96 | best_scene_tuple = cand_scene_tuple 97 | 98 | # display(AbstractScene(scene)) 99 | # display(AbstractScene(best_scene_tuple)) 100 | # display(best_similarity) 101 | episode.to_tell = best_msgs[::] # make a copy! 102 | 103 | @respond_to(codraw_data.ObserveTruth) 104 | @respond_to(codraw_data.ReplyGroup) 105 | def tell(self, episode): 106 | if not hasattr(episode, 'to_tell'): 107 | self.prepare(episode) 108 | 109 | if episode.to_tell: 110 | msg = episode.to_tell.pop(0) 111 | episode.append(codraw_data.TellGroup(msg)) 112 | 113 | 114 | def get_action_fns(self): 115 | return [self.tell] 116 | 117 | # %% 118 | 119 | data_scenenn_a = SceneNearestNeighborData('a') 120 | data_scenenn_b = SceneNearestNeighborData('b') 121 | 122 | # %% 123 | 124 | teller_scenenn_a = SceneNearestNeighborTeller(data_scenenn_a) 125 | teller_scenenn_b = SceneNearestNeighborTeller(data_scenenn_b) 126 | 127 | # %% 128 | 129 | # Episode.run(codraw_data.get_scenes('dev')[0], make_fns('aa', (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b))).display() 130 | 131 | # %% 132 | # %% 133 | # %% 134 | 135 | print() 136 | print() 137 | print("Final evaluation on full dev set") 138 | 139 | # %% 140 | 141 | for splits in ('aa', 'ab', 'ba', 'bb'): 142 | sims = eval_fns(make_fns(splits, (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 143 | print(splits, sims.mean()) 144 | # aa 1.3095491909624886 145 | # ab 1.3115692170881366 146 | 147 | # nohier aa 2.229799264350204 148 | # nohier ab 2.255167911899865 149 | 150 | # %% 151 | 152 | for splits in ('ba', 'bb'): 153 | sims = eval_fns(make_fns(splits, (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) 154 | print(splits, sims.mean()) 155 | 156 | # %% 157 | 158 | transcribe("exp28_scenenn", 159 | aa=make_fns('aa', (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), 160 | ab=make_fns('ab', (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), 161 | ) 162 | 163 | # %% 164 | 165 | # hieraddonlyseq = dict( 166 | # drawer_hieraddonlyseq_a = drawer_hieraddonlyseq_a.spec, 167 | # drawer_hieraddonlyseq_b = drawer_hieraddonlyseq_b.spec, 168 | # ) 169 | 170 | #%% 171 | 172 | # torch.save(hieraddonlyseq, Path('models/hieraddonlyseq.pt')) 173 | 174 | # %% 175 | # %% 176 | # %% 177 | # %% 178 | -------------------------------------------------------------------------------- /interactivity.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
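#
# Helpers for detecting whether the code is running inside an IPython/Jupyter
# session: INTERACTIVE is set below, and try_magic/try_cd silently become no-ops
# when no kernel is available, so the experiment scripts can run both
# interactively and as plain Python programs.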
6 | 7 | try: 8 | get_ipython() 9 | INTERACTIVE=True 10 | except: 11 | INTERACTIVE=False 12 | 13 | def try_magic(*args, **kwargs): 14 | if not INTERACTIVE: 15 | return 16 | return get_ipython().magic(*args, **kwargs) 17 | 18 | def try_cd(loc): 19 | if not INTERACTIVE: 20 | return 21 | return get_ipython().magic(f'%cd {loc}') 22 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | from pathlib import Path 9 | import editdistance 10 | 11 | import torch 12 | import torch.cuda 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | from nkfb_util import logsumexp, cuda_if_available 17 | 18 | import codraw_data 19 | from codraw_data import AbstractScene, Clipart 20 | import abs_render 21 | from abs_metric import scene_similarity, clipart_similarity 22 | from episode import Episode, respond_to, response_partial 23 | 24 | #%% 25 | 26 | @respond_to(codraw_data.ObserveTruth) 27 | @respond_to(codraw_data.ReplyGroup) 28 | def select_clipart_to_tell(episode): 29 | cliparts = set(episode.get_last(codraw_data.ObserveTruth).scene) 30 | cliparts -= set([e.clipart for e in episode if isinstance(e, codraw_data.SelectClipart)]) 31 | if cliparts: 32 | cliparts = list(sorted(cliparts)) 33 | clipart = cliparts[0] 34 | # For now, don't randomize the clipart selection order. 35 | #cliparts[np.random.choice(len(cliparts))] 36 | episode.append(codraw_data.SelectClipart(clipart)) 37 | 38 | @respond_to(codraw_data.ObserveTruth) 39 | @respond_to(codraw_data.ReplyGroup) 40 | def scripted_tell(episode): 41 | if episode.script_index < len(episode.script): 42 | event = episode.script[episode.script_index] 43 | if isinstance(event, codraw_data.Peek): 44 | # Skip to the next non-peek event 45 | assert isinstance(episode.script[episode.script_index + 1], codraw_data.TellerObserveCanvas) 46 | episode.script_index += 2 47 | return scripted_tell(episode) 48 | episode.script_index += 1 49 | episode.append(event) 50 | 51 | @respond_to(codraw_data.ObserveTruth) 52 | @respond_to(codraw_data.ReplyGroup) 53 | def scripted_tell_before_peek(episode): 54 | if episode.script_index < len(episode.script): 55 | event = episode.script[episode.script_index] 56 | if isinstance(event, codraw_data.Peek): 57 | return 58 | episode.script_index += 1 59 | episode.append(event) 60 | 61 | @respond_to(codraw_data.ObserveTruth) 62 | @respond_to(codraw_data.ReplyGroup) 63 | def scripted_tell_after_peek(episode): 64 | if episode.script_index == 0: 65 | while episode.script_index < len(episode.script): 66 | event = episode.script[episode.script_index] 67 | episode.script_index += 1 68 | if not isinstance(event, codraw_data.Peek): 69 | continue 70 | event = episode.script[episode.script_index] 71 | assert isinstance(event, codraw_data.TellerObserveCanvas) 72 | start_scene = event.scene 73 | episode.script_index += 1 74 | break 75 | else: 76 | assert False, "Could not find Peek event in the script!" 
77 | episode.append(codraw_data.DrawGroup(start_scene)) 78 | assert episode.script_index < len(episode.script) 79 | 80 | if episode.script_index < len(episode.script): 81 | event = episode.script[episode.script_index] 82 | episode.script_index += 1 83 | episode.append(event) 84 | 85 | @respond_to(codraw_data.TellGroup) 86 | def draw_nothing(episode): 87 | episode.append(codraw_data.DrawGroup([])) 88 | episode.append(codraw_data.ReplyGroup("ok")) 89 | 90 | @respond_to(codraw_data.TellGroup) 91 | def drawer_observe_canvas(episode): 92 | # TODO(nikita): can cache for higher efficiency 93 | scene = episode.reconstruct() 94 | event = codraw_data.ObserveCanvas(scene) 95 | episode.append(event) 96 | 97 | def make_fns(splits, *objs_or_pairs): 98 | split_to_use = 0 99 | res = [] 100 | for obj_or_pair in objs_or_pairs: 101 | if isinstance(obj_or_pair, tuple): 102 | assert len(obj_or_pair) == 2 103 | if splits[split_to_use] == 'a': 104 | obj = obj_or_pair[0] 105 | elif splits[split_to_use] == 'b': 106 | obj = obj_or_pair[1] 107 | else: 108 | raise ValueError(f"Invalid split: {splits[split_to_use]}") 109 | split_to_use += 1 110 | else: 111 | obj = obj_or_pair 112 | 113 | if isinstance(obj, nn.Module): 114 | # Switch pytorch modules to evaluation mode 115 | obj.eval() 116 | 117 | if hasattr(obj, 'get_action_fns'): 118 | res.extend(obj.get_action_fns()) 119 | else: 120 | res.append(obj) 121 | 122 | assert split_to_use == len(splits), "Too many splits specified" 123 | return res 124 | 125 | def episodes_from_fns(fns, limit=None, split='dev'): 126 | use_scripts = (scripted_tell in fns) or (scripted_tell_before_peek in fns) 127 | if scripted_tell_after_peek in fns: 128 | use_scripts = True 129 | run_from = codraw_data.get_scenes_and_scripts_with_peek(split) 130 | elif use_scripts: 131 | run_from = codraw_data.get_scenes_and_scripts(split) 132 | else: 133 | run_from = codraw_data.get_scenes(split) 134 | 135 | if limit is not None: 136 | run_from = run_from[:limit] 137 | 138 | sims = [] 139 | with torch.no_grad(): 140 | for run_from_single in run_from: 141 | if use_scripts: 142 | episode = Episode.run_script(run_from_single, fns) 143 | else: 144 | episode = Episode.run(run_from_single, fns) 145 | yield episode 146 | 147 | def eval_fns(fns, limit=None, split='dev'): 148 | sims = [episode.scene_similarity() for episode in episodes_from_fns(fns, limit=limit, split=split)] 149 | return np.array(sims) 150 | 151 | #%% 152 | 153 | def calc_perplexity(teller, split='dev'): 154 | """ 155 | Calculates teller perplexity. Does not work with all teller classes, e.g. 156 | perplexity has not been defined for the nearest-neighbor tellers. 
157 | """ 158 | datagen_spec = {**teller.datagen.spec} 159 | datagen_spec['split'] = split 160 | datagen_dev = teller.datagen_cls(spec=datagen_spec) 161 | 162 | assert datagen_dev.vocabulary == teller.datagen.vocabulary 163 | 164 | nlls = [] 165 | counts = [] 166 | with torch.no_grad(): 167 | teller.eval() 168 | for ex in datagen_dev.get_examples_unshuffled_batch(batch_size=128): 169 | nll, count = teller(ex, return_loss=False, return_nll_count=True) 170 | nlls.append(nll) 171 | counts.append(count) 172 | 173 | nll_per_word = np.array(nlls).sum() / np.array(counts).sum() 174 | 175 | return np.exp(nll_per_word) 176 | 177 | #%% 178 | class ComponentEvaluator: 179 | NUM_FEATURES = 7 180 | 181 | _instance_cache = {} 182 | 183 | @classmethod 184 | def get(cls, split_for_baseline='train_full'): 185 | if split_for_baseline not in cls._instance_cache: 186 | cls._instance_cache[split_for_baseline] = cls(split_for_baseline) 187 | return cls._instance_cache[split_for_baseline] 188 | 189 | def __init__(self, split_for_baseline='train_full'): 190 | cliparts_by_idx = {idx: [] for idx in range(58)} 191 | for scene in codraw_data.get_scenes(split_for_baseline): 192 | for clipart in scene: 193 | cliparts_by_idx[clipart.idx].append(clipart) 194 | 195 | self.idx_to_exemplar = {} 196 | for idx in cliparts_by_idx: 197 | if idx in Clipart.HUMAN_IDXS: 198 | expression, _ = torch.mode(torch.tensor([c.expression for c in cliparts_by_idx[idx]])) 199 | pose, _ = torch.mode(torch.tensor([c.pose for c in cliparts_by_idx[idx]])) 200 | subtype = pose * Clipart.NUM_EXPRESSION + expression 201 | else: 202 | subtype = 0 203 | depth, _ = torch.mode(torch.tensor([c.depth for c in cliparts_by_idx[idx]])) 204 | flip, _ = torch.mode(torch.tensor([c.flip for c in cliparts_by_idx[idx]])) 205 | 206 | x = np.mean([c.x for c in cliparts_by_idx[idx]]) 207 | y = np.mean([c.y for c in cliparts_by_idx[idx]]) 208 | self.idx_to_exemplar[idx] = Clipart(idx, int(subtype), int(depth), int(flip), x, y) 209 | 210 | # Calculate prior baseline, and human performance 211 | human_numer = np.zeros(self.NUM_FEATURES) 212 | human_denom = np.zeros(self.NUM_FEATURES) 213 | baseline_numer = np.zeros(self.NUM_FEATURES) 214 | baseline_denom = np.zeros(self.NUM_FEATURES) 215 | for scene_true, scene_human in codraw_data.get_truth_and_human_scenes('dev'): 216 | ep_numer, ep_denom = self.eval_scene(scene_human, scene_true) 217 | human_numer += ep_numer 218 | human_denom += ep_denom 219 | ep_numer, ep_denom = self.eval_scene([], scene_true) 220 | baseline_numer += ep_numer 221 | baseline_denom += ep_denom 222 | 223 | self.human_scores = human_numer / human_denom 224 | self.baseline_scores = baseline_numer / baseline_denom 225 | 226 | def eval_scene(self, pred, target): 227 | res_numer = np.zeros(self.NUM_FEATURES) 228 | res_denom = np.zeros(self.NUM_FEATURES) 229 | 230 | for truth_clipart in target: 231 | other_cliparts = [c for c in pred if c.idx == truth_clipart.idx] 232 | if other_cliparts: 233 | other_clipart = other_cliparts[0] 234 | else: 235 | other_clipart = self.idx_to_exemplar[truth_clipart.idx] 236 | 237 | feats_numer = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 238 | feats_denom = [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0] 239 | feats_numer[0] = float(truth_clipart.flip != other_clipart.flip) 240 | if truth_clipart.idx in Clipart.HUMAN_IDXS: 241 | feats_numer[1] = float(truth_clipart.expression != other_clipart.expression) 242 | feats_numer[2] = float(truth_clipart.pose != other_clipart.pose) 243 | feats_denom[1] = 1.0 244 | feats_denom[2] = 1.0 245 | 
feats_numer[3] = float(truth_clipart.depth != other_clipart.depth) 246 | displacements = np.array([truth_clipart.normed_x - other_clipart.normed_x, truth_clipart.normed_y - other_clipart.normed_y]) 247 | feats_numer[4] = np.sum(displacements ** 2) 248 | feats_numer[5], feats_numer[6] = np.abs(displacements) 249 | 250 | res_numer += feats_numer 251 | res_denom += feats_denom 252 | return res_numer, res_denom 253 | 254 | def eval_episode(self, episode): 255 | return self.eval_scene(episode.reconstruct(), episode.get_true_scene()) 256 | 257 | def eval_fns(self, fns, limit=None, split='dev', unscaled=False): 258 | numer = np.zeros(self.NUM_FEATURES) 259 | denom = np.zeros(self.NUM_FEATURES) 260 | for episode in episodes_from_fns(fns, limit=limit, split=split): 261 | ep_numer, ep_denom = self.eval_episode(episode) 262 | numer += ep_numer 263 | denom += ep_denom 264 | 265 | res = numer / denom 266 | if not unscaled: 267 | res = (res - self.human_scores) / (self.baseline_scores - self.human_scores) 268 | res = 1.0 - res 269 | 270 | return res 271 | 272 | #%% 273 | 274 | class Model(object): 275 | datagen_cls = None 276 | def __init__(self, datagen=None, spec=None, **kwargs): 277 | super().__init__() 278 | if spec is not None: 279 | assert self.datagen_cls is not None 280 | assert self.datagen_cls.__name__ == spec['datagen_class'] 281 | self.datagen = self.datagen_cls(spec=spec['datagen_spec']) 282 | self.init_from_spec(**{k: v for (k,v) in spec.items() if k not in ['class', 'datagen_spec', 'datagen_class', 'state_dict']}) 283 | if 'state_dict' in spec: 284 | self.load_state_dict(spec['state_dict']) 285 | self.to(cuda_if_available) 286 | self.post_init_from_spec() 287 | else: 288 | assert isinstance(datagen, self.datagen_cls) 289 | self.datagen = datagen 290 | self.init_full(**kwargs) 291 | if hasattr(self, 'state_dict'): 292 | self.to(cuda_if_available) 293 | 294 | def init_full(self): 295 | pass 296 | 297 | def init_from_spec(self, **kwargs): 298 | self.init_full(**kwargs) 299 | 300 | def post_init_from_spec(self): 301 | pass 302 | 303 | def get_action_fns(self): 304 | raise NotImplementedError("Subclasses should override this") 305 | 306 | def get_spec(self): 307 | return {} 308 | 309 | @property 310 | def spec(self): 311 | res = { 312 | 'class': type(self).__name__, 313 | 'datagen_class': type(self.datagen).__name__, 314 | 'datagen_spec': self.datagen.spec, 315 | **self.get_spec(), 316 | } 317 | if hasattr(self, 'state_dict'): 318 | res['state_dict'] = self.state_dict() 319 | return res 320 | 321 | # This method doesn't work because models are defined in other files, so 322 | # globals() fails to register them. TODO(nikita): better deserialization 323 | # helper? 
324 | # @staticmethod 325 | # def new_from_spec(spec): 326 | # model_class = globals()[spec['class']] 327 | # return model_class(spec=spec) 328 | 329 | def just_tell(self, clipart, *args, **kwargs): 330 | assert hasattr(self, 'tell'), "Model is not a teller" 331 | if isinstance(self, nn.Module): 332 | self.eval() 333 | episode = Episode([codraw_data.SelectClipart(clipart)]) 334 | self.tell(episode, *args, **kwargs) 335 | return episode.get_last(codraw_data.TellGroup).msg 336 | 337 | def just_draw(self, msg, scene=[], *args, **kwargs): 338 | assert hasattr(self, 'draw'), "Model is not a drawer" 339 | episode = Episode([codraw_data.TellGroup(msg), codraw_data.ObserveCanvas(scene)]) 340 | if isinstance(self, nn.Module): 341 | self.eval() 342 | self.draw(episode, *args, **kwargs) 343 | event_multi = episode.get_last(codraw_data.DrawGroup) 344 | if event_multi is not None: 345 | return codraw_data.AbstractScene(event_multi.cliparts) 346 | 347 | event_single = episode.get_last(codraw_data.DrawClipart) 348 | return event_single.clipart 349 | -------------------------------------------------------------------------------- /nkfb_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | __all__ = ['cpu', 'cuda_if_available', 'logsumexp', 'torch_load'] 8 | 9 | import torch 10 | 11 | # %% 12 | 13 | cpu = torch.device('cpu') 14 | 15 | if torch.cuda.is_available(): 16 | cuda_if_available = torch.device('cuda') 17 | else: 18 | cuda_if_available = cpu 19 | 20 | # %% 21 | 22 | # https://github.com/pytorch/pytorch/issues/2591 23 | def logsumexp(x, dim=None, keepdim=False): 24 | if dim is None: 25 | x, dim = x.view(-1), 0 26 | xm, _ = torch.max(x, dim, keepdim=True) 27 | x = torch.where( 28 | (xm == float('inf')) | (xm == float('-inf')), 29 | xm, 30 | xm + torch.log(torch.sum(torch.exp(x - xm), dim, keepdim=True))) 31 | return x if keepdim else x.squeeze(dim) 32 | 33 | # %% 34 | 35 | def torch_load(*args, **kwargs): 36 | if cuda_if_available == cpu: 37 | return torch.load(*args, map_location=lambda storage, loc: storage, **kwargs) 38 | else: 39 | return torch.load(*args, **kwargs) 40 | -------------------------------------------------------------------------------- /packer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | Provides the Packer class, which is useful for managing a hierarchy where each 9 | batch element has a variable number of conversation rounds, and each round may 10 | consist of a variable number of messages. 
11 | """ 12 | 13 | #%% 14 | 15 | import numpy as np 16 | import torch 17 | from torch.nn.utils.rnn import PackedSequence 18 | 19 | # %% 20 | 21 | class Packer: 22 | def __init__(self, list_brw): 23 | coords = [] 24 | b_lens = [] 25 | br_lens = [] 26 | 27 | coords_flat = [] 28 | b_lens_flat = [] 29 | for b, list_rw in enumerate(list_brw): 30 | b_lens.append(len(list_rw)) 31 | len_flat = 0 32 | for r, list_w in enumerate(list_rw): 33 | br_lens.append(len(list_w)) 34 | for w, _ in enumerate(list_w): 35 | coords.append([b, r, w]) 36 | coords_flat.append([b, len_flat + w]) 37 | len_flat += len(list_w) 38 | b_lens_flat.append(len_flat) 39 | 40 | self.coords_brw = np.array(coords, dtype=int) 41 | self.b_lens = np.array(b_lens, dtype=int) 42 | self.br_lens = np.array(br_lens, dtype=int) 43 | 44 | self.coords_flat = np.array(coords_flat, dtype=int) 45 | self.b_lens_flat = np.array(b_lens_flat, dtype=int) 46 | 47 | self.coords_br, self.indices_br2brw = np.unique(self.coords_brw[:,:-1], axis=0, return_inverse=True) 48 | _, self.indices_b2br = np.unique(self.coords_br[:,:-1], axis=0, return_inverse=True) 49 | self.indices_b2brw = self.indices_b2br[self.indices_br2brw] 50 | 51 | self.dense_shape = np.max(self.coords_brw, 0) + 1 52 | 53 | # Must use stable sorts here, which is why kind='mergesort' 54 | self.indices_b2sb = np.argsort(-self.b_lens, kind='mergesort') 55 | sort_by_num_rounds = np.argsort(-self.b_lens[self.indices_b2br], kind='mergesort') 56 | sort_by_round = np.argsort(self.coords_br[sort_by_num_rounds][:,-1], kind='mergesort') 57 | self.indices_br2srb = sort_by_num_rounds[sort_by_round] 58 | 59 | self.indices_br2sx = np.argsort(-self.br_lens, kind='mergesort') 60 | sort_by_num_words = np.argsort(-self.br_lens[self.indices_br2brw], kind='mergesort') 61 | sort_by_word_idx = np.argsort(self.coords_brw[sort_by_num_words][:,-1], kind='mergesort') 62 | self.indices_brw2swx = sort_by_num_words[sort_by_word_idx] 63 | 64 | _, batch_sizes_srb = np.unique(self.coords_br[self.indices_br2srb][:,-1], return_counts=True) 65 | _, batch_sizes_swx = np.unique(self.coords_brw[self.indices_brw2swx][:,-1], return_counts=True) 66 | self.batch_sizes_srb = torch.tensor(batch_sizes_srb, dtype=torch.long) 67 | self.batch_sizes_swx = torch.tensor(batch_sizes_swx, dtype=torch.long) 68 | 69 | self.indices_srb2br = np.argsort(self.indices_br2srb, kind='mergesort') 70 | self.indices_swx2brw = np.argsort(self.indices_brw2swx, kind='mergesort') 71 | self.indices_sb2b = np.argsort(self.indices_b2sb, kind='mergesort') 72 | self.indices_sx2br = np.argsort(self.indices_br2sx, kind='mergesort') 73 | 74 | # For flat 75 | self.indices_b2ob = np.argsort(-self.b_lens_flat, kind='mergesort') 76 | sort_by_flat_words = np.argsort(-self.b_lens_flat[self.indices_b2brw], kind='mergesort') 77 | sort_by_flat_word_idx = np.argsort(self.coords_flat[sort_by_flat_words][:,-1], kind='mergesort') 78 | self.indices_brw2orwb = sort_by_flat_words[sort_by_flat_word_idx] 79 | 80 | _, batch_sizes_orwb = np.unique(self.coords_flat[self.indices_brw2orwb][:,-1], return_counts=True) 81 | self.batch_sizes_orwb = torch.tensor(batch_sizes_orwb, dtype=torch.long) 82 | 83 | self.indices_ob2b = np.argsort(self.indices_b2ob, kind='mergesort') 84 | self.indices_orwb2brw = np.argsort(self.indices_brw2orwb, kind='mergesort') 85 | 86 | def brw_from_list(self, list_brw): 87 | vals = [] 88 | for list_rw in list_brw: 89 | for list_w in list_rw: 90 | vals.extend(list_w) 91 | assert len(vals) == self.coords_brw.shape[0] 92 | if torch.is_tensor(vals[0]): 93 | return 
torch.stack(vals) 94 | else: 95 | return torch.tensor(vals) 96 | 97 | def br_from_list(self, list_br): 98 | vals = [] 99 | for list_r in list_br: 100 | vals.extend(list_r) 101 | assert len(vals) == self.coords_br.shape[0] 102 | if torch.is_tensor(vals[0]): 103 | return torch.stack(vals) 104 | else: 105 | return torch.tensor(vals) 106 | 107 | def br_from_b_expand(self, b_in): 108 | return b_in[self.indices_b2br] 109 | 110 | def brw_from_br_expand(self, br_in): 111 | return br_in[self.indices_br2brw] 112 | 113 | def brw_from_b_expand(self, b_in): 114 | return b_in[self.indices_b2brw] 115 | 116 | def srb_from_br_pack(self, br_in): 117 | return PackedSequence( 118 | br_in[self.indices_br2srb], 119 | self.batch_sizes_srb 120 | ) 121 | 122 | def swx_from_brw_pack(self, brw_in): 123 | return PackedSequence( 124 | brw_in[self.indices_brw2swx], 125 | self.batch_sizes_swx 126 | ) 127 | 128 | def br_from_srb_unpack(self, srb_in): 129 | return srb_in.data[self.indices_srb2br] 130 | 131 | def brw_from_swx_unpack(self, swx_in): 132 | return swx_in.data[self.indices_swx2brw] 133 | 134 | def br_from_sx(self, sx_in): 135 | return sx_in[self.indices_sx2br] 136 | 137 | def b_from_sb(self, sb_in): 138 | return sb_in[self.indices_sb2b] 139 | 140 | def sx_from_br(self, br_in): 141 | return br_in[self.indices_br2sx] 142 | 143 | def sb_from_b(self, b_in): 144 | return b_in[self.indices_b2sb] 145 | 146 | # For flat 147 | def orwb_from_brw_pack(self, brw_in): 148 | return PackedSequence( 149 | brw_in[self.indices_brw2orwb], 150 | self.batch_sizes_orwb 151 | ) 152 | 153 | def brw_from_orwb_unpack(self, orwb_in): 154 | return orwb_in.data[self.indices_orwb2brw] 155 | 156 | def b_from_ob(self, ob_in): 157 | return ob_in[self.indices_ob2b] 158 | 159 | def ob_from_b(self, b_in): 160 | return b_in[self.indices_b2ob] 161 | -------------------------------------------------------------------------------- /saved_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | #%% 8 | 9 | def load_models(*partitions): 10 | if not partitions: 11 | partitions = (1, 2, 3, 4) 12 | 13 | models = {} 14 | 15 | if 1 in partitions: 16 | from baseline1_models import load_baseline1 17 | models.update(load_baseline1()) 18 | if 2 in partitions: 19 | from baseline2_models import load_baseline2 20 | models.update(load_baseline2()) 21 | if 3 in partitions: 22 | from baseline3_models import load_baseline3 23 | models.update(load_baseline3()) 24 | if 4 in partitions: 25 | from baseline4_models import load_baseline4 26 | models.update(load_baseline4()) 27 | 28 | return models 29 | 30 | #%% 31 | 32 | def make_pairs(models, *names): 33 | if models is None: 34 | models = load_models() 35 | 36 | res = [] 37 | for name in names: 38 | res.append((name, (models[name + '_a'], models[name + '_b']))) 39 | 40 | return res 41 | --------------------------------------------------------------------------------
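A minimal usage sketch for the helpers in saved_models.py above (an illustration added here, not a file from the repository): load_models accepts optional partition numbers so that only some of the baseline model files are imported, and make_pairs groups the _a/_b splits of each named model into the (name, (model_a, model_b)) tuples that run_model_pairs in eval_run_bots.py expects. The drawer_lstmaddonly name is taken from the model keys used elsewhere in this repository.

# Illustrative only: load the partition-2 baselines and pair up the LSTM drawer.
from saved_models import load_models, make_pairs

models = load_models(2)   # default is partitions (1, 2, 3, 4)
drawers = make_pairs(models, 'drawer_lstmaddonly')
# drawers == [('drawer_lstmaddonly',
#              (models['drawer_lstmaddonly_a'], models['drawer_lstmaddonly_b']))]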
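Similarly, a small sketch of the Packer class from packer.py above (also an added illustration, using made-up toy data): a batch is a list of conversations, each conversation is a list of rounds, and each round is a list of per-word values, so the integers below merely stand in for word-level features.

from packer import Packer

# Toy batch: conversation 0 has rounds of 3 and 2 words; conversation 1 has one 1-word round.
batch = [[[1, 2, 3], [4, 5]],
         [[6]]]
packer = Packer(batch)

brw = packer.brw_from_list(batch)        # flat tensor holding all 6 word values
packed = packer.swx_from_brw_pack(brw)   # PackedSequence ordered for a word-level RNN
# packed.batch_sizes is tensor([3, 2, 1]): 3 rounds have a 1st word, 2 have a 2nd, 1 has a 3rd.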