├── src ├── __init__.py ├── util.py ├── glove.py ├── data_objects_turked.py ├── trained_factors.py ├── data_turked.py ├── main.py ├── settings.py └── data.py ├── docs ├── _config.yml ├── max_thumb.jpeg ├── yejin_thumb.jpg ├── thumb-all-resized.png ├── css │ └── default.css ├── data │ └── config │ │ └── default.json ├── todo.md ├── index.md └── factorgraph-viz.js ├── lib └── ngramdb │ ├── ngramdb │ ├── __init__.py │ ├── util.py │ ├── ngramtoken.py │ ├── constants.py │ └── ngramdb.py │ └── setup.py ├── factorgraph-viz.png ├── data └── verbphysics │ ├── action-frames │ ├── train-5 │ │ ├── train.txt │ │ ├── dev.txt │ │ └── test.txt │ └── train-20 │ │ ├── train.txt │ │ ├── dev.txt │ │ └── test.txt │ └── objects │ ├── train-5 │ └── train.csv │ └── train-20 │ └── train.csv ├── requirements.txt ├── .travis.yml ├── .gitignore ├── LICENSE.txt ├── scripts └── data.sh └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /lib/ngramdb/ngramdb/__init__.py: -------------------------------------------------------------------------------- 1 | from ngramdb import NgramDb 2 | -------------------------------------------------------------------------------- /docs/max_thumb.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwnlp/verbphysics/HEAD/docs/max_thumb.jpeg -------------------------------------------------------------------------------- /docs/yejin_thumb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwnlp/verbphysics/HEAD/docs/yejin_thumb.jpg -------------------------------------------------------------------------------- /factorgraph-viz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwnlp/verbphysics/HEAD/factorgraph-viz.png -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-5/train.txt: -------------------------------------------------------------------------------- 1 | took 2 | grew 3 | washed 4 | trimmed 5 | made 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | factorgraph 2 | numpy 3 | pandas 4 | nltk 5 | tqdm 6 | myria-python 7 | tabulate 8 | -------------------------------------------------------------------------------- /docs/thumb-all-resized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwnlp/verbphysics/HEAD/docs/thumb-all-resized.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | python: 4 | - "2.7" 5 | install: 6 | - pip install -r requirements.txt 7 | - pip install lib/ngramdb/ 8 | - ./scripts/data.sh 9 | script: 10 | - python -m src.main 11 | -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-20/train.txt: 
-------------------------------------------------------------------------------- 1 | took 2 | grew 3 | washed 4 | trimmed 5 | made 6 | got 7 | looked 8 | wrote 9 | entered 10 | kept 11 | lived 12 | played 13 | placed 14 | served 15 | arrived 16 | stopped 17 | changed 18 | accepted 19 | cast 20 | developed 21 | -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-20/dev.txt: -------------------------------------------------------------------------------- 1 | threw 2 | drank 3 | swung 4 | conquered 5 | towed 6 | snipped 7 | saw 8 | put 9 | received 10 | turned 11 | stood 12 | opened 13 | passed 14 | set 15 | wore 16 | raised 17 | died 18 | caught 19 | worked 20 | led 21 | formed 22 | moved 23 | obtained 24 | added 25 | lifted 26 | contained 27 | gained 28 | drove 29 | covered 30 | touched 31 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | This ended up being smaller than I expected. 3 | 4 | author: mbforbes 5 | """ 6 | 7 | import os 8 | 9 | 10 | def ensure_dir(directory): 11 | ''' 12 | Makes directory and all needed parent dirs if it doesn't exist. 13 | 14 | Args: 15 | directory (str) 16 | ''' 17 | if not os.path.isdir(directory): 18 | os.makedirs(directory) 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generic python crap 2 | *.pyc 3 | 4 | # Data that should be retrieved when setting up repository. 5 | # See scripts/data.sh. 6 | data/ngramdb/ 7 | data/glove/ 8 | data/emb/ 9 | 10 | # Every time the system is run it logs files and produces diagnostic output of 11 | # its decisions. 12 | log/ 13 | output/ 14 | 15 | # Visualization data gets dumped to this directory. 
16 | viz/ 17 | -------------------------------------------------------------------------------- /lib/ngramdb/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='ngramdb', 4 | version='0.1.1', 5 | description='Provides access to the Myria DB of syntactic n-grams.', 6 | author='lzilles', 7 | author_email='lzilles@cs.washington.edu', 8 | packages=['ngramdb'], 9 | dependency_links=[ 10 | 'https://github.com/uwescience/myria-python/archive/master.zip'], 11 | # install_requires=['myria-python'], 12 | zip_safe=False) 13 | -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-5/dev.txt: -------------------------------------------------------------------------------- 1 | got 2 | looked 3 | wrote 4 | entered 5 | kept 6 | lived 7 | played 8 | placed 9 | served 10 | arrived 11 | stopped 12 | changed 13 | accepted 14 | cast 15 | developed 16 | threw 17 | drank 18 | swung 19 | conquered 20 | towed 21 | snipped 22 | saw 23 | put 24 | received 25 | turned 26 | stood 27 | opened 28 | passed 29 | set 30 | wore 31 | raised 32 | died 33 | caught 34 | worked 35 | led 36 | formed 37 | moved 38 | obtained 39 | added 40 | lifted 41 | contained 42 | gained 43 | drove 44 | covered 45 | touched 46 | -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-5/test.txt: -------------------------------------------------------------------------------- 1 | walked 2 | dropped 3 | cut 4 | gazed 5 | jumped 6 | dug 7 | scaled 8 | toppled 9 | hiked 10 | squashed 11 | gave 12 | found 13 | went 14 | came 15 | heard 16 | held 17 | reached 18 | sat 19 | began 20 | sent 21 | showed 22 | fell 23 | used 24 | shook 25 | drew 26 | carried 27 | called 28 | ran 29 | laid 30 | followed 31 | remained 32 | returned 33 | appeared 34 | rose 35 | pulled 36 | broke 37 | produced 38 | bought 39 | expressed 40 | crossed 41 | struck 42 | picked 43 | won 44 | filled 45 | built 46 | pushed 47 | ordered 48 | poured 49 | waited 50 | ate 51 | -------------------------------------------------------------------------------- /data/verbphysics/action-frames/train-20/test.txt: -------------------------------------------------------------------------------- 1 | walked 2 | dropped 3 | cut 4 | gazed 5 | jumped 6 | dug 7 | scaled 8 | toppled 9 | hiked 10 | squashed 11 | gave 12 | found 13 | went 14 | came 15 | heard 16 | held 17 | reached 18 | sat 19 | began 20 | sent 21 | showed 22 | fell 23 | used 24 | shook 25 | drew 26 | carried 27 | called 28 | ran 29 | laid 30 | followed 31 | remained 32 | returned 33 | appeared 34 | rose 35 | pulled 36 | broke 37 | produced 38 | bought 39 | expressed 40 | crossed 41 | struck 42 | picked 43 | won 44 | filled 45 | built 46 | pushed 47 | ordered 48 | poured 49 | waited 50 | ate 51 | -------------------------------------------------------------------------------- /docs/css/default.css: -------------------------------------------------------------------------------- 1 | .links line { 2 | /*stroke: #999;*/ 3 | stroke-opacity: 0.6; 4 | } 5 | 6 | .nodes circle { 7 | stroke: #aaa; 8 | stroke-width: 1px; 9 | } 10 | 11 | .facs rect { 12 | stroke: #aaa; 13 | stroke-width: 1px; 14 | } 15 | 16 | .rvtext { 17 | font-size: 1em; 18 | } 19 | 20 | .factext { 21 | font-size: 0.7em; 22 | } 23 | 24 | #suggestionNotice { 25 | font-weight: bold; 26 | visibility: hidden; 27 | } 28 | 29 | button.suggestion { 30 | background-color: 
lightgray; 31 | margin: 5px; 32 | padding: 5px; 33 | border: none; 34 | border-radius: 8px; 35 | } 36 | 37 | button.suggestion:hover { 38 | background-color: #159957; 39 | color: white; 40 | cursor: pointer; 41 | } 42 | 43 | svg { 44 | border: 1px solid #159957; 45 | } 46 | 47 | 48 | p.limited { 49 | color: slategray; 50 | font-style: italic; 51 | display: inline-block; 52 | padding: 4px; 53 | } -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Maxwell Forbes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/data/config/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_dir": "https://cdn.rawgit.com/mbforbes/verbphysics-examples/f30d8eb1387208aaefff3c0b06f50a3cd8b20800/action-frames/", 3 | "data_filenames": "data/options/example-list.json", 4 | "startup_filename": "size-threw_dp_into", 5 | "autocomplete_limit": 50, 6 | "display_prefix": "Action frame: ", 7 | "size": { 8 | "rv": 8, 9 | "factor": 8 10 | }, 11 | "position": { 12 | "leftScale": 0.0, 13 | "leftSubtype": "frame", 14 | "leftStrength": 0.7, 15 | "centerScale": 0.33, 16 | "rightScale": 0.84, 17 | "rightSubtype": "noun", 18 | "rightStrength": 0.7, 19 | "upScale": 0.0, 20 | "upSubtype": "seed", 21 | "upStrength": 0.7, 22 | "downScale": 1.0, 23 | "downSubtype": "xfactor", 24 | "downStrength": 0.7, 25 | "middleStrength": 0.1 26 | }, 27 | "color": { 28 | "none": "whitesmoke", 29 | "unsureColor": "lightslategray", 30 | "unsureCutoff": 0.4, 31 | "values": [ 32 | "tomato", 33 | "royalblue", 34 | "lightslategray" 35 | ] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /docs/todo.md: -------------------------------------------------------------------------------- 1 | # todo 2 | 3 | ## other repos 4 | - [x] factor graph 5 | - [x] actual code (cleanup) 6 | - [x] store data somewhere programmatically accessible 7 | 8 | ## github.io page 9 | - [x] brief description 10 | - [ ] demo 11 | - [x] live loading of new graphs 12 | - [x] user input w/ autocomplete suggestions 13 | - [x] clickable suggestions 14 | - [ ] generate data (FGs and options file) 15 | - [ ] ensure demo works on project (github.io) page 16 | - [ ] preload one example 17 
| - [ ] suggestions above input box of things to type 18 | - [ ] table below of what the different frame types mean (prefix, example 19 | name, frame type, example frame) 20 | - [x] abstract 21 | - [x] paper vis -> link to paper 22 | - [x] author pics + links 23 | - [x] bibtex 24 | - [x] data (instructions, download links) 25 | - [x] code link / notice for detailed instructions 26 | - [x] acknowledgements (incl stanford vision lab for project page inspiration) 27 | 28 | ## readme 29 | 30 | - [x] badges 31 | - [x] overview 32 | - [x] link to github.io page 33 | - [x] installation (code, aux data) 34 | - [x] running 35 | - [x] data 36 | - [x] viz (desc + picture) 37 | - [x] see also for py-factorgraph 38 | -------------------------------------------------------------------------------- /scripts/data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # verbphysics 5 | # 6 | # Data retrieval script. 7 | # 8 | # author: mbforbes 9 | # 10 | 11 | # Get and extract ngramdb (cached query and pmi) data 12 | mkdir data/ngramdb/ 13 | cd data/ngramdb/ 14 | curl https://storage.googleapis.com/ai2-mosaic-public/projects/verb-physics/ngramdb-cache.tar.gz > ngramdb-cache.tar.gz 15 | tar -xzf ngramdb-cache.tar.gz 16 | rm ngramdb-cache.tar.gz 17 | cd ../.. 18 | 19 | # Get and convert GloVe (word embedding) data 20 | mkdir data/glove/ 21 | curl https://nlp.stanford.edu/data/wordvecs/glove.6B.zip > data/glove/glove.6B.zip 22 | unzip data/glove/glove.6B.zip -d data/glove/ 23 | python src/glove.py 24 | cd data/glove/ 25 | rm glove.6B.100d.txt 26 | rm glove.6B.200d.txt 27 | rm glove.6B.300d.txt 28 | rm glove.6B.50d.txt 29 | rm glove.6B.zip 30 | cd ../.. 31 | 32 | # Get embedding-trained unary factor weights 33 | mkdir data/emb/ 34 | cd data/emb/ 35 | curl https://storage.googleapis.com/ai2-mosaic-public/projects/verb-physics/emb-trained-weights.tar.gz > emb-trained-weights.tar.gz 36 | tar -xzf emb-trained-weights.tar.gz 37 | rm emb-trained-weights.tar.gz 38 | cd ../.. 
39 | 
40 | # Get wordnet data for NLTK
41 | python -m nltk.downloader wordnet
--------------------------------------------------------------------------------
/lib/ngramdb/ngramdb/util.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from collections import Counter
3 | 
4 | 
5 | def aliased_relation(relation, alias):
6 |     return ' '.join((relation, alias))
7 | 
8 | 
9 | def make_predicate(relation_alias, attribute, value, relationship='='):
10 |     return "{}.{}{}{}".format(relation_alias, attribute, relationship, value)
11 | 
12 | 
13 | def collapse_ngram_surface(ngrams):
14 |     # Ngram.surface is a property, not a method, so it is accessed without
15 |     # parentheses.
16 |     return sorted((
17 |         (k, sum(x[1] for x in g))
18 |         for k, g in itertools.groupby((
19 |             (n.surface, n.freq)
20 |             for n in sorted(
21 |                 ngrams,
22 |                 key=lambda x: x.surface)),
23 |             lambda x: x[0])
24 |     ), key=lambda x: x[1], reverse=True)
25 | 
26 | 
27 | def pprint_ngram_list(ngram_list):
28 |     surface_width = max(
29 |         max(len(t.surface) for n in ngram_list for t in n) + 1, 6)
30 | 
31 |     ngram_format_str = "ID: {}\tFreq: {}\tHeight: {}"
32 |     token_format_str = \
33 |         "{0:>2}\t{1:<" + str(surface_width) + "}\t{2:<5}\t{3}{4}"
34 | 
35 |     all_lines = []
36 | 
37 |     for n in ngram_list:
38 |         try:
39 |             all_lines.append(
40 |                 ngram_format_str.format(n.nid, n.freq, n.height)
41 |             )
42 |         except AttributeError:
43 |             all_lines.append(
44 |                 ngram_format_str.format(n.nid, n.freq, "")
45 |             )
46 | 
47 |         all_lines += [
48 |             token_format_str.format(
49 |                 t.position,
50 |                 t.surface,
51 |                 t.postag,
52 |                 t.deprel,
53 |                 '-' + str(t.headposition) if t.headposition > -1 else '')
54 |             for t in n]
55 | 
56 |         all_lines.append('')
57 | 
58 |     # return '\n'.join(all_lines)
59 |     print('\n'.join(all_lines))
60 | 
61 | 
62 | def collapsed_histogram(kv_list):
63 |     c = Counter()
64 |     for k, v in kv_list:
65 |         c[k] += v
66 |     return c
67 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # verbphysics
2 | 
3 | [![Build Status](https://travis-ci.org/uwnlp/verbphysics.svg?branch=master)](https://travis-ci.org/uwnlp/verbphysics)
4 | [![license MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/uwnlp/verbphysics/blob/master/LICENSE.txt)
5 | 
6 | ## About
7 | 
8 | This repository contains the data and reference implementation for the paper
9 | 
10 | **Verb Physics: Relative Physical Knowledge of Actions and Objects**
11 | Maxwell Forbes and Yejin Choi
12 | _ACL 2017_
13 | 
14 | See the [Verb Physics project page](https://uwnlp.github.io/verbphysics/) for
15 | more details (model visualization, paper link, bibtex citation).
16 | 
17 | ## Installation
18 | 
19 | The code is written in Python 2.7. We recommend a fresh virtualenv.
20 | 
21 | ```sh
22 | # Install the required python libraries
23 | pip install -r requirements.txt
24 | 
25 | # Install the locally-packaged `ngramdb` library (written by Li Zilles).
26 | pip install lib/ngramdb/
27 | 
28 | # Download the data (cached ngramdb data; GloVe embeddings; trained factor
29 | # weights; NLTK data).
30 | ./scripts/data.sh
31 | ```
32 | 
33 | Our [Travis-CI
34 | script](https://github.com/uwnlp/verbphysics/blob/master/.travis.yml) validates
35 | the above installation instructions by running them on a fresh machine after
36 | every code modification.
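As a quick sanity check, the short sketch below (not part of the original
setup; the paths come from `scripts/data.sh` and the repository itself)
confirms that the downloaded data landed where the code expects it:

```py
import os

# Directories created by scripts/data.sh, plus the data shipped in the repo.
for d in ['data/ngramdb', 'data/glove', 'data/emb', 'data/verbphysics']:
    print('%-20s %s' % (d, 'ok' if os.path.isdir(d) else 'MISSING'))
```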
37 | 
38 | ## Running
39 | 
40 | By default, the code is set up to run a particular model from the paper (**our
41 | model (A)**):
42 | 
43 | ```sh
44 | python -m src.main
45 | ```
46 | 
47 | You can view all of the default configurations by running with `--help`:
48 | 
49 | ```
50 | python -m src.main --help
51 | usage: main.py [-h] [--config CONFIG] [--poly POLY] [--viz]
52 | 
53 | verbphysics reference implementation
54 | 
55 | optional arguments:
56 |   -h, --help       show this help message and exit
57 |   --config CONFIG  hyperparameter configuration to use; options: model_a |
58 |                    playing | model_b_objpairs | model_b_frames (default:
59 |                    model_a
60 |   --poly POLY      Whether to try polynomially-many hyperparameter config
61 |                    combinations (True, default) or vary config dimension
62 |                    sequentially (False).
63 |   --viz            Whether to dump model / data to JSON for visualization
64 |                    (default False).
65 | ```
66 | 
67 | Settings (hyperparameter) configurations are found in `src/settings.py`. You
68 | can modify the `playing` dictionary found in `src/main.py` with your own
69 | configuration and run the custom model using `--config=playing`.
70 | 
71 | ## Data
72 | 
73 | The `verbphysics` data is found under `data/verbphysics/`.
74 | 
75 | ### Task setup as in the ACL 2017 paper
76 | 
77 | When predicting action frames, only the 5% action frame data (the `train-5`
78 | split) should be used. Either 5% (our model A) or 20% object pair data (our
79 | model B) may be used to assist in action frame prediction.
80 | 
81 | When predicting object pairs, only the 5% object pair data should be used.
82 | Either 5% (our model A) or 20% action frame data (our model B) may be used to
83 | assist in object pair prediction.
84 | 
85 | ### Attribute names in code
86 | 
87 | For legacy reasons, the code has different names for some attributes. The actual
88 | data (i.e., the questions asked to Mechanical Turk workers) uses the attribute
89 | names reported in the paper.
90 | 
91 | attribute | name in code
92 | --- | ---
93 | `size` | `size`
94 | `weight` | `weight`
95 | `strength` | `hardness`
96 | `rigidness` | `rigidness`
97 | `speed` | `verb-speed`
98 | 
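Here is a minimal sketch (not from the original README; it assumes you run it
from the repository root after installation) of loading the 5% object-pair
training annotations with the loader in `src/data_objects_turked.py`. Majority
labels are `1` (`>`), `0` (`=`), and `-1` (`<`); rows whose majority answer
was "unknown" are dropped when `remove_unk=True`:

```py
from src.data_objects_turked import DataObjectsTurked, DIR_5

# [[obj1, obj2, majority label]], keeping only pairs where all 3 annotators
# agreed on the 'size' attribute. The loader maps the lowercased 'person'
# token in the data back to the original 'PERSON' token.
pairs = DataObjectsTurked.load_raw(
    'train', 'size', agreement_needed=3, remove_unk=True, directory=DIR_5)
print(pairs[0])  # e.g. ['PERSON', 'step', 1]
```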
99 | ## Visualization
100 | 
101 | You can use [`factorgraph-viz`](https://github.com/mbforbes/factorgraph-viz) to
102 | visualize `verbphysics` factor graph models interactively in your web browser.
103 | To produce visualization data, add the command line argument `--viz`.
104 | 
105 | The [Verb Physics project page](https://uwnlp.github.io/verbphysics/) has a
106 | live demo of this running.
107 | 
108 | ![An example rendering of a factor graph using the factorgraph-viz library](factorgraph-viz.png)
109 | 
110 | ## See also
111 | 
112 | The [`py-factorgraph`](https://github.com/mbforbes/py-factorgraph) library
113 | provides the underlying factor graph implementation.
114 | 
--------------------------------------------------------------------------------
/src/glove.py:
--------------------------------------------------------------------------------
1 | '''
2 | Get GloVe distance between words.
3 | 
4 | This code is adapted from
5 | https://github.com/stanfordnlp/GloVe/blob/master/eval/python/distance.py
6 | '''
7 | 
8 | # IMPORTS
9 | # -----------------------------------------------------------------------------
10 | 
11 | # Builtins
12 | import cPickle as pickle
13 | 
14 | # 3rd party
15 | import numpy as np
16 | 
17 | 
18 | # CONSTANTS
19 | # -----------------------------------------------------------------------------
20 | 
21 | # 300d constants
22 | MODEL_FILE_300D = 'data/glove/glove.6B.300d.txt'
23 | MODEL_FILE_300D_NORM = 'data/glove/glove.6B.300d-weights-norm.npy'
24 | VOCAB_FILE = 'data/glove/glove.6B.vocab.pickle'
25 | 
26 | 
27 | # CLASSES
28 | # -----------------------------------------------------------------------------
29 | 
30 | class Glove(object):
31 |     '''
32 |     Once loaded (g = Glove()), can get word vector using
33 | 
34 |         g.w[g.vocab[word], :]
35 | 
36 |     For example,
37 | 
38 |         g.w[g.vocab['fish'], :]
39 | 
40 |     This vector will have unit length (l2 norm).
41 |     '''
42 | 
43 |     def __init__(self, model_file_norm=MODEL_FILE_300D_NORM, vocab_file=VOCAB_FILE):
44 |         '''
45 |         Load normalized vectors and vocab from a cache. Use Glove.convert(...)
46 |         to create the cache files.
47 | 
48 |         Args:
49 |             model_file_norm (str)
50 |             vocab_file (str)
51 |         '''
52 | 
53 |         w_norm, vocab = self.load_npy(model_file_norm, vocab_file)
54 | 
55 |         # use
56 |         self.w = w_norm
57 |         self.vocab = vocab
58 | 
59 |     def load_npy(self, model_file_norm, vocab_file):
60 |         w_norm = np.load(model_file_norm)
61 |         with open(vocab_file, 'r') as f:
62 |             vocab = pickle.load(f)
63 |         return w_norm, vocab
64 | 
65 |     @staticmethod
66 |     def convert(in_model_file=MODEL_FILE_300D,
67 |                 out_model_file_norm=MODEL_FILE_300D_NORM,
68 |                 out_vocab_file=VOCAB_FILE):
69 |         '''
70 |         Takes a raw model file and saves to disk (a) a normalized model file
71 |         (the np.ndarray weight matrix w) and (b) a vocab file ({str: int})
72 |         that indexes the rows of w.
73 | 
74 |         Args:
75 |             in_model_file (str): Path to original (downloaded) GloVe file.
76 |             out_model_file_norm (str): Path to write normalized weights to.
77 |             out_vocab_file (str): Path to write vocab to.
78 |         '''
79 |         vocab, vectors = {}, {}
80 |         with open(in_model_file, 'r') as f:
81 |             i = 0
82 |             for line in f:
83 |                 vals = line.rstrip().split(' ')
84 |                 word = vals[0]
85 |                 vocab[word] = i
86 |                 vectors[word] = [float(x) for x in vals[1:]]
87 |                 i += 1
88 |         vocab_size = len(vocab)
89 |         vector_dim = len(vectors['the'])  # yay
90 |         w = np.zeros((vocab_size, vector_dim))
91 |         for word, v in vectors.iteritems():
92 |             if word == '':
93 |                 continue
94 |             w[vocab[word], :] = v
95 | 
96 |         # normalize each word vector to unit length (l2 norm)
97 |         w_norm = np.zeros(w.shape)
98 |         d = (np.sum(w ** 2, 1) ** (0.5))
99 |         w_norm = (w.T / d).T
100 | 
101 |         # save
102 |         np.save(out_model_file_norm, w_norm)
103 |         with open(out_vocab_file, 'w') as f:
104 |             pickle.dump(vocab, f)
105 | 
106 |         # NOTE: This left here for your convenience if you decide to adapt this
107 |         # code.
108 |         # return w_norm, vocab
109 | 
110 |     def distance(self, target, queries):
111 |         '''
112 |         Args:
113 |             target str
114 |             queries [str]
115 | 
116 |         Returns:
117 |             np.array of length len(queries): cosine similarity (1 = closest)
118 |             of each query word to target according to w; missing words score 0.0
119 |         '''
120 |         res = np.zeros(len(queries))
121 |         if target not in self.vocab:
122 |             return res
123 |         vec_result = self.w[self.vocab[target], :]  # indexes self.w; don't mutate!
124 | vec_norm = np.zeros(vec_result.shape) 125 | d = (np.sum(vec_result ** 2,) ** (0.5)) 126 | vec_norm = (vec_result.T / d).T 127 | dist = np.dot(self.w, vec_norm.T) 128 | 129 | # compute dist for each query 130 | for i, q in enumerate(queries): 131 | res[i] = dist[self.vocab[q]] if q in self.vocab else 0.0 132 | return res 133 | 134 | 135 | if __name__ == '__main__': 136 | Glove.convert() 137 | -------------------------------------------------------------------------------- /src/data_objects_turked.py: -------------------------------------------------------------------------------- 1 | """ 2 | Loading up the (processed) turked *OBJECT PAIR* data. 3 | 4 | author: mbforbes 5 | """ 6 | 7 | # IMPORTS 8 | # ----------------------------------------------------------------------------- 9 | 10 | # stdlib 11 | import logging 12 | import sys 13 | 14 | # 3rd party 15 | import pandas as pd 16 | 17 | 18 | # CONSTANTS 19 | # ----------------------------------------------------------------------------- 20 | 21 | DIR_5 = 'data/verbphysics/objects/train-5/' 22 | DIR_20 = 'data/verbphysics/objects/train-20/' 23 | 24 | # The attr names are inconsistent in my code. This translates from external 25 | # (e.g. AttrGraph) names to internal (turked object pairs) names. 26 | ATTR_TRANSLATION = { 27 | 'hardness': 'strength', 28 | 'verb-speed': 'speed', 29 | } 30 | 31 | # For internal sanity checking: the complete list of attributes. 32 | OUR_ATTRS = ['size', 'weight', 'strength', 'rigidness', 'speed'] 33 | 34 | # obj1 vs obj2, where vs is one of: 35 | LABEL_GREATER = 1 36 | LABEL_EQ = 0 37 | LABEL_LESSER = -1 38 | LABEL_UNK = -42 39 | 40 | PERSON_DATA = 'person' 41 | PERSON_TOKEN = 'PERSON' 42 | 43 | logger = logging.getLogger(__name__) 44 | 45 | 46 | # CLASSES 47 | # ----------------------------------------------------------------------------- 48 | 49 | class DataObjectsTurked(object): 50 | 51 | @staticmethod 52 | def load_raw(partition, attr_raw, agreement_needed, remove_unk=True, directory=DIR_5): 53 | """ 54 | Loads up partition, filtering out those with agreement < 55 | agreement_needed and those with majority of 'unk'. 56 | 57 | Args: 58 | partition (str): 'train'/'dev'/'test' 59 | attr_raw (str): 'size', 'weight', 'hardness' (mapped to 'strength'), 60 | 'rigidness', 'verb-speed' (mapped to 'speed') 61 | agreement_needed (int): 2 or 3 62 | directory (str): directory to load data from. use DIR_20 to use 20% 63 | of data, DIR_5 to use 5% 64 | 65 | Returns: 66 | [[str, str, int]]: [[obj1, obj2, majority label]] 67 | """ 68 | # translation, if needed 69 | attr = attr_raw if attr_raw not in ATTR_TRANSLATION else ATTR_TRANSLATION[attr_raw] 70 | if attr not in OUR_ATTRS: 71 | logger.error('Unknown attribute: "%s"' % (attr)) 72 | sys.exit(1) 73 | 74 | # load, filter, and transform to list 75 | fn = directory + partition + '.csv' 76 | df = pd.read_csv(fn) 77 | filtered = df[(df[attr + '-agree'] >= agreement_needed)] 78 | if remove_unk: 79 | filtered = filtered[(filtered[attr + '-maj'] != LABEL_UNK)] 80 | data = filtered[['obj1', 'obj2', attr + '-maj']] 81 | lst = data.values.tolist() 82 | 83 | # switch our lowercased person token to the original 84 | for l in lst: 85 | for i in [0, 1]: 86 | if l[i] == PERSON_DATA: 87 | l[i] = PERSON_TOKEN 88 | 89 | return lst 90 | 91 | @staticmethod 92 | def load(partition, attr_raw, agreement_needed, greater_pot, eq_pot, lesser_pot, split): 93 | """ 94 | Loads up partition, filtering out those with agreement < 95 | agreement_needed and those with majority of 'unk'. 
Then changes gold
96 |         labels to the provided potentials.
97 | 
98 |         Args:
99 |             partition (str): 'train'/'dev'/'test'
100 |             attr_raw (str): 'size', 'weight', 'hardness' (mapped to 'strength'),
101 |                 'rigidness', 'verb-speed' (mapped to 'speed')
102 |             agreement_needed (int): 2 or 3
103 |             greater_pot (np.ndarray: 1 x 3)
104 |             eq_pot (np.ndarray: 1 x 3)
105 |             lesser_pot (np.ndarray: 1 x 3)
106 |             split (int): 5 or 20 (which object pair data split to use)
107 | 
108 |         Returns:
109 |             [[str, str, np.ndarray]]: [[obj1, obj2, potential]]
110 |         """
111 |         if split == 5:
112 |             directory = DIR_5
113 |         elif split == 20:
114 |             directory = DIR_20
115 |         else:
116 |             logger.error('Unimplemented split: %r', split)
117 |             sys.exit(1)
118 |         lst = DataObjectsTurked.load_raw(partition, attr_raw, agreement_needed, True, directory)
119 | 
120 |         # create our own mini mapping for assigning potentials below
121 |         potmap = {
122 |             LABEL_GREATER: greater_pot,
123 |             LABEL_EQ: eq_pot,
124 |             LABEL_LESSER: lesser_pot,
125 |         }
126 | 
127 |         # replace each list's label with the corresponding passed potential
128 |         for l in lst:
129 |             l[-1] = potmap[l[-1]]
130 | 
131 |         return lst
132 | 
--------------------------------------------------------------------------------
/lib/ngramdb/ngramdb/ngramtoken.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from collections import defaultdict, Counter
3 | 
4 | 
5 | class Ngram(object):
6 |     def __init__(self, tokens,
7 |                  nid=None, freq=None):
8 |         self.tokens = tokens
9 |         self.headpositions = [t.headposition for t in self.tokens]
10 | 
11 |         try:
12 |             for t in self.tokens:
13 |                 if t.headposition > -1:
14 |                     t.head = self.tokens[t.headposition]
15 |                     t.head.children.append(t)
16 | 
17 |                 else:
18 |                     t.depth = 0
19 | 
20 |             for t in self.tokens:
21 |                 if t.depth is None:
22 |                     depth = 0
23 |                     current = t
24 |                     while current.depth != 0 and depth < len(self.tokens):
25 |                         depth += 1
26 |                         current = current.head
27 | 
28 |                     if depth >= len(self.tokens):
29 |                         raise IndexError
30 | 
31 |                     t.depth = depth
32 | 
33 |             self.height = max(t.depth for t in self.tokens if t is not None)
34 | 
35 |         except IndexError:
36 |             pass
37 | 
38 |         self.nid = nid
39 |         self.freq = freq
40 | 
41 |     @property
42 |     def postags(self):
43 |         return [t.postag for t in self.tokens]
44 | 
45 |     @property
46 |     def deprels(self):
47 |         return [t.deprel for t in self.tokens]
48 | 
49 |     @property
50 |     def words(self):
51 |         return [t.surface for t in self.tokens]
52 | 
53 |     def __repr__(self):
54 |         kwargs = ["=".join((k, repr(v))) for k, v in self.__dict__.items()
55 |                   if v is not None and k != "tokens"]
56 |         return "Ngram({}, {})".format(self.tokens, ', '.join(kwargs))
57 | 
58 |     def __str__(self):
59 |         return self.rawstring + " (freq: {})".format(self.freq)
60 | 
61 |     def __iter__(self):
62 |         return iter(self.tokens)
63 | 
64 |     def __len__(self):
65 |         return len(self.tokens)
66 | 
67 |     def __getitem__(self, key):
68 |         return self.tokens[key]
69 | 
70 |     def __setitem__(self, key, value):
71 |         raise TypeError("Can't replace Token in Ngram!")
72 | 
73 |     @property
74 |     def surface(self):
75 |         return ' '.join(w for w in self.words)
76 | 
77 |     @property
78 |     def rawstring(self):
79 |         return ' '.join(t.rawprint for t in self.tokens)
80 | 
81 | 
82 | class Token(object):
83 |     def __init__(self, surface,
84 |                  position=None, postag=None, deprel=None, headposition=None,
85 |                  freq=None):
86 | 
87 |         self.surface = surface
88 | 
89 |         self.position = position
90 | 
91 |         self.postag = postag
92 |         self.deprel = deprel
93 | 
94 |         self.headposition = headposition if headposition != -1 else None
95 | 
self.head = None 96 | 97 | self.children = [] 98 | 99 | self.depth = None 100 | 101 | self.freq = freq 102 | 103 | def __repr__(self): 104 | kwargs = ["{}={}".format(k, v) for k, v in self.__dict__.items() 105 | if v is not None and k not in ("surface", "head")] 106 | return "Token({}, {})".format(repr(self.surface), ', '.join(kwargs)) 107 | 108 | def __str__(self): 109 | return self.rawprint 110 | 111 | @property 112 | def rawprint(self): 113 | try: 114 | return '{}/{}/{}/{}'.format( 115 | self.surface, self.postag, self.deprel, self.headposition) 116 | except: 117 | return '{}/{}/{}/{}'.format( 118 | self.surface, self.postag, self.deprel, 0) 119 | 120 | 121 | def ngrams_from_tupledict(tuples): 122 | def keyfunc(x): 123 | return x['nid'] 124 | results = [] 125 | 126 | for key, group in itertools.groupby(tuples, keyfunc): 127 | group = list(group) 128 | 129 | tokens = [Token(t['surface'], t['position']-1, t['postag'], 130 | t['deprel'], headposition=t['headposition']-1) 131 | for t in group] 132 | 133 | ngram_freq = None if 'freq' not in group[0] else group[0]['freq'] 134 | 135 | ngram = Ngram(tokens, key, freq=ngram_freq) 136 | 137 | # filter out the TRASH :( 138 | # TODO: why is there TRASH 139 | # if [t.position for t in ngram] == list(range(len(ngram))) \ 140 | # and all(t.headposition < len(ngram) for t in ngram): 141 | results.append(ngram) 142 | 143 | return results 144 | 145 | 146 | def tokens_from_tupledict(tuples, collapse=True): 147 | def keyfunc(x): 148 | return x['surface'] 149 | 150 | results = [] 151 | if collapse: 152 | tuples.sort(key=lambda x: (x['surface'], -x['freq'] 153 | if 'freq' in x else 0)) 154 | for key, group in itertools.groupby(tuples, keyfunc): 155 | g = list(group) 156 | if 'freq' in g[0]: 157 | freq = sum(t['freq'] for t in g) 158 | results.append(Token(key, freq=freq)) 159 | 160 | results.sort(key=lambda t: t.freq if t.freq is not None else 0, 161 | reverse=True) 162 | 163 | else: 164 | for t in tuples: 165 | results.append(Token(t['surface'], t['tid'], t['postag'], 166 | t['deprel'], freq=t['freq'])) 167 | 168 | return results 169 | -------------------------------------------------------------------------------- /lib/ngramdb/ngramdb/constants.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | # myria server and port 4 | REST_URL = "rest.myria.cs.washington.edu" 5 | REST_PORT = 1776 6 | 7 | # relation names in Myria 8 | NGRAM_RELATION = "\"lzilles:ngrams:ngram\"" 9 | TOKEN_RELATION = "\"lzilles:ngrams:token\"" 10 | DEP_RELATION = "\"lzilles:ngrams:dependency\"" 11 | POS_RELATION = "\"lzilles:ngrams:partofspeech\"" 12 | TT_RELATION = "\"lzilles:ngrams:ngram_token_token\"" 13 | 14 | # query limitations 15 | MIN_WORD_LEN = 3 16 | MIN_WORD_COUNT = 1 17 | 18 | # top-level sql query templates 19 | SQL_CONTEXT_TEMPLATE = """SELECT 20 | t.nid, nginfo.freq, t.position, t.surface, pos.postag, dep.deprel, 21 | t.headposition 22 | FROM 23 | "lzilles:ngrams:token" t, 24 | "lzilles:ngrams:dependency" dep, 25 | "lzilles:ngrams:partofspeech" pos, 26 | ({subquery}) AS nginfo 27 | 28 | WHERE 29 | nginfo.nid=t.nid 30 | AND pos.posid=t.posid 31 | AND dep.depid=t.depid 32 | 33 | ORDER BY 34 | nginfo.freq DESC, 35 | (t.nid, t.position) ASC; 36 | """ 37 | 38 | SQL_COUNT_TEMPLATE = """SELECT SUM(nginfo.freq) FROM ({subquery}) AS nginfo;""" 39 | 40 | # json query plan templates 41 | JSON_COUNT_TEMPLATE = json.loads(""" 42 | { 43 | "fragments": [ 44 | { 45 | "operators": [ 46 | { 47 | "opId": 0, 48 | "opType": 
"DbQueryScan", 49 | 50 | "schema": { 51 | "columnNames": [ 52 | "sum" 53 | ], 54 | "columnTypes": [ 55 | "LONG_TYPE" 56 | ] 57 | } 58 | 59 | }, 60 | { 61 | "opId": 1, 62 | "argChild": 0, 63 | "opType": "CollectProducer" 64 | } 65 | ] 66 | }, 67 | { 68 | "operators": [ 69 | { 70 | "opId": 2, 71 | "opType": "CollectConsumer", 72 | "argOperatorId": 1 73 | }, 74 | { 75 | "opId": 3, 76 | "opType": "Aggregate", 77 | "argChild": 2, 78 | "aggregators": [ 79 | { 80 | "type": "SingleColumn", 81 | "column": 0, 82 | "aggOps": ["SUM"] 83 | } 84 | ] 85 | }, 86 | { 87 | "opId": 4, 88 | "argChild": 3, 89 | "opType": "DbInsert", 90 | "argOverwriteTable": true, 91 | "relationKey": { 92 | "programName": "ngramoutput", 93 | "relationName": "TEMPOUTCOUNT", 94 | "userName": "lzilles" 95 | } 96 | } 97 | ] 98 | } 99 | ], 100 | 101 | "logicalRa": "", 102 | "rawQuery": "[ ngram count test ]", 103 | "language": "sql" 104 | } 105 | """) 106 | 107 | JSON_CONTEXT_TEMPLATE = json.loads(""" 108 | { 109 | "fragments": [ 110 | { 111 | "operators": [ 112 | { 113 | "opId": 0, 114 | "opType": "DbQueryScan", 115 | 116 | "schema": { 117 | "columnNames": [ 118 | "nid", 119 | "freq", 120 | "position", 121 | "surface", 122 | "postag", 123 | "deprel", 124 | "headposition" 125 | ], 126 | "columnTypes": [ 127 | "LONG_TYPE", 128 | "INT_TYPE", 129 | "INT_TYPE", 130 | "STRING_TYPE", 131 | "STRING_TYPE", 132 | "STRING_TYPE", 133 | "INT_TYPE" 134 | ] 135 | } 136 | 137 | }, 138 | { 139 | "opId": 1, 140 | "argChild": 0, 141 | "opType": "CollectProducer" 142 | } 143 | ] 144 | }, 145 | { 146 | "operators": [ 147 | { 148 | "opId": 2, 149 | "opType": "CollectConsumer", 150 | "argOperatorId": 1 151 | }, 152 | { 153 | "opId": 3, 154 | "opType": "InMemoryOrderBy", 155 | "opName": "InMemSort(results)", 156 | "argChild": 2, 157 | "argSortColumns": [ 158 | 1, 159 | 0, 160 | 2 161 | ], 162 | "argAscending": [ 163 | false, 164 | true, 165 | true 166 | ] 167 | }, 168 | { 169 | "opId": 4, 170 | "argChild": 3, 171 | "opType": "DbInsert", 172 | "argOverwriteTable": true, 173 | "relationKey": { 174 | "programName": "ngramoutput", 175 | "relationName": "TEMPOUT", 176 | "userName": "lzilles" 177 | } 178 | } 179 | ] 180 | } 181 | ], 182 | 183 | "logicalRa": "", 184 | "rawQuery": "[ ngram test ]", 185 | "language": "sql" 186 | } 187 | """) 188 | -------------------------------------------------------------------------------- /src/trained_factors.py: -------------------------------------------------------------------------------- 1 | """ 2 | Factors whose potentials are trained (on the training data) (duh). 
3 | 4 | author: mbforbes 5 | """ 6 | 7 | # IMPORTS 8 | # ----------------------------------------------------------------------------- 9 | 10 | # stdlib 11 | import code # code.interact(local=dict(globals(), **locals())) 12 | from collections import Counter 13 | import sys 14 | 15 | # 3rd party 16 | import numpy as np 17 | import pandas as pd 18 | 19 | # local 20 | import data_turked as td 21 | import data_objects_turked as dot 22 | 23 | 24 | # CLASSES 25 | # ----------------------------------------------------------------------------- 26 | 27 | class UnaryFrameEmbedding(object): 28 | 29 | def __init__(self, framesplit): 30 | """ 31 | Args: 32 | framesplit (int) 33 | """ 34 | if framesplit == 5: 35 | filename = 'data/emb/frames-train5.csv' 36 | elif framesplit == 20: 37 | filename = 'data/emb/frames-train20.csv' 38 | else: 39 | print 'ERROR: Unknown frame split %r' % (framesplit) 40 | sys.exit(1) 41 | 42 | self.df = pd.read_csv(filename) 43 | 44 | def get(self, attr, framestr): 45 | """ 46 | Args: 47 | attr (str) 48 | framestr (str) 49 | 50 | Returns: 51 | np.ndarray of shape (3,) representing 52 | [p(>), p(<), p(=)] 53 | """ 54 | row = self.df[(self.df['attr'] == attr) & (self.df['framestr'] == framestr)] 55 | return row[['prob_greater', 'prob_lesser', 'prob_eq']].get_values().flatten() 56 | 57 | 58 | class UnaryObjpairEmbedding(object): 59 | 60 | def __init__(self, objpairsplit): 61 | """ 62 | Args: 63 | objpairsplit (int) 64 | """ 65 | if objpairsplit == 5: 66 | filename = 'data/emb/objpairs-train5.csv' 67 | elif objpairsplit == 20: 68 | filename = 'data/emb/objpairs-train20.csv' 69 | else: 70 | print 'ERROR: Unknown objpair split %r' % (objpairsplit) 71 | sys.exit(1) 72 | 73 | self.df = pd.read_csv(filename) 74 | 75 | def get(self, attr, obj1, obj2): 76 | """ 77 | Args: 78 | attr (str) 79 | obj1 (str) 80 | obj2 (str) 81 | 82 | Returns: 83 | np.ndarray of shape (3,) representing 84 | [p(>), p(<), p(=)] 85 | """ 86 | row = self.df[ 87 | (self.df['attr'] == attr) & 88 | (self.df['obj1'] == obj1) & 89 | (self.df['obj2'] == obj2)] 90 | return row[['prob_greater', 'prob_lesser', 'prob_eq']].get_values().flatten() 91 | 92 | 93 | class SelPrefEmbedding(object): 94 | 95 | def __init__(self, filename): 96 | self.df = pd.read_csv(filename) 97 | 98 | def get(self, attr, frame, obj1, obj2): 99 | """ 100 | Args: 101 | attr (str) 102 | frame (str) 103 | obj1 (str) 104 | obj2 (str) 105 | 106 | Returns: 107 | np.ndarray of shape (3,3) representing 108 | objp > objp = objp < 109 | frame > [[ p p p] 110 | frame = [ p p p] 111 | frame < [ p p p]] 112 | """ 113 | row = self.df[ 114 | (self.df['attr'] == attr) & 115 | (self.df['frame'] == frame) & 116 | (self.df['obj1'] == obj1) & 117 | (self.df['obj2'] == obj2)] 118 | try: 119 | return row[['gg', 'ge', 'gl', 'eg', 'ee', 'el', 'lg', 'le', 'll']].get_values().flatten().reshape((3,3)) 120 | except: 121 | code.interact(local=dict(globals(), **locals())) 122 | 123 | 124 | class SelPrefMLE(object): 125 | 126 | def __init__(self, pmi): 127 | """ 128 | Args: 129 | pmi (ngramdb.PMI) 130 | """ 131 | self.pmi = pmi 132 | 133 | def get(self, attr, frame_agreement_needed, objpair_agreement_needed, 134 | pmi_cutoff, objsplit): 135 | """ 136 | Args: 137 | attr (str) 138 | frame_agreement_needed (int) 139 | objpair_agreement_needed (int) 140 | pmi_cutoff (float) 141 | objsplit (int) 142 | 143 | Returns: 144 | np.ndarray (3,3) selectional preference potential (frame, objpair) 145 | for attr 146 | """ 147 | if objsplit == 5: 148 | objdir = dot.DIR_5 149 | elif objsplit == 
20:
150 |             objdir = dot.DIR_20
151 |         else:
152 |             print 'ERROR: Unimplemented split: %r' % (objsplit)
153 |             sys.exit(1)
154 | 
155 |         frames_expanded = td.TurkedData.load_raw(
156 |             'train', attr, frame_agreement_needed)
157 |         # pull off just v_s_p str and gold label
158 |         frames = [(fe[4], fe[2]) for fe in frames_expanded]
159 |         objpairs = dot.DataObjectsTurked.load_raw(
160 |             'train', attr, objpair_agreement_needed, True, objdir)
161 | 
162 |         # counts maps frame gold -> objpair gold. init'ing now rather than
163 |         # checking for missing later.
164 |         counts = {
165 |             td.LABEL_GREATER: Counter(),
166 |             td.LABEL_LESSER: Counter(),
167 |             td.LABEL_EQ: Counter(),
168 |         }
169 |         for f in frames:
170 |             framestr, frame_gold = f
171 |             for o in objpairs:
172 |                 obj1, obj2, objpair_gold = o
173 | 
174 |                 # get PMI. only count if >= cutoff
175 |                 pmi_score = self.pmi.query(framestr, (obj1, obj2))
176 |                 if pmi_score >= pmi_cutoff:
177 |                     counts[frame_gold][objpair_gold] += 1
178 | 
179 |         flat = np.array([
180 |             float(counts[td.LABEL_GREATER][dot.LABEL_GREATER]),
181 |             float(counts[td.LABEL_GREATER][dot.LABEL_LESSER]),
182 |             float(counts[td.LABEL_GREATER][dot.LABEL_EQ]),
183 |             float(counts[td.LABEL_LESSER][dot.LABEL_GREATER]),
184 |             float(counts[td.LABEL_LESSER][dot.LABEL_LESSER]),
185 |             float(counts[td.LABEL_LESSER][dot.LABEL_EQ]),
186 |             float(counts[td.LABEL_EQ][dot.LABEL_GREATER]),
187 |             float(counts[td.LABEL_EQ][dot.LABEL_LESSER]),
188 |             float(counts[td.LABEL_EQ][dot.LABEL_EQ]),
189 |         ])
190 | 
191 |         # per-row norm (i.e. marginal)
192 |         res = flat.reshape((3,3))
193 |         for i in range(res.shape[0]):
194 |             res[i, :] /= sum(res[i, :])
195 | 
196 |         return res
197 | 
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: verbphysics
3 | tagline: Maxwell Forbes & Yejin Choi — ACL 2017
4 | ---
5 | 
6 | # About
7 | 
8 | The **Verb Physics** project explores how our choice of verbs entails relations
9 | between the physical properties of the objects we talk about.
10 | 
11 | > Mary threw _____.
12 | 
13 | _Whatever Mary threw (a ball? a rock?) is probably smaller and weighs less than
14 | her._
15 | 
16 | > Ricardo walked into _____.
17 | 
18 | _Whatever Ricardo walked into (the library? his office?) is probably larger
19 | than him._
20 | 
21 | # Demo
22 | 
23 | Explore an interactive visualization of our factor graph model on the Verb
24 | Physics dataset. Click and drag on components of the factor graph to move them
25 | around.
26 | 
27 | 
[Interactive demo widget: a factor graph drawing canvas plus a text input with
live, clickable autocomplete suggestions.]

Type below to select an action frame to visualize. All action frame names
start with one of the five attributes: "size," "weight," "strength,"
"rigidness," or "speed."

Completions (live) (clickable):
55 | 
56 | ## Explanation
57 | 
58 | The interactive diagram draws a small piece of the factor graph that is focused
59 | on the selected action frame. The colors correspond to the model's decisions
60 | about each random variable. Red indicates a decision
61 | that a random variable should take the value `>`, blue represents `<`, and grey represents
63 | `=`. (Grey is uncommon.)
64 | 
65 | These decisions have different meanings depending on what the random variable
66 | represents. There are two different types of random variables:
67 | 
68 | 1. **Object pairs** - If a random variable represents two objects—for example,
69 |    `person_vs_house`—then the decision for that random variable represents the
70 |    model's choice about the relation of those two objects along the given
71 |    attribute. For example, if we are looking at an action frame for `size`,
72 |    then we would expect `person_vs_house` to take the value `<`, because people
73 |    are generally smaller than houses.
74 | 
75 | 2. **Action frames** — If a random variable represents an action frame—for
76 |    example, `threw_d`—then the decision for that random variable represents
77 |    the model's choice about the relation of two objects that would fit in that
78 |    action frame. For example, if we are looking at an action frame for `size`,
79 |    then we would expect `threw_d` (which represents `<person> threw <object>`; see
80 |    below for more details) to take the value `>`, because people are generally
81 |    larger in size than the objects that they throw.
82 | 
83 | ## Action frame names
84 | 
85 | The format for the action frame names is:
86 | 
87 | ```
88 | <attribute>-<verb>_<construction>[_<preposition>]
89 | ```
90 | 
91 | For example, `size-threw_dp_into` names the `size` frame for the verb `threw` in the `dp` construction with the preposition `into`. The possible attributes are: `size`, `weight`, `strength`, `rigidness`, `speed`.
92 | 
93 | There are five possible action frame constructions. Each corresponds to a
94 | syntactic template.
95 | 
96 | Construction | Syntax template | Example | Example sentence
97 | --- | --- | --- | ---
98 | **`d`** | `<person> <verb> <object>` | `threw_d` | "I threw the rock."
99 | **`od`** | `<object> <verb> <object>` | `hit_od` | "The tape hit the ground."
100 | **`p`** | `<person> <verb> <preposition> <object>` | `threw_p_out` | "I threw out the trash."
101 | **`op`** | `<object> <verb> <preposition> <object>` | `landed_op_in` | "The trash landed in the bin."
102 | **`dp`** | `<person> <verb> <object> <preposition> <object>` | `threw_dp_into` | "I threw the trash into the bin."
103 | 
104 | # Abstract
105 | 
106 | Learning commonsense knowledge from natural language text is nontrivial due to
107 | reporting bias: people rarely state the obvious, e.g., "My house is bigger than
108 | me." However, while rarely stated explicitly, this trivial everyday knowledge
109 | does influence the way people talk about the world, which provides indirect
110 | clues to reason about the world. For example, a statement like, "Tyler entered
111 | his house" implies that his house is bigger than Tyler.
112 | 
113 | In this paper, we present an approach to infer relative physical knowledge of
114 | actions and objects along five dimensions (e.g., size, weight, and strength)
115 | from unstructured natural language text. We frame knowledge acquisition as joint
116 | inference over two closely related problems: learning (1) relative physical
117 | knowledge of object pairs and (2) physical implications of actions when applied
118 | to those object pairs. Empirical results demonstrate that it is possible to
119 | extract knowledge of actions and objects from language and that joint inference
120 | over different types of knowledge improves performance.
121 | 
122 | # Authors
123 | 
125 | 126 | A picture of Maxwell Forbes 127 | 128 |

Maxwell Forbes

129 |
130 | 131 |
132 | 133 | A picture of Yejin Choi 134 | 135 |

Yejin Choi

136 |
137 | 138 | # Paper 139 | 140 | The paper is available on [arXiv](https://arxiv.org/abs/1706.03799). 141 | 142 | [![a thumbnail rendering of the ACL 2017 verb physics paper](thumb-all-resized.png)](https://arxiv.org/abs/1706.03799) 143 | 144 | # Bibtex 145 | 146 | ``` 147 | @inproceedings{forbes2017verb, 148 | title = {Verb Physics: Relative Physical Knowledge of Actions and Objects}, 149 | author = {Maxwell Forbes and Yejin Choi}, 150 | booktitle = {ACL}, 151 | year = {2017} 152 | } 153 | ``` 154 | 155 | # Data 156 | 157 | The data is available in the [`verbphysics` GitHub repository under 158 | `data/`](https://github.com/uwnlp/verbphysics/tree/master/data). 159 | 160 | See the repository [README](https://github.com/uwnlp/verbphysics#data) for more 161 | information on the data splits and task setup. 162 | 163 | # Code 164 | 165 | Visit the [`verbphysics` GitHub 166 | repository](https://github.com/uwnlp/verbphysics) for our reference 167 | implementation and instructions for running our code. 168 | 169 | It is released under the permissive MIT license. 170 | 171 | ## Thanks 172 | 173 | - to [Hannah Rashkin](https://homes.cs.washington.edu/~hrashkin/) for 174 | inspiration with her [connotation frames 175 | visualizer](https://homes.cs.washington.edu/~hrashkin/connframe_vis.php) 176 | 177 | - to the [Stanford Vision Lab](http://vision.stanford.edu/) for inspiration 178 | with good project webpage designs ([example](http://cs.stanford.edu/people/ranjaykrishna/im2p/index.html)) 179 | -------------------------------------------------------------------------------- /data/verbphysics/objects/train-5/train.csv: -------------------------------------------------------------------------------- 1 | ,obj1,obj2,size-agree,size-maj,weight-agree,weight-maj,strength-agree,strength-maj,rigidness-agree,rigidness-maj,speed-agree,speed-maj 2 | 0,person,dress,1,-42,3,1,3,1,1,-42,1,-42 3 | 1,person,step,3,1,3,1,3,-1,2,-1,3,1 4 | 2,body,mouth,3,1,3,1,3,1,2,1,2,-1 5 | 3,sun,coal,3,1,3,1,2,1,1,-42,2,1 6 | 4,vessel,something,3,-42,3,-42,3,-42,3,-42,3,-42 7 | 5,place,farm,3,-42,3,-42,2,-42,3,-42,2,0 8 | 6,master,dress,3,1,3,1,3,1,3,1,3,1 9 | 7,ground,body,3,1,3,1,3,1,3,1,3,-1 10 | 8,ash,mouth,3,-1,3,-1,3,-1,2,-1,2,-1 11 | 9,gentleman,knife,3,1,3,1,2,-1,2,-1,2,1 12 | 10,train,face,3,1,3,1,3,1,3,1,3,1 13 | 11,friend,mouth,3,1,3,1,3,1,2,1,2,0 14 | 12,energy,sun,2,-42,2,-1,1,-42,2,-42,2,-42 15 | 13,father,basin,3,1,2,1,2,1,2,-1,3,1 16 | 14,bag,gate,3,-1,3,-1,3,-1,3,-1,2,-42 17 | 15,brother,book,3,1,3,1,3,1,3,-1,3,1 18 | 16,way,road,2,-42,3,-42,2,0,2,0,2,0 19 | 17,back,something,2,-42,2,-42,2,-42,2,-42,2,-42 20 | 18,lady,car,3,-1,3,-1,3,-1,3,-1,3,-1 21 | 19,dinner,daughter,3,-1,3,-1,3,-1,1,-42,3,-1 22 | 20,person,lad,3,0,3,0,3,0,3,0,3,0 23 | 21,fist,hand,2,-1,3,0,2,0,2,0,2,0 24 | 22,ground,room,1,-42,2,1,2,1,1,-42,2,-42 25 | 23,child,doorway,3,-1,2,-1,2,-1,3,-1,3,1 26 | 24,victim,face,3,1,3,1,2,1,3,0,2,0 27 | 25,rain,light,1,-42,1,-42,2,-42,1,-42,3,-1 28 | 26,horse,coal,3,1,3,1,2,1,2,-1,3,1 29 | 27,poet,door,2,-1,2,1,1,-42,2,-1,2,-42 30 | 28,brother,ball,3,1,3,1,3,1,3,-1,2,-1 31 | 29,lady,direction,3,-42,2,-42,2,-42,2,-42,2,-42 32 | 30,house,sea,3,-1,2,-1,3,-42,2,-42,2,-1 33 | 31,coach,arm,3,1,3,1,3,1,2,0,2,0 34 | 32,lady,object,3,-42,3,-42,3,-42,3,-42,3,-42 35 | 33,something,hand,2,1,2,1,2,-42,2,-42,2,-42 36 | 34,father,seal,1,-42,1,-42,1,-42,2,-42,2,-42 37 | 35,edition,place,3,-42,3,-42,3,-42,3,-42,3,-42 38 | 36,room,wife,2,1,2,1,2,1,2,1,2,-42 39 | 37,messenger,camp,2,-1,2,-1,2,-42,2,-1,2,1 40 | 
38,window,floor,3,-1,3,-1,3,-1,2,0,3,0 41 | 39,place,hand,3,1,2,1,2,1,2,1,3,-1 42 | 40,door,floor,3,-1,3,-1,2,0,3,0,2,1 43 | 41,bay,boat,3,1,2,-42,1,-42,1,-42,3,-1 44 | 42,food,way,3,-42,3,-42,3,-42,3,-42,3,-42 45 | 43,hat,back,3,-1,3,-1,3,-1,3,-1,2,-1 46 | 44,someone,dinner,3,1,3,1,2,1,1,-42,3,1 47 | 45,someone,fool,2,-42,1,-42,2,-42,2,-42,2,-42 48 | 46,stone,hand,1,-42,2,1,3,1,3,1,2,-1 49 | 47,ice,head,2,-42,1,-42,2,1,2,1,2,-42 50 | 48,coach,hat,3,1,3,1,3,1,3,1,3,1 51 | 49,ear,something,3,-42,3,-42,3,-42,3,-42,3,-42 52 | 50,someone,boy,2,-42,2,-42,2,-42,2,0,1,-42 53 | 51,stone,bed,2,-1,2,-1,2,1,3,1,1,-42 54 | 52,person,daughter,2,1,2,1,2,1,1,-42,2,1 55 | 53,person,barn,3,-1,3,-1,3,-1,3,-1,3,1 56 | 54,sun,tree,3,1,3,1,2,1,2,-42,2,1 57 | 55,door,light,2,1,3,1,3,1,2,1,2,-1 58 | 56,ball,mouth,1,-42,1,-42,1,-42,2,1,1,-42 59 | 57,child,picture,3,1,3,1,3,1,3,-1,3,1 60 | 58,brother,hand,3,1,3,1,3,1,2,0,3,0 61 | 59,back,air,2,-42,2,1,2,1,2,1,2,-42 62 | 60,gentleman,ball,2,1,2,1,2,1,2,1,1,-42 63 | 61,window,end,2,-42,2,-42,2,-42,2,-42,2,-42 64 | 62,step,road,3,-1,3,-1,3,-1,2,-1,2,1 65 | 63,result,element,2,-42,1,-42,1,-42,2,-42,2,-42 66 | 64,parent,child,3,1,3,1,3,1,2,0,2,0 67 | 65,sun,sail,3,1,3,1,2,1,2,-42,2,1 68 | 66,river,breath,2,1,3,1,3,1,2,1,3,1 69 | 67,vessel,anchor,3,1,3,1,2,1,2,0,2,1 70 | 68,friend,newspaper,3,1,3,1,3,1,3,1,3,1 71 | 69,everything,master,3,1,3,1,2,1,2,-42,2,-42 72 | 70,coast,place,2,1,2,1,2,-42,2,-42,1,-42 73 | 71,state,way,2,-42,2,-42,2,-42,2,-42,3,-42 74 | 72,anchor,mouth,2,1,2,1,2,1,2,1,3,-42 75 | 73,hair,room,3,-1,3,-1,3,-1,3,-1,1,-42 76 | 74,sea,sail,3,1,3,1,2,1,2,-1,2,1 77 | 75,temple,something,3,-42,3,-42,3,-42,3,-42,3,-42 78 | 76,system,end,3,-42,3,-42,3,-42,3,-42,3,-42 79 | 77,stone,way,2,-42,2,-42,2,-42,2,-42,2,-42 80 | 78,sun,ear,3,1,3,1,3,1,2,1,2,1 81 | 79,anything,end,2,-42,3,-42,3,-42,3,-42,2,-42 82 | 80,father,truck,3,-1,3,-1,3,-1,3,-1,3,-1 83 | 81,head,ball,2,0,3,1,2,1,1,-42,3,-1 84 | 82,hip,hand,3,1,3,1,2,1,1,-42,2,-1 85 | 83,body,direction,2,-42,2,-42,2,-42,2,-42,2,-42 86 | 84,king,camp,3,-1,3,-1,2,-1,1,-42,3,1 87 | 85,bag,way,3,-42,3,-42,3,-42,3,-42,3,-42 88 | 86,person,wife,3,0,3,0,3,0,3,0,3,0 89 | 87,hair,floor,3,-1,3,-1,3,-1,3,-1,2,1 90 | 88,ball,light,1,-42,2,1,2,1,1,-42,3,-1 91 | 89,heaven,face,2,1,2,-42,2,-42,2,-42,2,-1 92 | 90,knife,throat,1,-42,2,-1,2,1,2,1,2,-42 93 | 91,someone,light,1,-42,3,1,2,1,2,-1,2,-1 94 | 92,chair,window,2,0,2,1,3,1,2,0,3,0 95 | 93,person,fox,3,1,3,1,2,1,2,0,3,-1 96 | 94,sea,middle,2,-42,3,-42,3,-42,3,-42,3,-42 97 | 95,messenger,master,1,-42,1,-42,1,-42,2,0,1,-42 98 | 96,system,something,3,-42,3,-42,3,-42,3,-42,3,-42 99 | 97,shirt,hand,3,1,2,-1,2,-1,1,-42,1,-42 100 | 98,person,ice,3,1,3,1,3,1,2,-1,2,1 101 | 99,step,flood,2,-1,2,-42,2,-42,2,-42,2,-42 102 | 100,daughter,call,2,-42,2,1,2,-42,2,-42,3,-42 103 | 101,eye,fist,3,-1,3,-1,3,-1,2,-1,2,-42 104 | 102,house,hill,3,-1,3,-1,3,-1,2,-1,2,0 105 | 103,stream,hand,3,1,2,1,2,-42,2,-1,3,1 106 | 104,current,shore,2,1,1,-42,2,1,2,-1,3,1 107 | 105,sea,call,2,-42,2,-42,2,-42,2,-42,2,-42 108 | 106,ship,hand,3,1,3,1,3,1,3,1,1,-42 109 | 107,child,glass,2,1,2,1,1,-42,2,-1,2,1 110 | 108,way,end,3,-42,3,-42,3,-42,3,-42,3,-42 111 | 109,lady,eye,2,1,2,1,2,1,2,1,1,-42 112 | 110,house,back,1,-42,1,-42,1,-42,1,-42,2,-1 113 | 111,fist,mouth,2,0,1,-42,2,1,2,1,1,-42 114 | 112,door,wife,2,1,1,-42,1,-42,2,1,2,-1 115 | 113,bay,way,3,-42,3,-42,3,-42,3,-42,3,-42 116 | 114,object,hand,3,-42,3,-42,3,-42,3,-42,3,-42 117 | 115,flood,end,3,-42,3,-42,3,-42,3,-42,3,-42 118 | 
116,eye,direction,3,-42,3,-42,3,-42,2,-42,2,-42 119 | 117,river,boat,3,1,3,1,2,1,2,-1,2,1 120 | 118,brother,coal,3,1,2,1,3,1,2,-1,3,1 121 | 119,victim,house,3,-1,3,-1,2,-1,2,-1,3,1 122 | 120,brother,clothes,2,1,3,1,3,1,3,1,2,0 123 | 121,child,purse,3,1,3,1,3,1,2,-1,3,1 124 | 122,bank,flood,3,-42,3,-42,2,-42,2,-42,2,-1 125 | 123,house,farm,3,-1,3,-1,2,-1,1,-42,3,0 126 | 124,side,current,3,-42,3,-42,3,-42,2,-42,2,-42 127 | 125,gentleman,book,3,1,3,1,3,1,3,-1,3,1 128 | 126,ground,king,3,1,3,1,3,1,3,1,3,-1 129 | 127,father,world,3,-1,3,-1,3,-1,3,-1,2,1 130 | 128,wall,hand,3,1,3,1,3,1,3,1,3,-1 131 | 129,grass,hand,3,-1,3,-1,3,-1,2,1,3,-1 132 | 130,bank,suit,3,1,3,1,3,1,3,1,1,-42 133 | 131,patient,glass,2,1,3,1,1,-42,2,-1,2,1 134 | 132,gentleman,train,3,-1,3,-1,3,-1,3,-1,3,-1 135 | 133,meal,piece,3,1,3,1,2,-42,2,-42,2,-42 136 | 134,sun,breath,3,1,3,1,3,1,1,-42,1,-42 137 | 135,everything,child,3,1,3,1,3,1,2,1,2,1 138 | 136,hat,response,2,-42,2,1,2,-42,1,-42,3,-1 139 | 137,torrent,mountain,2,-1,2,-1,1,-42,2,-42,3,1 140 | 138,boy,farm,3,-1,2,-1,3,-1,2,-1,3,1 141 | 139,office,picture,3,1,3,1,1,-42,1,-42,3,0 142 | 140,gentleman,stream,3,-1,3,-1,1,-42,3,1,2,-1 143 | 141,house,barn,1,-42,1,-42,2,0,2,0,2,0 144 | 142,bag,everything,2,-42,2,-42,2,-42,2,-42,2,-42 145 | 143,coach,bank,2,-1,2,-1,2,-1,2,-1,2,1 146 | 144,gentleman,eye,3,1,3,1,3,1,2,-1,2,1 147 | 145,person,ship,3,-1,3,-1,3,-1,3,-1,2,-1 148 | 146,someone,eye,2,1,2,1,2,1,1,-42,1,-42 149 | 147,father,light,2,-42,3,1,2,1,3,1,2,-1 150 | 148,river,sun,3,-1,3,-1,3,-1,2,-1,2,-42 151 | 149,sun,head,3,1,3,1,3,1,2,1,2,1 152 | 150,someone,piece,2,-42,2,-42,3,-42,3,-42,3,-42 153 | 151,gentleman,room,3,-1,3,-1,3,-1,3,-1,3,1 154 | 152,lady,stream,3,-1,2,-1,2,1,2,1,3,-1 155 | 153,foot,wall,3,-1,3,-1,3,-1,3,-1,3,1 156 | 154,breath,soul,2,-42,2,0,1,-42,2,0,1,-42 157 | 155,daughter,anything,2,-42,2,-42,3,-42,2,-42,3,-42 158 | 156,back,room,2,-42,2,-42,2,-42,2,-42,2,-42 159 | 157,scene,room,2,-42,2,-42,2,-42,2,-42,2,-42 160 | 158,hair,effect,3,-42,2,-42,2,-42,3,-42,2,-42 161 | 159,king,effect,1,-42,2,-42,2,-42,2,-42,2,-42 162 | 160,car,hand,3,1,3,1,3,1,3,1,3,1 163 | 161,town,picture,3,1,3,1,3,1,2,1,2,-42 164 | 162,lady,picture,3,1,3,1,3,1,2,-1,3,1 165 | 163,window,air,2,-1,3,1,2,1,3,1,3,-1 166 | 164,piano,suit,3,1,3,1,3,1,2,1,2,-42 167 | 165,father,bag,3,1,3,1,3,1,2,1,3,1 168 | 166,exile,end,2,-42,2,-42,2,-42,2,-42,2,-42 169 | 167,house,picture,3,1,3,1,3,1,3,1,2,-1 170 | 168,office,air,2,-1,2,1,2,-42,1,-42,2,-1 171 | 169,skirt,knee,1,-42,2,-1,1,-42,2,-1,2,-1 172 | 170,body,room,3,-1,2,-1,2,-1,3,-1,3,1 173 | 171,someone,child,3,1,3,1,3,1,2,0,1,-42 174 | 172,lady,hand,2,1,2,1,2,0,1,-42,1,-42 175 | 173,person,elbow,3,1,3,1,2,1,2,0,2,0 176 | 174,river,current,3,1,2,1,2,-1,2,0,2,-1 177 | 175,head,light,2,-1,3,1,3,1,3,1,2,-1 178 | 176,fox,goose,2,1,2,1,2,1,3,0,2,1 179 | 177,person,deck,3,-1,3,-1,3,-1,3,-1,3,1 180 | 178,boy,something,3,-42,3,-42,3,-42,3,-42,3,-42 181 | 179,phone,room,3,-1,3,-1,2,-1,2,-1,1,-42 182 | 180,call,way,2,-42,2,-42,2,-42,2,-42,2,-42 183 | 181,boy,face,3,1,3,1,3,1,1,-42,2,0 184 | 182,energy,hand,2,-42,2,-1,1,-42,2,-1,2,1 185 | -------------------------------------------------------------------------------- /src/data_turked.py: -------------------------------------------------------------------------------- 1 | """ 2 | data_turked is for turked gold annotations of *FRAMES*. Often imported as 'td' 3 | as in 'TurkedData'. 
4 | 5 | author: mbforbes 6 | """ 7 | 8 | # IMPORTS 9 | # ----------------------------------------------------------------------------- 10 | 11 | # builtins 12 | import logging 13 | import os 14 | 15 | # 3rd party 16 | import pandas as pd 17 | 18 | 19 | # CONSTANTS 20 | # ----------------------------------------------------------------------------- 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | LABEL_GREATER = 1 25 | LABEL_EQ = 0 26 | LABEL_LESSER = -1 27 | LABEL_UNK = -42 28 | 29 | # The processed file with frame data. 30 | PROCESSED_FILE = 'data/verbphysics/action-frames/action-frames.csv' 31 | 32 | # Directories where we saved train/dev/test splits. 33 | SPLIT_DIR_5 = 'data/verbphysics/action-frames/train-5/' 34 | SPLIT_DIR_20 = 'data/verbphysics/action-frames/train-20/' 35 | 36 | 37 | # CLASSES 38 | # ----------------------------------------------------------------------------- 39 | 40 | class TurkedData(object): 41 | """ 42 | Methods for loading / working with the turked data format, which is a dict 43 | of the form (henceforth known as TurkedDict): 44 | 45 | { 46 | 'size': [ 47 | (verb_sub[_prep]_1, np.ndarray), 48 | (verb_sub[_prep]_2, np.ndarray), 49 | ... 50 | ], 51 | 'weight': [ 52 | ... 53 | ], 54 | ... 55 | } 56 | """ 57 | 58 | @staticmethod 59 | def load(fn, agreement_needed, bigger_pot, smaller_pot, eq_pot): 60 | """ 61 | Loads Turked data using specified settings. 62 | 63 | NOTE(mbforbes): Could do different potentials for stronger agreement. 64 | 65 | Args: 66 | fn (str): Pandas converted CSV file. To generate this, run 67 | `notebooks/verb_process.ipynb`. 68 | 69 | agreement_needed (int): number out of 3 of agreement needed before 70 | using a turked data point 71 | 72 | bigger_pot (np.ndarray of 1x3) Potentials for data with "bigger" GT. 73 | 74 | smaller_pot (np.ndarray of 1x3) Potentials for data with "smaller" 75 | GT. 76 | 77 | eq_pot (np.ndarray of 1x3) Potentials for data with "equal" GT. 78 | 79 | Returns: 80 | TurkedDict 81 | 82 | """ 83 | label_pot_map = { 84 | LABEL_GREATER: bigger_pot, 85 | LABEL_EQ: eq_pot, 86 | LABEL_LESSER: smaller_pot, 87 | } 88 | 89 | # select attributes to load 90 | attrs = ['size', 'weight', 'verb-speed', 'hardness', 'rigidness'] 91 | 92 | # load up 93 | df = pd.read_csv(fn) 94 | 95 | res = {} 96 | for attr in attrs: 97 | col_ag = attr + '-agree' 98 | col_maj = attr + '-maj' 99 | tuples = [] 100 | 101 | # Pick only rows that agree on a non-UNK result. 102 | data = df[(df[col_ag] >= agreement_needed) & (df[col_maj] != LABEL_UNK)] 103 | 104 | # NOTE(mbforbes): We could have two variants of the potentials, one 105 | # for unanimous agreement, and a less strong one for 2/3 agreement. 106 | # This uses one for all. 107 | for _, row in data.iterrows(): 108 | v, s, p = row['verb'], row['sub'], row['prep'] 109 | name = TurkedData.vsp_to_str(v, s, p) 110 | pot = label_pot_map[row[col_maj]] 111 | tuples += [(name, pot)] 112 | 113 | # Save this attribute's tuples 114 | res[attr] = tuples 115 | return res 116 | 117 | @staticmethod 118 | def load_raw(partition, attr, agreement_needed, fn=PROCESSED_FILE, split_dir=SPLIT_DIR_5): 119 | """ 120 | Loads up `attr` data of `partition` of csv file `fn`, filtering out 121 | those with agreement < agreement_needed and those with majority of 'unk'. 
122 | 123 | Returns list where each item is: 124 | 125 | (verb, preposition|None, gold_label, one-hot vector of frame type, v_s_p) 126 | 127 | Where gold_label is one of: 128 | 129 | LABEL_GREATER (1) 130 | LABEL_EQ (0) 131 | LABEL_LESSER (-1) 132 | 133 | Frame type is a 5-d one-hot vector of frame type: 134 | 135 | [_d, _p, _od, _op, _dp] 136 | 137 | And v_s_p is the string representation of the full frame, i.e., 138 | 139 | verb_sub_preposition 140 | 141 | Args: 142 | partition (str) one of {'train', 'dev', 'test'} 143 | attr (str) 144 | agreement_needed (int) 145 | fn (str) location of the processed csv data file 146 | d (str, default=SPLIT_DIR_5) directory where we find train / dev / 147 | test verb splits 148 | 149 | Returns: 150 | [(str, str|None, int, [int], str)] 151 | """ 152 | # load up verbs in partition list 153 | with open(os.path.join(split_dir, partition + '.txt')) as f: 154 | verbs = set([line.strip() for line in f.readlines()]) 155 | 156 | # load full data 157 | df = pd.read_csv(fn) 158 | 159 | # filter agreement_needed for attr (and out maj unks). Wanted to filter 160 | # verbs here but can't broadcast the 'in set' operation, I guess. 161 | filtered = df[ 162 | (df[attr + '-agree'] >= agreement_needed) & 163 | (df[attr + '-maj'] != LABEL_UNK)] 164 | 165 | # sub -> one hot vector. This could be represented more concisely (e.g., 166 | # the index to one-hot), but this is clearer to look at. 167 | sub_to_onehot = { 168 | '_d': [1, 0, 0, 0, 0], 169 | '_p': [0, 1, 0, 0, 0], 170 | '_od': [0, 0, 1, 0, 0], 171 | '_op': [0, 0, 0, 1, 0], 172 | '_dp': [0, 0, 0, 0, 1], 173 | } 174 | 175 | # create results, filtering out verbs not in partition 176 | res = [] 177 | for _, row in filtered.iterrows(): 178 | if row['verb'] not in verbs: 179 | continue 180 | res.append(( 181 | row['verb'], 182 | row['prep'] if not pd.isnull(row['prep']) else None, 183 | row[attr + '-maj'], 184 | sub_to_onehot[row['sub']], 185 | TurkedData.vsp_to_str(row['verb'], row['sub'], row['prep']), 186 | )) 187 | return res 188 | 189 | @staticmethod 190 | def n_verbs(data): 191 | """ 192 | Counts the number of unique verbs in data. 193 | 194 | Args: 195 | data (single attribute list (element) of TurkedDict) 196 | 197 | Returns: 198 | int 199 | """ 200 | verbs = [] 201 | for d in data: 202 | node = d[0] 203 | v, _, _ = TurkedData.str_to_vsp(node) 204 | verbs += [v] 205 | return len(set(verbs)) 206 | 207 | @staticmethod 208 | def train_dev_test_split(data, d): 209 | """ 210 | Splits data into train, dev, test sections. 211 | 212 | Args: 213 | data (single attribute list (element) of TurkedDict) 214 | 215 | d (str) Directory in which we find train.txt, dev.txt, test.txt 216 | files, which are just one verb per line verb list files. 217 | 218 | Returns: 219 | 3x tuple, each is a list just like the input data arg. 
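        Example (hypothetical; `fn` and the potential arguments are
        placeholders for values defined elsewhere):

            turked = TurkedData.load(fn, 2, bigger_pot, smaller_pot, eq_pot)
            train, dev, test = TurkedData.train_dev_test_split(
                turked['size'], SPLIT_DIR_5)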
220 | """ 221 | # Load up verb lists 222 | verb_splits = ['train', 'dev', 'test'] 223 | verb_map = {} 224 | for s in verb_splits: 225 | with open(os.path.join(d, s + '.txt')) as f: 226 | for v in f.readlines(): 227 | verb_map[v.strip()] = s 228 | 229 | # Split data 230 | data_splits = { 231 | 'train': [], 232 | 'dev': [], 233 | 'test': [], 234 | } 235 | for datum in data: 236 | v, _, _ = TurkedData.str_to_vsp(datum[0]) 237 | split = verb_map[v] 238 | data_splits[split] += [datum] 239 | 240 | # Log info 241 | total_frames = sum([len(frames) for _, frames in data_splits.iteritems()]) 242 | logger.debug('Data splits:') 243 | for split in verb_splits: 244 | frames = data_splits[split] 245 | logger.debug('\t%s: %d frames (%0.2f%%) (%d verbs)', split, len(frames), float(len(frames) * 100) / total_frames, TurkedData.n_verbs(frames)) 246 | 247 | return data_splits['train'], data_splits['dev'], data_splits['test'] 248 | 249 | @staticmethod 250 | def str_to_vsp(node): 251 | """ 252 | Args: 253 | str 254 | 255 | Returns: 256 | str (verb), 257 | str (_sub) (yes, includes the '_'), 258 | str|None (prep) (or None if sub has no prep) 259 | """ 260 | pieces = node.split('_') 261 | if len(pieces) == 2: 262 | # No prep 263 | return pieces[0], '_' + pieces[1], None 264 | elif len(pieces) == 3: 265 | # Has a prep 266 | return pieces[0], '_' + pieces[1], pieces[2] 267 | else: 268 | assert False, 'Malformed node string: %r' % (node) 269 | 270 | @staticmethod 271 | def vsp_to_str(v, s, p): 272 | """ 273 | Args: 274 | v (str) verb 275 | s (str) sub (_p, _d, etc.) 276 | p (str) preposition 277 | 278 | Returns: 279 | str 280 | """ 281 | res = v + s 282 | if not pd.isnull(p): 283 | res += '_' + p 284 | return res 285 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point to verbphysics system. 3 | 4 | author: mbforbes 5 | """ 6 | 7 | # IMPORTS 8 | # ----------------------------------------------------------------------------- 9 | 10 | # Logging first this was a fun bug. 11 | import logging 12 | import util 13 | util.ensure_dir('log/') 14 | logging.basicConfig( 15 | level=logging.DEBUG, 16 | format='%(asctime)s %(name)-16s %(levelname)-8s %(message)s', 17 | datefmt='%m-%d %H:%M:%S', 18 | filename='log/latest.log', 19 | filemode='w') 20 | base_logger = logging.getLogger(__name__) 21 | 22 | # builtins 23 | import argparse 24 | import sys 25 | import time 26 | 27 | # 3rd party 28 | import factorgraph as fg 29 | 30 | # local 31 | import attrgraph 32 | import data as ng 33 | # If I really have to do this then it's a gross oversight of python's. 34 | from data import SizeQueryN 35 | import glove 36 | import data_turked as td 37 | from settings import Settings 38 | 39 | 40 | # GLOBALS (SORRY) 41 | # ----------------------------------------------------------------------------- 42 | 43 | CONSOLE_LOG_LEVEL = logging.DEBUG 44 | VIZ_OUTPUT_DIR = 'viz/' 45 | FRAMES_FILENAME = 'data/verbphysics/action-frames/action-frames.csv' 46 | FRAMES_TRAIN_5_DIR = 'data/verbphysics/action-frames/train-5/' 47 | FRAMES_TRAIN_20_DIR = 'data/verbphysics/action-frames/train-20/' 48 | 49 | # Setting configurations follow. 50 | 51 | playing = { 52 | # Set your desired config here. Default values are defined in settings.py 53 | # in Settings._get_default_map(). 54 | } 55 | 56 | # Archival configurations. 
57 | 58 | model_a = { 59 | Settings.Eval: [Settings.EVAL_DEV, Settings.EVAL_TEST], 60 | Settings.GloveVerbSimThresh: [0.4], 61 | Settings.GloveNounSimThresh: [0.4], 62 | Settings.SelPrefPMICutoff: [5.0], 63 | Settings.IncludeSelPrefFactors: [True], 64 | Settings.IncludeXgraph: [False], 65 | Settings.IncludeVerbSimFactors: [True], 66 | Settings.IncludeNounSimFactors: [True], 67 | Settings.IncludeInfWithinverbSimframeFactors: [False], 68 | Settings.ObjpairSplit: [5], 69 | Settings.FrameSplit: [5], 70 | } 71 | 72 | model_b_frames = { 73 | Settings.Eval: [Settings.EVAL_DEV, Settings.EVAL_TEST], 74 | Settings.GloveVerbSimThresh: [0.4], 75 | Settings.GloveNounSimThresh: [0.4], 76 | Settings.SelPrefPMICutoff: [4.0], 77 | Settings.IncludeSelPrefFactors: [True], 78 | Settings.IncludeXgraph: [False], 79 | Settings.IncludeVerbSimFactors: [False], 80 | Settings.IncludeNounSimFactors: [True], 81 | Settings.IncludeInfWithinverbSimframeFactors: [True], 82 | Settings.ObjpairSplit: [20], 83 | Settings.FrameSplit: [5], 84 | } 85 | 86 | model_b_objpairs = { 87 | Settings.Eval: [Settings.EVAL_DEV, Settings.EVAL_TEST], 88 | Settings.GloveVerbSimThresh: [0.5], 89 | Settings.GloveNounSimThresh: [0.45], 90 | Settings.SelPrefPMICutoff: [4.0], 91 | Settings.IncludeSelPrefFactors: [True], 92 | Settings.IncludeXgraph: [True], 93 | Settings.IncludeVerbSimFactors: [True], 94 | Settings.IncludeNounSimFactors: [True], 95 | Settings.IncludeInfWithinverbSimframeFactors: [True], 96 | Settings.ObjpairSplit: [5], 97 | Settings.FrameSplit: [20], 98 | } 99 | 100 | 101 | # FUNCTIONS 102 | # ----------------------------------------------------------------------------- 103 | 104 | def _setup_logging(backup=False): 105 | util.ensure_dir('log/') 106 | 107 | # Also log to backup file with date. 108 | if backup: 109 | fh = logging.FileHandler('log/' + time.strftime('%y-%m-%d_%H-%M-%S') + 110 | '.log') 111 | fh.setLevel(logging.DEBUG) 112 | f_formatter = logging.Formatter( 113 | fmt='%(asctime)s %(name)-16s %(levelname)-8s %(message)s', 114 | datefmt='%H:%M:%S' 115 | ) 116 | fh.setFormatter(f_formatter) 117 | logging.getLogger('').addHandler(fh) 118 | 119 | # Also log to console. 120 | console = logging.StreamHandler() 121 | console.setLevel(CONSOLE_LOG_LEVEL) 122 | c_formatter = logging.Formatter( 123 | fmt='%(asctime)s %(name)-16s %(levelname)-8s %(message)s', 124 | datefmt='%H:%M:%S' 125 | ) 126 | console.setFormatter(c_formatter) 127 | logging.getLogger('').addHandler(console) 128 | 129 | 130 | def _build_xgraph(graphs, tuples, pot): 131 | """ 132 | Makes the interconnected (across knowledge dimension) graph. 133 | 134 | Args: 135 | graphs ([AttrGraph]) 136 | tuples ([(str, str)]) Attr pairs to add cxns between frame RVs 137 | pot (np.ndarray of shape 3x3) 138 | 139 | Returns 140 | fg.Graph: the xgraph 141 | """ 142 | xgraph = fg.Graph(debug=False) 143 | base_logger.debug('Adding xgraph xfactors...') 144 | total = 0 145 | for t in tuples: 146 | attr1, attr2 = t 147 | g1 = [g for g in graphs if g.name == attr1] 148 | g2 = [g for g in graphs if g.name == attr2] 149 | 150 | # Might not have one or both of the graphs because of the current 151 | # settings. 152 | if len(g1) != 1 or len(g2) != 1: 153 | base_logger.debug( 154 | '\t skipping links between missing graphs %s and %s', attr1, 155 | attr2) 156 | continue 157 | 158 | g1 = g1[0] 159 | g2 = g2[0] 160 | 161 | # Find RVs that match across both graphs. 
Pruned RVs won't be returned
 162 |         # by get_rvs() (as they are actually deleted from the graph's underlying
 163 |         # dict), but we do want to make sure we're only linking frames.
 164 |         matches = []
 165 |         for rv_name, rv1 in g1.graph.get_rvs().iteritems():
 166 |             if rv1.meta['type'] != 'frame':
 167 |                 continue
 168 |             if g2.graph.has_rv(rv_name):
 169 |                 rv2 = g2.graph.get_rvs()[rv_name]
 170 |                 matches.append([rv1, rv2])
 171 | 
 172 |         # add factors to our linking graph
 173 |         for match in matches:
 174 |             xgraph.factor(match, 'xfactor', pot, {'type': 'xfactor'})
 175 | 
 176 |         # reporting
 177 |         base_logger.debug(
 178 |             '\t added %d links between frame RVs between %s and %s',
 179 |             len(matches), attr1, attr2)
 180 |         total += len(matches)
 181 |     base_logger.debug('Added %d xgraph xfactors in total', total)
 182 |     return xgraph
 183 | 
 184 | 
 185 | def _overall_stats(label, tuples):
 186 |     """
 187 |     Computes overall accuracy; returns in 'Settings'-friendly format.
 188 | 
 189 |     Args:
 190 |         label (str): What to call this.
 191 |         tuples ([(int, int)]): Each entry is (# correct, # total).
 192 |     Returns:
 193 |         (str, str) key, val of settings column to add
 194 |     """
 195 |     n_correct = sum(tp[0] for tp in tuples)
 196 |     n_total = sum(tp[1] for tp in tuples)
 197 |     return 'OVERALL %s acc' % (label), '%d/%d (%0.2f%%)' % (
 198 |         n_correct, n_total, (n_correct*100.0)/n_total)
 199 | 
 200 | def main(config, product, viz):
 201 |     """
 202 |     Runs the verbphysics system using combinations of configurations specified
 203 |     by config.
 204 | 
 205 |     Args:
 206 |         config (dict): The configuration dictionary to use. Keys should be
 207 |             Settings.XXX string constants; vals should be lists of values to
 208 |             try.
 209 | 
 210 |         product (bool): Whether to try all (polynomially many)
 211 |             combinations of settings specified in config (True), or whether to
 212 |             try varying along each config setting individually (linearly many)
 213 |             (False).
 214 | 
 215 |         viz (bool): Whether to dump visualization data of the built model.
 216 |     """
 217 |     # load up stuff needed
 218 |     base_logger.debug('Loading ngramdb cached data...')
 219 |     d = ng.Data()
 220 |     base_logger.debug('Loading PMI...')
 221 |     pmi = ng.PMI()
 222 |     base_logger.debug('Loading GloVe...')
 223 |     glv = glove.Glove()
 224 | 
 225 |     # Init settings.
 226 |     settings = Settings()
 227 |     if product:
 228 |         settings.trial_product(config)
 229 |     else:
 230 |         settings.trial_sequence(config)
 231 | 
 232 |     # Keep cycling through experiments.
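    # settings.next() advances to the next trial's parameter combination and
    # returns False once every combination has been tried.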
233 | base_logger.debug('Beginning experiments...') 234 | while(settings.next()): 235 | # Load data and init graphs 236 | base_logger.debug('Loading turked data...') 237 | verb_data = td.TurkedData.load( 238 | FRAMES_FILENAME, 239 | settings.get(Settings.AgreementNeeded), 240 | settings.get(Settings.GTBiggerPot), 241 | settings.get(Settings.GTSmallerPot), 242 | settings.get(Settings.GTEqPot)) 243 | eval_mode = settings.get(Settings.Eval) 244 | frame_split = settings.get(Settings.FrameSplit) 245 | if frame_split == 5: 246 | framesplitdir = FRAMES_TRAIN_5_DIR 247 | elif frame_split == 20: 248 | framesplitdir = FRAMES_TRAIN_20_DIR 249 | else: 250 | base_logger.error('Unknown frame split: %r', frame_split) 251 | sys.exit(1) 252 | graphs = [attrgraph.AttrGraph(glv, d, pmi, verb_data, a, eval_mode, 253 | framesplitdir) for a in settings.get(Settings.Attrs)] 254 | 255 | # Build attr graphs 256 | for g in graphs: 257 | g.build(settings) 258 | 259 | # Run LBP 260 | normalize = settings.get(Settings.NormalizeLBP) 261 | maxiters = settings.get(Settings.LBPMaxIters) 262 | if not settings.get(Settings.IncludeXgraph): 263 | # no connections between graphs; run each independently 264 | for g in graphs: 265 | g.run(True, normalize, maxiters, True) 266 | else: 267 | # build special graph that has connections between graphs 268 | xgraph = _build_xgraph(graphs, settings.get(Settings.XgraphTuples), 269 | settings.get(Settings.XgraphPot)) 270 | 271 | # init all the graphs 272 | xgraph.init_messages() 273 | for g in graphs: 274 | g.graph.init_messages() 275 | 276 | # Run LBP piecewise across all graphs (including xgraph) 277 | for i in range(1, maxiters + 1): 278 | base_logger.debug('Running LBP iter %d on all graphs...', i) 279 | convg = True 280 | 281 | # run for the attr graphs 282 | for g in graphs: 283 | convg &= g.run(False, normalize, 1, False) 284 | 285 | # run for the xgraph 286 | xconvg, _ = xgraph.lbp(False, normalize, 1, False) 287 | convg &= xconvg 288 | 289 | # check convergence 290 | if convg: 291 | base_logger.debug('All graphs converged! 
Stopping LBP.')
 292 |                     break
 293 | 
 294 |         # Decide what to eval (5 splits only)
 295 |         objpair_split = settings.get(Settings.ObjpairSplit)
 296 |         eval_frames = frame_split == 5
 297 |         eval_objpairs = objpair_split == 5
 298 | 
 299 |         # Eval and pre-viz
 300 |         verb_res_list, np_res_list = [], []
 301 |         for g in graphs:
 302 |             verb_res, np_res = g.eval(settings, eval_frames, eval_objpairs,
 303 |                                       True)
 304 |             verb_res_list.append(verb_res)
 305 |             np_res_list.append(np_res)
 306 |             g.save_marginals()
 307 | 
 308 |         # Compute & save overall statistics
 309 |         if eval_frames:
 310 |             settings.add_result(*_overall_stats('frame', verb_res_list))
 311 |         if eval_objpairs:
 312 |             settings.add_result(*_overall_stats('np', np_res_list))
 313 | 
 314 |         # Viz
 315 |         if viz:
 316 |             for g in graphs:
 317 |                 g.viz(VIZ_OUTPUT_DIR)
 318 | 
 319 |     settings.log_results()
 320 | 
 321 | 
 322 | if __name__ == '__main__':
 323 |     # Logic we don't want to worry about throughout
 324 |     _setup_logging(backup=True)
 325 | 
 326 |     # these are the possible configs to choose from
 327 |     config_options = {
 328 |         'playing': playing,
 329 |         'model_a': model_a,
 330 |         'model_b_frames': model_b_frames,
 331 |         'model_b_objpairs': model_b_objpairs,
 332 |     }
 333 |     config_opt_str = ' | '.join(config_options.keys())
 334 | 
 335 |     # cmd line
 336 |     parser = argparse.ArgumentParser(
 337 |         description='verbphysics reference implementation')
 338 |     parser.add_argument(
 339 |         '--config', metavar='CONFIG', default='model_a',
 340 |         help='hyperparameter configuration to use; options: ' +
 341 |         config_opt_str + ' (default: model_a)')
 342 |     parser.add_argument(
 343 |         # NOTE: argparse's type=bool treats any non-empty string (even
 344 |         # 'False') as True, so parse the flag's value explicitly.
 345 |         '--poly', default=True,
 346 |         type=lambda s: s.lower() in ('true', '1', 'yes'),
 347 |         help='Whether to try '
 348 |         'polynomially-many hyperparameter config combinations (True, default) '
 349 |         'or vary config dimension sequentially (False). '
 350 |     )
 351 |     parser.add_argument(
 352 |         '--viz', action='store_true', help='Whether to dump model / data to '
 353 |         'JSON for visualization (default False).'
 354 |     )
 355 |     args = parser.parse_args()
 356 | 
 357 |     # checking
 358 |     if args.config not in config_options:
 359 |         print 'Error: "%s" unknown config. Options are %s' % (args.config,
 360 |                                                               config_opt_str)
 361 |         sys.exit(1)
 362 | 
 363 |     main(config_options[args.config], args.poly, args.viz)
 364 | 
--------------------------------------------------------------------------------
/docs/factorgraph-viz.js:
--------------------------------------------------------------------------------
 1 | //
 2 | // factorgraph-viz
 3 | //
 4 | // Visualizing factor graphs using d3-force.
 5 | //
 6 | // author: mbforbes
 7 | //
 8 | //
 9 | // factorgraph-viz
 10 | //
 11 | // Visualizing factor graphs using d3-force.
 12 | //
 13 | // author: mbforbes
 14 | //
 15 | /**
 16 |  * nodetype returns a function that will take FGNodes as arguments and return
 17 |  * whether they match the desired type.
 18 |  * @param desired
 19 |  */
 20 | function nodetype(desired) {
 21 |     return function (node) {
 22 |         return node.type === desired;
 23 |     };
 24 | }
 25 | /**
 26 |  * nodesubtype returns a function that will take FGNodes as arguments and return
 27 |  * whether they match the desired subtype.
 28 |  * @param desired
 29 |  */
 30 | function nodesubtype(desired) {
 31 |     return function (node) {
 32 |         // TODO: do we want to check the node's focus?
 33 |         // let focus = node.focus || false;
 34 |         let focus = false;
 35 |         return (!focus) && node.subtype === desired;
 36 |     };
 37 | }
 38 | /**
 39 |  * nodefocus returns whether a node is the node to focus on visually.
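 * (A node is the focus iff its optional `focus` field is truthy.)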
 * @param node
 41 |  */
 42 | function nodefocus(node) {
 43 |     return node.focus || false;
 44 | }
 45 | /**
 46 |  * textclass returns the class that should be applied to the text surrounding
 47 |  * the provided node.
 48 |  * @param node
 49 |  */
 50 | function textclass(node) {
 51 |     return node.type === 'rv' ? 'rvtext' : 'factext';
 52 | }
 53 | /**
 54 |  * nodename determines the text that is rendered next to a node.
 55 |  * @param node
 56 |  */
 57 | function nodename(node) {
 58 |     if (node.type === 'fac') {
 59 |         // maybe add extra info (e.g. sel pref fac is reversed)
 60 |         let specific = '';
 61 |         if (node.specific != null) {
 62 |             specific = ' [' + node.specific + ']';
 63 |         }
 64 |         return node.subtype + specific;
 65 |     }
 66 |     else {
 67 |         // rv
 68 |         return node.id;
 69 |     }
 70 | }
 71 | //
 72 | // factorgraph-viz
 73 | //
 74 | // Visualizing factor graphs using d3-force.
 75 | //
 76 | // author: mbforbes
 77 | //
 78 | //
 79 | // util.ts has a few helper functions, mostly regarding colorizing.
 80 | //
 81 | ///
 82 | function argmax(arr) {
 83 |     if (arr.length < 1) {
 84 |         return -1;
 85 |     }
 86 |     let max_val = arr[0], max_idx = 0;
 87 |     for (let i = 1; i < arr.length; i++) {
 88 |         if (arr[i] > max_val) {
 89 |             max_val = arr[i];
 90 |             max_idx = i;
 91 |         }
 92 |     }
 93 |     return max_idx;
 94 | }
 95 | function color(none, unsureColor, unsureCutoff, values, d) {
 96 |     if (d.weights == null) {
 97 |         return d3.color(none);
 98 |     }
 99 |     let max_idx = argmax(d.weights);
 100 |     let max_val = d.weights[max_idx];
 101 |     // clamp unsure ones to final value (hopefully something like grey)
 102 |     if (max_val < unsureCutoff) {
 103 |         return d3.color(unsureColor);
 104 |     }
 105 |     return d3.color(values[max_idx]);
 106 | }
 107 | //
 108 | // factorgraph-viz
 109 | //
 110 | // Visualizing factor graphs using d3-force.
 111 | //
 112 | // author: mbforbes
 113 | //
 114 | //
 115 | // graph.ts defines the monster build(...) function for constructing the factor
 116 | // graph. It's full of closures as an excuse for accessing what are basically
 117 | // globals. I blame d3.
 118 | //
 119 | ///
 120 | ///
 121 | ///
 122 | function appendText(svg) {
 123 |     let count = 1;
 124 |     return function (label, d) {
 125 |         if (d) {
 126 |             svg.append('g').append('text')
 127 |                 .attr('transform', 'translate(20,' + count * 20 + ')')
 128 |                 .text(label + ': ' + d);
 129 |             count += 1;
 130 |         }
 131 |     };
 132 | }
 133 | /**
 134 |  *
 135 |  * build is the central function of this codebase. It parses the factor graph
 136 |  * data and constructs it.
 137 |  *
 138 |  * Note: the nodes here are technically FGNodes, but the horrendous type
 139 |  * massaging needed to make this work with d3's type hairiness is not worth the
 140 |  * effort.
 141 |  * @param config
 142 |  * @param data
 143 |  */
 144 | function build(config, data) {
 145 |     let svg = d3.select("svg"), width = +svg.attr("width"), height = +svg.attr("height");
 146 |     // Debug logging. Can be nice as Chrome's console lets you interactively
 147 |     // explore the objects you're getting.
 148 |     console.log('Factor graph data:');
 149 |     console.log(data);
 150 |     function isolate(force, filter) {
 151 |         let initialize = force.initialize;
 152 |         force.initialize = function () { initialize.call(force, data.nodes.filter(filter)); };
 153 |         return force;
 154 |     }
 155 |     // TODO: We can actually extract most of this information. Stats should only
 156 |     // be used to provide additional info that can't be extracted from the graph
 157 |     // structure.
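    // If present, data.stats is assumed to carry the four fields read below;
    // e.g. (hypothetical values):
    //   { n_rvs: 120, n_facs: 85, focus: 'took_d', correct: '3/4' }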
 158 |     let appender = appendText(svg);
 159 |     if (data.stats) {
 160 |         appender('random variables', data.stats.n_rvs);
 161 |         appender('factors', data.stats.n_facs);
 162 |         appender('focus', data.stats.focus);
 163 |         appender('correct', data.stats.correct);
 164 |     }
 165 |     let leftScale = config.position.leftScale;
 166 |     let rightScale = config.position.rightScale;
 167 |     let centerScale = config.position.centerScale;
 168 |     let sim = d3.forceSimulation(data.nodes)
 169 |         .force('charge', d3.forceManyBody().strength(-500))
 170 |         .force('link', d3.forceLink(data.links).id(function (d) { return d.id; }))
 171 |         .force('center', isolate(d3.forceCenter(width * centerScale, height / 2), nodefocus))
 172 |         .force('left', isolate(d3.forceX(width * leftScale).strength(config.position.leftStrength), nodesubtype(config.position.leftSubtype)))
 173 |         .force('right', isolate(d3.forceX(width * rightScale).strength(config.position.rightStrength), nodesubtype(config.position.rightSubtype)))
 174 |         .force('up', isolate(d3.forceY(config.position.upScale * height).strength(config.position.upStrength), nodesubtype(config.position.upSubtype)))
 175 |         .force('down', isolate(d3.forceY(config.position.downScale * height).strength(config.position.downStrength), nodesubtype(config.position.downSubtype)))
 176 |         .force('middle', d3.forceY(height / 2).strength(config.position.middleStrength))
 177 |         .on('tick', ticked);
 178 |     // use color config we've received to partially bind coloring function
 179 |     let colorize = color.bind(null, config.color.none, config.color.unsureColor, config.color.unsureCutoff, config.color.values);
 180 |     // new for svg --- create the objects directly; then ticked just modifies
 181 |     // their positions rather than drawing them.
 182 |     let link = svg.append("g")
 183 |         .attr("class", "links")
 184 |         .selectAll("line")
 185 |         .data(data.links)
 186 |         .enter().append("line")
 187 |         .attr("stroke", colorize);
 188 |     let text = svg.append('g')
 189 |         .selectAll('text')
 190 |         .data(data.nodes)
 191 |         .enter().append('text')
 192 |         .attr('class', textclass)
 193 |         .text(nodename);
 194 |     let node = svg.append("g")
 195 |         .attr("class", "nodes")
 196 |         .selectAll("circle")
 197 |         .data(data.nodes.filter(nodetype('rv')))
 198 |         .enter().append("circle")
 199 |         .attr("r", config.size.rv)
 200 |         .attr("fill", colorize)
 201 |         .call(d3.drag()
 202 |             .on("start", dragstarted)
 203 |             .on("drag", dragged)
 204 |             .on("end", dragended));
 205 |     let fac = svg.append("g")
 206 |         .attr("class", "facs")
 207 |         .selectAll("rect")
 208 |         .data(data.nodes.filter(nodetype('fac')))
 209 |         .enter().append("rect")
 210 |         .attr("fill", colorize)
 211 |         .attr("width", config.size.factor)
 212 |         .attr("height", config.size.factor)
 213 |         .call(d3.drag()
 214 |             .on("start", dragstarted)
 215 |             .on("drag", dragged)
 216 |             .on("end", dragended));
 217 |     // Assumes RVs and factor are roughly the same size.
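    // ('bigger' is used below in ticked() to offset each label so it clears
    // whichever shape, circle or square, is drawn at the node's position.)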
218 | let bigger = Math.max(config.size.rv, config.size.factor); 219 | function ticked() { 220 | link 221 | .attr("x1", function (d) { return d.source.x; }) 222 | .attr("y1", function (d) { return d.source.y; }) 223 | .attr("x2", function (d) { return d.target.x; }) 224 | .attr("y2", function (d) { return d.target.y; }); 225 | node 226 | .attr("cx", function (d) { return d.x; }) 227 | .attr("cy", function (d) { return d.y; }); 228 | fac 229 | .attr("x", function (d) { return d.x - config.size.factor / 2; }) 230 | .attr("y", function (d) { return d.y - config.size.factor / 2; }); 231 | text 232 | .attr("transform", function (d) { 233 | return "translate(" + (d.x + bigger) + "," + (d.y + 10) + ")"; 234 | }); 235 | } 236 | // The following functions allow for dragging interactivity. They're here 237 | // because they require access to variables defined in this function. (Well, 238 | // dragged() might not, but it fits with the others.) 239 | function dragsubject() { 240 | return sim.find(d3.event.x, d3.event.y); 241 | } 242 | function dragstarted() { 243 | if (!d3.event.active) { 244 | sim.alphaTarget(0.3).restart(); 245 | } 246 | d3.event.subject.fx = d3.event.subject.x; 247 | d3.event.subject.fy = d3.event.subject.y; 248 | } 249 | function dragged() { 250 | d3.event.subject.fx = d3.event.x; 251 | d3.event.subject.fy = d3.event.y; 252 | } 253 | function dragended() { 254 | if (!d3.event.active) { 255 | sim.alphaTarget(0); 256 | } 257 | d3.event.subject.fx = null; 258 | d3.event.subject.fy = null; 259 | } 260 | } 261 | ; 262 | // 263 | // factorgraph-viz 264 | // 265 | // Visualizing factor graphs using d3-force. 266 | // 267 | // author: mbforbes 268 | // 269 | // 270 | // main.ts is where the execution begins. 271 | // 272 | /// 273 | /// 274 | // Constants 275 | let FG_NAME_ELEMENT_ID = 'fg-title'; 276 | let SVG_ELEMENT_ID = 'fg-svg'; 277 | let USER_INPUT_ID = 'userInput'; 278 | let SUGGESTIONS_ELEMENT_ID = 'suggestions'; 279 | let SUGGESTION_NOTICE_ELEMENT_ID = 'suggestionNotice'; 280 | let AUTOCOMPLETE_LIMIT_DEFAULT = 50; 281 | let CONFIG_FILE = 'data/config/default.json'; 282 | // Globals (sorry). 283 | let cacheConfig; 284 | let cacheFactorgraphFns = []; 285 | /** 286 | * Extracts general config and list of factorgraph file names. Calls preload. 287 | * @param config 288 | */ 289 | function prepreload(config) { 290 | cacheConfig = config; 291 | d3.json(config.data_filenames, preload); 292 | } 293 | /** 294 | * Saves the list of factor graph file names. 295 | * @param factorgraphFns 296 | */ 297 | function preload(factorgraphFns) { 298 | cacheFactorgraphFns = factorgraphFns; 299 | maybeLoad(cacheConfig.startup_filename); 300 | } 301 | /** 302 | * Helper to clear all children of a DOM node. 303 | * @param el 304 | */ 305 | function clearChildren(el) { 306 | while (el.firstChild) { 307 | el.removeChild(el.firstChild); 308 | } 309 | } 310 | /** 311 | * Removes everything from within the svg. 312 | */ 313 | function destroy() { 314 | clearChildren(document.getElementById(SVG_ELEMENT_ID)); 315 | } 316 | /** 317 | * Loads factor graph found in `fn`. 318 | * @param fn 319 | */ 320 | function load(fn) { 321 | destroy(); 322 | d3.json(fn, build.bind(null, cacheConfig)); 323 | } 324 | /** 325 | * Loads factor graph found in `fn` if it's in our list of valid factor graph 326 | * names. 
327 | * @param name 328 | */ 329 | function maybeLoad(name) { 330 | if (cacheFactorgraphFns.indexOf(name) != -1) { 331 | let prefix = cacheConfig.display_prefix || ''; 332 | document.getElementById(FG_NAME_ELEMENT_ID).innerText = prefix + name; 333 | load(cacheConfig.data_dir + name + '.json'); 334 | } 335 | } 336 | /** 337 | * Called every time the user text box changes its content. 338 | */ 339 | function userTypes() { 340 | let inp = document.getElementById(USER_INPUT_ID).value; 341 | // Prefix filter. Don't show anything with blank input 342 | let opts = []; 343 | if (inp.length > 0) { 344 | opts = cacheFactorgraphFns.filter(fn => fn.startsWith(inp)); 345 | } 346 | // Clear any existing suggestions. 347 | let sug = document.getElementById(SUGGESTIONS_ELEMENT_ID); 348 | clearChildren(sug); 349 | // Display suggestions notice only if we have at least 1 suggestion. 350 | let sugNotice = document.getElementById(SUGGESTION_NOTICE_ELEMENT_ID); 351 | sugNotice.style.visibility = opts.length > 0 ? 'visible' : 'hidden'; 352 | // Add suggestions. 353 | let autocomplete_limit = cacheConfig.autocomplete_limit || AUTOCOMPLETE_LIMIT_DEFAULT; 354 | for (let opt of opts.slice(0, autocomplete_limit)) { 355 | let el = document.createElement('button'); 356 | el.className = 'suggestion'; 357 | el.innerText = opt; 358 | el.setAttribute('onclick', 'maybeLoad("' + opt + '");'); 359 | sug.appendChild(el); 360 | } 361 | // Display note if they were truncated. 362 | if (opts.length > autocomplete_limit) { 363 | let el = document.createElement('p'); 364 | el.className = 'limited'; 365 | el.innerText = '(only first ' + autocomplete_limit + ' of ' + 366 | opts.length + ' shown)'; 367 | sug.appendChild(el); 368 | } 369 | } 370 | /** 371 | * Called when the user submits the text box (presses enter or clicks button). 372 | * Always returns false so we don't do a post. 373 | */ 374 | function userSubmits() { 375 | maybeLoad(document.getElementById(USER_INPUT_ID).value); 376 | return false; 377 | } 378 | // execution starts here 379 | d3.json(CONFIG_FILE, prepreload); 380 | -------------------------------------------------------------------------------- /src/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Settings is for running experiments with different parameters. Supports 3 | stuff like auto grid search and logging (yes, logging!). 4 | 5 | TODO: 6 | - [ ] sanity check passed experiments to be of type 'list'. If passing a 7 | single setting that happens to be iterable it will happily iterate 8 | through, e.g., all characters of a string. 9 | 10 | author: mbforbes 11 | """ 12 | 13 | # IMPORTS 14 | # ----------------------------------------------------------------------------- 15 | 16 | # builtins 17 | import code # code.interact(local=dict(globals(), **locals())) 18 | from itertools import product 19 | import logging 20 | 21 | # 3rd party 22 | import numpy as np 23 | from tabulate import tabulate 24 | 25 | 26 | # TOP-LEVEL FUNCTIONS 27 | # ----------------------------------------------------------------------------- 28 | 29 | def cell_massage(val): 30 | """ 31 | Preprocessing values to ensure that they can fit well in the cell of a 32 | printed table. 33 | 34 | Args: 35 | val 36 | 37 | Returns: 38 | val (or something) 39 | """ 40 | # tabulate appears to sometimes work for bools and sometimes not. So I'm 41 | # doing this so that it always works. 
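    # E.g. (hypothetical): cell_massage(True) -> 'True', and
    # cell_massage(np.array([0.7, 0.2, 0.1])) -> '0.7, 0.2, 0.1'.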
42 | if type(val) is bool: 43 | return 'True' if val else 'False' 44 | # tabulate TOTALLY doesn't handle numpy arrays as cell entries. 45 | if type(val) is np.ndarray: 46 | return ', '.join([str(row) for row in val]) 47 | # default 48 | return val 49 | 50 | 51 | # CLASSES 52 | # ----------------------------------------------------------------------------- 53 | 54 | class Settings(object): 55 | """ 56 | Class for trying all (exponentially many) combinations of all parameter 57 | settings. Must call next() before each trial run. 58 | 59 | New features: 60 | 61 | - [x] np.ndarray aligned printing 62 | 63 | - [x] Print settings that aren't changing at the top. If they're default, 64 | note them as so. 65 | 66 | Each iteration, note only the thing that is changing. 67 | 68 | Integrate with results. Output in a table format with the stuff that 69 | is changing. 70 | 71 | Example: 72 | 73 | Settings that aren't changing: 74 | 75 | foo: 0.5 (default) 76 | barbar: 0.7 (default) 77 | baz: 0.9 78 | 79 | (.. experiments run here ...) 80 | 81 | la -> | 0.5 | 0.7 | 0.9 82 | ------+-------+-------+------ 83 | | 98% | 30% | 40% 84 | 85 | 2D for 2 varied. TODO: For > 2, multiple tables? 86 | 87 | TODO: Use pandas for this? 88 | """ 89 | # Class vars as constants for keys 90 | 91 | # Used with iterators to tell when to stop. 92 | NothingLeft = object() 93 | 94 | Eval = 'eval' 95 | GloveVerbSimThresh = 'glove-verb-sim-thresh' 96 | GloveNounSimThresh = 'glove-noun-sim-thresh' 97 | Attrs = 'attrs' 98 | VerbSimPot = 'verb-sim-pot' 99 | NounEqPot = 'noun-eq-pot' 100 | NounSimPot = 'noun-sim-pot' 101 | NounSimRevPot = 'noun-sim-rev-pot' 102 | MaxNounsPerFrame = 'max-nouns-per-frame' 103 | FilterAbstract = 'filter-abstract' 104 | GTBiggerPot = 'gt-bigger-pot' 105 | GTSmallerPot = 'gt-smaller-pot' 106 | GTEqPot = 'gt-eq-pot' 107 | AgreementNeeded = 'agreement-needed' 108 | SelPrefMethod = 'sel-pref-method' 109 | SelPrefFreqCutoff = 'sel-pref-freq-cutoff' 110 | SelPrefPMICutoff = 'sel-pref-pmi-cutoff' 111 | SelPrefPot = 'sel-pref-pot' 112 | NormalizeLBP = 'normalize-lbp' 113 | LBPMaxIters = 'lbp-max-iters' 114 | IncludeVerbSimFactors = 'include-verb-sim-factors' 115 | IncludeNounSimFactors = 'include-noun-sim-factors' 116 | IncludeSelPrefFactors = 'include-sel-pref-factors' 117 | IncludeInfWithinverbSimframeFactors = 'include-inf-withinverb-simframe-factors' 118 | WithinverbSimframePot = 'withinverb-simframe-pot' 119 | IncludeXgraph = 'include-xgraph' 120 | XgraphTuples = 'xgraph-tuples' 121 | XgraphPot = 'xgraph-pot' 122 | MaxSeeds = 'max-seeds' 123 | RawNounsFilename = 'raw-nouns-filename' 124 | EvalNounsFilename = 'eval-nouns-filename' 125 | Lemmatize = 'lemmatize' 126 | SelPrefMinFreqForPMI = 'sel-pref-min-freq-for-pmi' 127 | IncludeNgramDBNouns = 'include-ngramdb-nouns' 128 | IncludeGoldNounpairs = 'include-gold-nounpairs' 129 | GoldNounpairAgreementNeeded = 'gold-nounpair-agreement-needed' 130 | GoldNounpairGreaterPot = 'gold-nounpair-greater-pot' 131 | GoldNounpairLesserPot = 'gold-nounpair-lesser-pot' 132 | GoldNounpairEqPot = 'gold-nounpair-eq-pot' 133 | AddRemainderAsNonseeds = 'add-remainder-as-nonseeds' 134 | FrameSeedMethod = 'frame-seed-method' 135 | NounpairSeedMethod = 'nounpair-seed-method' 136 | SelPrefPotMethod = 'selpref-pot-method' 137 | SelPrefEmbFilename = 'selpref-emb-filename' 138 | ObjpairSplit = 'objpair-split' 139 | FrameSplit = 'frame-split' 140 | 141 | # Class vars in all caps as constants for vals 142 | EVAL_DEV = 'dev' 143 | EVAL_TEST = 'test' 144 | 145 | SEL_PREF_FREQ = 'freq' 146 | 
SEL_PREF_PMI = 'pmi' 147 | 148 | POTENTIAL_METHOD_HARDCODED = 'hardcoded' 149 | POTENTIAL_METHOD_TRAINED = 'trained' 150 | POTENTIAL_METHOD_BOTH = 'both' 151 | 152 | # digging into more detail here for selpref 153 | SEL_PREF_HARDCODED = 'hardcoded' 154 | SEL_PREF_MLE = 'mle' 155 | SEL_PREF_EMB = 'emb' 156 | 157 | # unary potentials 158 | POT_UNARY_MEDIUM_BIGGER = np.array([0.7, 0.2, 0.1]) 159 | POT_UNARY_MEDIUM_SMALLER = np.array([0.2, 0.7, 0.1]) 160 | POT_UNARY_MEDIUM_EQ = np.array([0.15, 0.15, 0.7]) 161 | 162 | POT_UNARY_STRONG_BIGGER = np.array([0.9, 0.07, 0.03]) 163 | POT_UNARY_STRONG_SMALLER = np.array([0.07, 0.9, 0.03]) 164 | POT_UNARY_STRONG_EQ = np.array([0.05, 0.05, 0.9]) 165 | 166 | # binary potentials 167 | POT_BINARY_MEDIUM_SIM = np.array([ 168 | [0.7, 0.2, 0.1], 169 | [0.2, 0.7, 0.1], 170 | [0.15, 0.15, 0.7], 171 | ]) 172 | POT_BINARY_MEDIUM_REV = np.array([ 173 | [0.2, 0.7, 0.1], 174 | [0.7, 0.2, 0.1], 175 | [0.15, 0.15, 0.7], 176 | ]) 177 | 178 | POT_BINARY_STRONG_SIM = np.array([ 179 | [0.9, 0.07, 0.03], 180 | [0.07, 0.9, 0.03], 181 | [0.05, 0.05, 0.9], 182 | ]) 183 | POT_BINARY_STRONG_REV = np.array([ 184 | [0.07, 0.9, 0.03], 185 | [0.9, 0.07, 0.03], 186 | [0.05, 0.05, 0.9], 187 | ]) 188 | 189 | @staticmethod 190 | def _get_default_map(): 191 | return { 192 | Settings.Eval: Settings.EVAL_DEV, 193 | Settings.Attrs: ['size', 'weight', 'verb-speed', 'hardness', 'rigidness'], 194 | Settings.MaxSeeds: -1, # -1 means no limit 195 | Settings.GloveVerbSimThresh: 0.5, 196 | Settings.GloveNounSimThresh: 0.45, 197 | Settings.VerbSimPot: Settings.POT_BINARY_MEDIUM_SIM, 198 | Settings.NounEqPot: Settings.POT_UNARY_MEDIUM_EQ, 199 | Settings.NounSimPot: Settings.POT_BINARY_MEDIUM_SIM, 200 | Settings.NounSimRevPot: Settings.POT_BINARY_MEDIUM_REV, 201 | Settings.MaxNounsPerFrame: 1, 202 | Settings.FilterAbstract: True, 203 | Settings.GTBiggerPot: Settings.POT_UNARY_MEDIUM_BIGGER, 204 | Settings.GTSmallerPot: Settings.POT_UNARY_MEDIUM_SMALLER, 205 | Settings.GTEqPot: Settings.POT_UNARY_MEDIUM_EQ, 206 | Settings.AgreementNeeded: 2, 207 | Settings.SelPrefFreqCutoff: 1000, 208 | Settings.SelPrefMinFreqForPMI: 1, 209 | Settings.SelPrefPMICutoff: 4.0, 210 | Settings.SelPrefMethod: Settings.SEL_PREF_PMI, 211 | Settings.SelPrefPot: Settings.POT_BINARY_MEDIUM_SIM, 212 | Settings.NormalizeLBP: True, 213 | Settings.LBPMaxIters: 20, 214 | Settings.IncludeSelPrefFactors: True, 215 | Settings.IncludeXgraph: True, 216 | Settings.IncludeVerbSimFactors: True, 217 | Settings.IncludeNounSimFactors: True, 218 | Settings.IncludeInfWithinverbSimframeFactors: True, 219 | Settings.WithinverbSimframePot: Settings.POT_BINARY_MEDIUM_SIM, 220 | Settings.XgraphTuples: [ 221 | ('size', 'weight'), 222 | ('size', 'hardness'), 223 | ('weight', 'hardness'), 224 | ], 225 | Settings.XgraphPot: Settings.POT_BINARY_MEDIUM_SIM, 226 | Settings.RawNounsFilename: '', 227 | Settings.EvalNounsFilename: '', 228 | Settings.Lemmatize: True, 229 | Settings.IncludeNgramDBNouns: False, 230 | Settings.IncludeGoldNounpairs: True, 231 | Settings.GoldNounpairAgreementNeeded: 2, 232 | Settings.GoldNounpairGreaterPot: Settings.POT_UNARY_MEDIUM_BIGGER, 233 | Settings.GoldNounpairLesserPot: Settings.POT_UNARY_MEDIUM_SMALLER, 234 | Settings.GoldNounpairEqPot: Settings.POT_UNARY_MEDIUM_EQ, 235 | Settings.AddRemainderAsNonseeds: True, 236 | Settings.FrameSeedMethod: Settings.POTENTIAL_METHOD_BOTH, 237 | Settings.NounpairSeedMethod: Settings.POTENTIAL_METHOD_BOTH, 238 | Settings.SelPrefPotMethod: Settings.SEL_PREF_HARDCODED, 239 | 
Settings.SelPrefEmbFilename: '', 240 | Settings.ObjpairSplit: 20, 241 | Settings.FrameSplit: 5, 242 | } 243 | 244 | def __init__(self, logger=None): 245 | """ 246 | Sets dict with default settings. 247 | 248 | Settings to do: 249 | - [x] constants above 250 | - [x] number of nounsp 251 | - [x] Potentials (bigger, smaller, eq) 252 | - [x] Agreement needed (x/3) 253 | - [x] Verb sim fac pots 254 | - [x] Noun sim fac pots 255 | - [x] Sel pref pots 256 | - [x] Sel pref cutoff 257 | - [x] whether to normalize in lbp 258 | - [x] max n iterations to run lbp for 259 | - [x] which factors to add 260 | - [x] whether to filter abstract nouns 261 | - [x] check out data.py settings 262 | - [x] check rest of this file 263 | """ 264 | # Some admin 265 | if logger is None: 266 | logger = logging.getLogger(__name__) 267 | self.logger = logger 268 | 269 | # Default values 270 | self._params = Settings._get_default_map() 271 | self.param_keys = [] 272 | self.param_iterator = None 273 | 274 | def get(self, key): 275 | return self._params[key] 276 | 277 | def _setup_trial(self, trial_keys): 278 | """ 279 | Tracks which configs vary (are "trial" keys). 280 | 281 | Args: 282 | trial_keys ([str]) 283 | """ 284 | self.default_keys = set(self._get_default_map().keys()) - set(trial_keys) 285 | self.trial_keys = trial_keys 286 | self.trial_num = 0 287 | self.trial_log = {} 288 | self.trial_results = {} 289 | self.trial_results_all_keys = [] 290 | 291 | def trial_sequence(self, params): 292 | """ 293 | Sets up a trial to try the specified ranges of parameter values in 294 | sequence (holding all other parameters to their defaults and varying 295 | only one at a time). 296 | 297 | Args: 298 | params ({Settings.KEY: [list of values to try]}) 299 | """ 300 | self._setup_trial(params.keys()) 301 | 302 | # This implementation is kind of gross because it's bolted onto how the 303 | # trial_product was designed. We really want to iterate over both keys 304 | # and values and just set what we want. But I'm too lazy to learn about 305 | # how iterators work in python. So we just use all the keys. 306 | dm = self._get_default_map() 307 | keys = dm.keys() 308 | vals = [dm[k] for k in keys] 309 | trials = [] 310 | for k, v in params.iteritems(): 311 | kidx = keys.index(k) 312 | for val in v: 313 | trial = vals[:] 314 | trial[kidx] = val 315 | trials += [tuple(trial)] 316 | self.param_keys = keys 317 | self.param_iterator = iter(trials) 318 | 319 | def trial_product(self, params): 320 | """ 321 | Sets up a trial to try the product (all exponentially many 322 | combinations) of the specified ranges of parameter values. 323 | 324 | Args: params ({Settings.KEY: [list of values to try]}) 325 | """ 326 | self._setup_trial(params.keys()) 327 | 328 | param_keys = [] 329 | param_vals = [] 330 | for k,v in params.iteritems(): 331 | param_keys += [k] 332 | param_vals += [v] 333 | 334 | # self.current_indices = [-1 for _ in range(len(param_keys))] 335 | self.param_keys = param_keys 336 | self.param_iterator = product(*param_vals) 337 | 338 | def next(self): 339 | """ 340 | Move on to the next parameter setting combination. 
341 | 342 | Returns: 343 | bool Whether there's anything left 344 | """ 345 | next_params = next(self.param_iterator, Settings.NothingLeft) 346 | if next_params is Settings.NothingLeft: 347 | return False 348 | assert len(next_params) == len(self.param_keys) 349 | 350 | self.trial_num += 1 351 | self.trial_log[self.trial_num] = {} 352 | self.trial_results[self.trial_num] = {} 353 | for i, k in enumerate(self.param_keys): 354 | self._params[k] = next_params[i] 355 | self.trial_log[self.trial_num][k] = self._params[k] 356 | return True 357 | 358 | def add_result(self, key, val): 359 | """ 360 | Adds result in form of key: val *to currently running trial*. 361 | 362 | Args: 363 | key (any hashable) 364 | val (any) 365 | """ 366 | if key not in self.trial_results_all_keys: 367 | self.trial_results_all_keys.append(key) 368 | self.trial_results[self.trial_num][key] = val 369 | 370 | def log_results(self): 371 | """ 372 | Logs results. Call after trials have finished. 373 | 374 | First logs the config that didn't change. 375 | 376 | Then logs a table of the experiments run and any results that were 377 | added. 378 | """ 379 | self.logger.info('Static config (defaults):') 380 | full_dm = self._get_default_map() 381 | pure_dm = {k: cell_massage(v) for k,v in full_dm.iteritems() if k in self.default_keys} 382 | list_pure_dm = [list(item) for item in pure_dm.iteritems()] 383 | for line in tabulate(list_pure_dm, tablefmt="fancy_grid").split('\n'): 384 | self.logger.info(line) 385 | 386 | self.logger.info('Trial configs:') 387 | rows = [] 388 | for i in sorted(self.trial_log.keys()): 389 | row = {} 390 | # settings 391 | for tk in self.trial_keys: 392 | row[tk] = cell_massage(self.trial_log[i][tk]) 393 | # ... then results 394 | for rk in self.trial_results_all_keys: 395 | val = '---' 396 | if rk in self.trial_results[i]: 397 | val = cell_massage(self.trial_results[i][rk]) 398 | row[rk] = val 399 | rows.append(row) 400 | 401 | # TODO: use ordereddict and set key order so table headers go settings 402 | # and then results. 403 | # headers = self.trial_keys + self.trial_results_all_keys 404 | for line in tabulate(rows, headers="keys", tablefmt="fancy_grid").split('\n'): 405 | self.logger.info(line) 406 | 407 | def debug_log_config(self): 408 | """ 409 | Dumps full config to debug log. 
 410 |         """
 411 |         self.logger.debug('Settings:')
 412 |         for k,v in self._params.iteritems():
 413 |             self.logger.debug('%(key)25s: %(val)s' % {'key': k, 'val': v})
 414 | 
--------------------------------------------------------------------------------
/lib/ngramdb/ngramdb/ngramdb.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import uuid
 3 | 
 4 | from myria import MyriaConnection
 5 | 
 6 | import util
 7 | from constants import *
 8 | from ngramtoken import ngrams_from_tupledict
 9 | 
 10 | 
 11 | class NgramDb(object):
 12 | 
 13 |     def __init__(self, connection_id):
 14 |         self._connection = MyriaConnection(
 15 |             hostname=REST_URL,
 16 |             port=REST_PORT,
 17 |             ssl=True)
 18 | 
 19 |         connection_id = connection_id.replace(' ', '_')
 20 | 
 21 |         if not connection_id.replace('_', '').isalpha():
 22 |             raise ValueError("connection_id must be letters only, no "
 23 |                              "numbers or punctuation")
 24 | 
 25 |         self._connection_id = connection_id
 26 | 
 27 |         self.queries = []
 28 | 
 29 |     def create_query(
 30 |             self,
 31 |             words=None,
 32 |             postags=None,
 33 |             deprels=None,
 34 |             headids=None,
 35 |             ignore_position=False,
 36 |             absolute_position=False,
 37 |             limit=None,
 38 |             threshold=None,
 39 |             description=None,
 40 |             output=None):
 41 |         """Creates an NgramQuery object, which can be passed to the
 42 |         `run_query` method of this NgramDb object.
 43 | 
 44 |         Keyword arguments:
 45 |         words
 46 |             - a list of strings
 47 |         postags
 48 |             - a list of Penn-treebank style POS tags
 49 |         deprels
 50 |             - a list of Stanford-style dependency relations
 51 |         headids
 52 |             - a list of integers corresponding to the list position of
 53 |               this token's head
 54 |         ignore_position
 55 |             - do not pay attention to the ordering of the tokens in
 56 |               the lists
 57 |         absolute_position
 58 |             - the positions of tokens must match the positions in the
 59 |               ngram exactly
 60 |         limit
 61 |             - TODO: NOT COMPLETELY IMPLEMENTED
 62 |         threshold
 63 |             - only return ngrams with at least this frequency
 64 |         description
 65 |             - a plain-language description of this query
 66 |         output
 67 |             - name of the Myria table that will store this query's
 68 |               results; default is this NgramDb's connection_id
 69 | 
 70 |         Except for words, all arguments are optional.
 71 | 
 72 |         Any position in words, postags, deprels, or headids can be defined as
 73 |         `None` to denote that space as a "wildcard". For example,
 74 |         postags=["NNS", None] constrains only the first token's POS tag.
 75 | 
 76 |         Any string in words, postags, or deprels may use the "|" character to
 77 |         signify "or". For example, words=["cat|dog|mouse", "eats|runs"] will
 78 |         match "cat", "mouse", or "dog" in the first token, and "eats" or "runs"
 79 |         in the second token. """
 80 |         if not description or not isinstance(description, str):
 81 |             description = "[ ngramdb query #{} from {} ]".format(
 82 |                 len(self.queries), self._connection_id)
 83 | 
 84 |         if not output or not isinstance(output, str):
 85 |             output = self._connection_id
 86 | 
 87 |         return NgramDbQuery(
 88 |             words=words,
 89 |             postags=postags,
 90 |             deprels=deprels,
 91 |             headids=headids,
 92 |             ignore_position=ignore_position,
 93 |             absolute_position=absolute_position,
 94 |             limit=limit,
 95 |             threshold=threshold,
 96 |             description=description,
 97 |             output=output)
 98 | 
 99 |     def run_query(self, query):
 100 |         """Runs an NgramQuery and returns a list of Ngrams.
 101 | 
 102 |         See create_query for details on creating a query.
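        A hypothetical round trip (the token values are illustrative only):

            ndb = NgramDb('example_connection')
            query = ndb.create_query(words=['threw', None],
                                     postags=[None, 'NNS'],
                                     threshold=100)
            ngrams = ndb.run_query(query)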
103 | """ 104 | self.queries.append(query) 105 | 106 | q_plan = self._make_join_context_query_plan(query) 107 | 108 | relation_key = q_plan['fragments'][-1]['operators'][-1]['relationKey'] 109 | 110 | try: 111 | # answer = self._connection.execute_query(q_plan) 112 | myria_query = self._connection.submit_query(q_plan) 113 | query_id = myria_query['queryId'] 114 | 115 | full_status = self._connection.get_query_status(query_id) 116 | status = full_status['status'] 117 | 118 | while status not in ('UNKNOWN', 'SUCCESS', 'ERROR'): 119 | time.sleep(0.1) 120 | full_status = self._connection.get_query_status(query_id) 121 | status = full_status['status'] 122 | 123 | if status in ('UNKNOWN', 'ERROR'): 124 | raise RuntimeError( 125 | "Myria error: {}".format(full_status['message'])) 126 | 127 | else: 128 | raw_results = self._connection.download_dataset(relation_key) 129 | full_results = ngrams_from_tupledict(raw_results) 130 | return full_results 131 | 132 | except KeyboardInterrupt: 133 | raise KeyboardInterrupt 134 | 135 | def create_and_run_query( 136 | self, 137 | words=None, 138 | postags=None, 139 | deprels=None, 140 | headids=None, 141 | ignore_position=False, 142 | absolute_position=False, 143 | limit=None, 144 | threshold=None, 145 | description=None, 146 | output=None): 147 | """Creates and runs an NgramQuery and returns a list of Ngrams. 148 | 149 | See create_query for details on creating a query. 150 | """ 151 | 152 | query = self.create_query( 153 | words=words, 154 | postags=postags, 155 | deprels=deprels, 156 | headids=headids, 157 | ignore_position=ignore_position, 158 | absolute_position=absolute_position, 159 | limit=limit, 160 | threshold=threshold, 161 | description=description, 162 | output=output) 163 | 164 | return self.run_query(query) 165 | 166 | @classmethod 167 | def _make_join_context_query_plan(cls, query): 168 | subquery = cls._build_join_context_subquery(query) 169 | sql = ' '.join(SQL_CONTEXT_TEMPLATE.format(subquery=subquery).split()) 170 | q_plan = JSON_CONTEXT_TEMPLATE 171 | q_plan['fragments'][0]['operators'][0]['sql'] = sql 172 | q_plan['rawQuery'] = query.description 173 | # Ugh line length. 
 174 |         last_op = q_plan['fragments'][-1]['operators'][-1]
 175 |         last_op['relationKey']['relationName'] = query.output
 176 |         return q_plan
 177 | 
 178 |     @classmethod
 179 |     def _build_join_context_subquery(cls, query):
 180 |         sub_rel_str, sub_pred_str = cls._build_join_subquery_components(query)
 181 |         sub_template = "SELECT DISTINCT tt0.nid, tt0.freq FROM {} WHERE {}"
 182 | 
 183 |         if isinstance(query.threshold, int):
 184 |             sub_template = sub_template + \
 185 |                 " AND tt0.freq >= {}".format(query.threshold)
 186 | 
 187 |         sub_template = sub_template + " ORDER BY tt0.freq DESC, tt0.nid ASC"
 188 | 
 189 |         if isinstance(query.limit, int):
 190 |             sub_template = sub_template + " LIMIT {}".format(query.limit)
 191 | 
 192 |         subquery = sub_template.format(sub_rel_str, sub_pred_str)
 193 |         return subquery
 194 | 
 195 |     @classmethod
 196 |     def _build_join_subquery_components(cls, query):
 197 |         # cheating at refactoring is fun lol
 198 |         words = query.words
 199 |         postags = query.postags
 200 |         deprels = query.deprels
 201 |         headids = query.headids
 202 |         ignore_position = query.ignore_position
 203 |         absolute_position = query.absolute_position
 204 |         threshold = query.threshold
 205 |         ngram_length = query.ngram_length
 206 | 
 207 |         zipped = zip(words, postags, deprels, range(ngram_length), headids)
 208 | 
 209 |         relations = []
 210 |         predicates = []
 211 | 
 212 |         # get all appropriate pairs of tokens
 213 |         token_idx_pairs = [
 214 |             (i, j) for j in range(ngram_length) for i in range(j)]
 215 |         #if ignore_position else [(i, i+1) for i in range(ngram_length-1)]
 216 | 
 217 |         # create relations and predicates for each pair
 218 |         for i, pair in enumerate(token_idx_pairs):
 219 |             tka_idx, tkb_idx = pair
 220 |             token_pair = (zipped[tka_idx], zipped[tkb_idx])
 221 | 
 222 |             pair_id = "tt{}".format(i)
 223 | 
 224 |             if i > 0:
 225 |                 predicates.append(
 226 |                     util.make_predicate(pair_id, "nid", "tt0.nid"))
 227 | 
 228 |             tka_raw, tkb_raw = token_pair
 229 | 
 230 |             def build_token_kwargs(tka_raw, tkb_raw):
 231 |                 tka_kwargs = {}
 232 |                 tkb_kwargs = {}
 233 | 
 234 |                 tka_kwargs['word'] = tka_raw[0]
 235 |                 tkb_kwargs['word'] = tkb_raw[0]
 236 | 
 237 |                 tka_kwargs['postag'] = tka_raw[1]
 238 |                 tkb_kwargs['postag'] = tkb_raw[1]
 239 | 
 240 |                 tka_kwargs['deprel'] = tka_raw[2]
 241 |                 tkb_kwargs['deprel'] = tkb_raw[2]
 242 | 
 243 |                 if ignore_position:
 244 |                     tka_kwargs['offset'] = None
 245 |                     tkb_kwargs['offset'] = None
 246 |                 elif absolute_position:
 247 |                     tka_kwargs['offset'] = tka_raw[3]+1
 248 |                     tkb_kwargs['offset'] = tkb_raw[3]+1
 249 |                 else:
 250 |                     tka_kwargs['offset'] = None
 251 |                     tkb_kwargs['offset'] = tkb_raw[3] - tka_raw[3]
 252 | 
 253 |                 head = None
 254 |                 if tka_raw[4] is None and tkb_raw[4] is None:
 255 |                     pass
 256 |                 elif tka_raw[3] == tkb_raw[4]:  # tkb's head index points at tka
 257 |                     head = "tka"
 258 |                 elif tkb_raw[3] == tka_raw[4]:  # tka's head index points at tkb
 259 |                     head = "tkb"
 260 | 
 261 |                 return (tka_kwargs, tkb_kwargs, head)
 262 | 
 263 |             tka_kwargs, tkb_kwargs, head = build_token_kwargs(tka_raw, tkb_raw)
 264 | 
 265 |             subrelations, subpredicates = cls._build_pair_predicate(
 266 |                 pair_id, tka_kwargs, tkb_kwargs, head)
 267 | 
 268 |             relations.extend(subrelations)
 269 | 
 270 |             if ignore_position:
 271 |                 sr, sp = cls._build_pair_predicate(
 272 |                     pair_id, tkb_kwargs, tka_kwargs, head)
 273 |                 relations.extend(sr)
 274 | 
 275 |                 sp1 = "({})".format(" AND ".join(subpredicates))
 276 |                 sp2 = "({})".format(" AND ".join(sp))
 277 | 
 278 |                 predicates.append("({})".format(" OR ".join((sp1, sp2))))
 279 | 
 280 |             else:
 281 |                 subpredicate = "({})".format(" AND ".join(subpredicates))
 282 |                 predicates.append(subpredicate)
 283 | 
 284 |         # put 'em all together
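        # (Roughly: the deduplicated relations become the FROM clause and the
        # predicates the WHERE clause of the subquery template assembled in
        # _build_join_context_subquery.)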
285 | sub_rel_str = ", ".join(set(relations)) 286 | sub_pred_str = " AND ".join(set(predicates)) 287 | 288 | return (sub_rel_str, sub_pred_str) 289 | 290 | @classmethod 291 | def _build_pair_predicate( 292 | cls, this_pair, tka_kwargs, tkb_kwargs, head=None): 293 | # kwargs: word, postag, deprel, offset 294 | 295 | subrelations = [util.aliased_relation(TT_RELATION, this_pair)] 296 | subpredicates = [] 297 | 298 | i = int(this_pair[2:]) 299 | 300 | def make_word_pred(tk, word): 301 | if word is None: 302 | return None 303 | 304 | joined_tk_words = ','.join( 305 | "'{}'".format(w) for w in word.split('|')) 306 | return util.make_predicate( 307 | this_pair, 308 | "{}_surface".format(tk), 309 | "({})".format(joined_tk_words), 310 | ' IN ') 311 | 312 | subpredicates.append(make_word_pred('tka', tka_kwargs['word'])) 313 | subpredicates.append(make_word_pred('tkb', tkb_kwargs['word'])) 314 | 315 | def make_postag_pred(tk, postag): 316 | if postag is None: 317 | return None 318 | 319 | this_pos = 'pos{}_{}'.format(i, tk) 320 | subrelations.append(util.aliased_relation(POS_RELATION, this_pos)) 321 | 322 | these_postags = postag.split('|') 323 | 324 | pred1 = util.make_predicate( 325 | this_pair, 326 | "{}_posid".format(tk), 327 | "{}.posid".format(this_pos)) 328 | 329 | pred2 = util.make_predicate( 330 | this_pos, 331 | "postag", 332 | "({})".format(','.join("'{}'".format(p) for p in 333 | these_postags)), 334 | ' IN ') 335 | return " AND ".join((pred1, pred2)) 336 | 337 | subpredicates.append(make_postag_pred('tka', tka_kwargs['postag'])) 338 | subpredicates.append(make_postag_pred('tkb', tkb_kwargs['postag'])) 339 | 340 | def make_deprel_pred(tk, deprel): 341 | if deprel is None: 342 | return None 343 | 344 | this_deprel = 'deprel{}_{}'.format(i, tk) 345 | subrelations.append( 346 | util.aliased_relation(DEP_RELATION, this_deprel)) 347 | 348 | these_deprels = deprel.split('|') 349 | 350 | pred1 = util.make_predicate( 351 | this_pair, 352 | "{}_depid".format(tk), 353 | "{}.depid".format(this_deprel)) 354 | 355 | pred2 = util.make_predicate( 356 | this_deprel, 357 | "deprel", 358 | "({})".format(','.join("'{}'".format(p) for p in 359 | these_deprels)), 360 | ' IN ') 361 | 362 | return " AND ".join((pred1, pred2)) 363 | 364 | subpredicates.append(make_deprel_pred('tka', tka_kwargs['deprel'])) 365 | subpredicates.append(make_deprel_pred('tkb', tkb_kwargs['deprel'])) 366 | 367 | # if first token's offset is not none, then the position is absolute -- 368 | # (both should be set) 369 | if tka_kwargs['offset'] is not None: 370 | subpredicates.append(util.make_predicate( 371 | this_pair, "tka_position", tka_kwargs['offset'])) 372 | 373 | subpredicates.append(util.make_predicate( 374 | this_pair, "tkb_position", tkb_kwargs['offset'])) 375 | 376 | # otherwise, position is relative (but we still care) 377 | elif tkb_kwargs['offset'] is not None: 378 | subpredicates.append(util.make_predicate( 379 | this_pair, "tkb_position", 380 | "{}.tka_position".format(this_pair), 381 | '>')) 382 | 383 | if head is not None: 384 | if head == 'tka': 385 | subpredicates.append(util.make_predicate( 386 | this_pair, "tkb_headposition", 387 | "{}.{}".format(this_pair, "tka_position"))) 388 | elif head == 'tkb': 389 | subpredicates.append(util.make_predicate( 390 | this_pair, "tka_headposition", 391 | "{}.{}".format(this_pair, "tkb_position"))) 392 | 393 | return (subrelations, [s for s in subpredicates if s is not None]) 394 | 395 | 396 | class NgramDbQuery(object): 397 | 398 | def __init__( 399 | self, 400 | words=None, 401 | 
postags=None, 402 | deprels=None, 403 | headids=None, 404 | ignore_position=False, 405 | absolute_position=False, 406 | limit=None, 407 | threshold=None, 408 | description="[ ngramdb query ]", 409 | output="TEMPOUT"): 410 | 411 | # no conflicting args!! 412 | if ignore_position and absolute_position: 413 | raise ValueError("ignore_position and absolute_position cannot " 414 | "both be True") 415 | 416 | # make sure we have enough reasonable words, or the query could freeze 417 | # the db 418 | if words is None: 419 | raise ValueError("'words' keyword argument must have a list of " 420 | "at least " + str(MIN_WORD_COUNT) + " word(s)") 421 | 422 | if sum(1 for w in words if w is not None) < MIN_WORD_COUNT: 423 | raise ValueError("'words' keyword argument must have a list of " 424 | "at least " + str(MIN_WORD_COUNT) + " word(s)") 425 | 426 | if all(len(w) < MIN_WORD_LEN for w in words if w is not None): 427 | raise ValueError("'words' keyword argument must have at least 1 " 428 | "word that is " + str(MIN_WORD_LEN) + 429 | " or more letters long") 430 | 431 | # error check the rest of the arguments 432 | try: 433 | self.ngram_length, max_name = max([(len(x), y) for x, y in zip( 434 | (words, postags, deprels, headids), 435 | ("words", "postags", "deprels", "headids")) 436 | if x is not None], 437 | key=lambda x: x[0]) 438 | 439 | except TypeError as e: 440 | raise ValueError("Must provide at least one of the keyword args " 441 | "'words', 'postags', 'deprels', 'headids'") 442 | 443 | def check_arg(kw, arg): 444 | if arg is not None: 445 | if len(arg) != self.ngram_length: 446 | raise ValueError( 447 | "{} and {} must have same number of items" 448 | " (need {}, found {})".format( 449 | max_name, kw, self.ngram_length, len(arg))) 450 | 451 | else: 452 | return arg 453 | 454 | else: 455 | return [None for _ in range(self.ngram_length)] 456 | 457 | # normalize arguments 458 | self.words = [x.lower() if x is not None else x 459 | for x in check_arg('words', words)] 460 | self.postags = [x.upper() if x is not None else x 461 | for x in check_arg('postags', postags)] 462 | self.deprels = [x for x in check_arg('deprels', deprels)] 463 | self.headids = [int(x) if x is not None else x 464 | for x in check_arg('headids', headids)] 465 | 466 | # set all the other stuff 467 | self.ignore_position = ignore_position 468 | self.absolute_position = absolute_position 469 | self.limit = limit 470 | self.threshold = threshold 471 | self.description = description 472 | self.output = output 473 | 474 | def __eq__(self, other): 475 | ''' 476 | Implementing for caching with these as keys to a dictionary. 477 | 478 | Would be as simple as comparing self.__dict__.items() (as below in 479 | __str__) but there is some output info stored that we don't want to 480 | compare. 481 | ''' 482 | return (self.words == other.words and 483 | self.postags == other.postags and 484 | self.deprels == other.deprels and 485 | self.headids == other.headids and 486 | self.ignore_position == other.ignore_position and 487 | self.absolute_position == other.absolute_position and 488 | self.limit == other.limit and 489 | self.threshold == other.threshold) 490 | 491 | def __ne__(self, other): 492 | ''' 493 | Yep, this doesn't happen automatically. Thanks, python. 494 | ''' 495 | return not self == other 496 | 497 | def __hash__(self): 498 | ''' 499 | Implementing for caching with these as keys to a dictionary.
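A hypothetical usage sketch of the caching pattern this enables (example values; assumes MIN_WORD_COUNT and MIN_WORD_LEN accept a single 5-letter word):

    q1 = NgramDbQuery(words=['threw', None, None], description='run A')
    q2 = NgramDbQuery(words=['threw', None, None], description='run B')
    # description/output are excluded from __eq__ and __hash__
    assert q1 == q2 and hash(q1) == hash(q2)
    cache = {q1: 'some result'}
    assert q2 in cache  # an equal query re-finds the cached result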
500 | ''' 501 | # Can't use None in hashing because it can return inconsistent numbers 502 | # (as it just uses None's address in memory, which changes if the OS 503 | # has memory randomization turned on). 504 | words = tuple([w if w is not None else '' for w in self.words]) 505 | postags = tuple([p if p is not None else '' for p in self.postags]) 506 | deprels = tuple([d if d is not None else '' for d in self.deprels]) 507 | headids = tuple([h if h is not None else '' for h in self.headids]) 508 | limit = 0 if self.limit is None else self.limit 509 | threshold = 0 if self.threshold is None else self.threshold 510 | return hash(( 511 | words, 512 | postags, 513 | deprels, 514 | headids, 515 | self.ignore_position, 516 | self.absolute_position, 517 | limit, 518 | threshold)) 519 | 520 | def __str__(self): 521 | values = ["{}={}".format(k, repr(v)) 522 | for k, v in sorted(self.__dict__.items(), 523 | key=lambda x: type(x[1]))] 524 | return '\n'.join(values) 525 | -------------------------------------------------------------------------------- /data/verbphysics/objects/train-20/train.csv: -------------------------------------------------------------------------------- 1 | ,obj1,obj2,size-agree,size-maj,weight-agree,weight-maj,strength-agree,strength-maj,rigidness-agree,rigidness-maj,speed-agree,speed-maj 2 | 0,person,dress,1,-42,3,1,3,1,1,-42,1,-42 3 | 1,person,step,3,1,3,1,3,-1,2,-1,3,1 4 | 2,body,mouth,3,1,3,1,3,1,2,1,2,-1 5 | 3,sun,coal,3,1,3,1,2,1,1,-42,2,1 6 | 4,vessel,something,3,-42,3,-42,3,-42,3,-42,3,-42 7 | 5,place,farm,3,-42,3,-42,2,-42,3,-42,2,0 8 | 6,master,dress,3,1,3,1,3,1,3,1,3,1 9 | 7,ground,body,3,1,3,1,3,1,3,1,3,-1 10 | 8,ash,mouth,3,-1,3,-1,3,-1,2,-1,2,-1 11 | 9,gentleman,knife,3,1,3,1,2,-1,2,-1,2,1 12 | 10,train,face,3,1,3,1,3,1,3,1,3,1 13 | 11,friend,mouth,3,1,3,1,3,1,2,1,2,0 14 | 12,energy,sun,2,-42,2,-1,1,-42,2,-42,2,-42 15 | 13,father,basin,3,1,2,1,2,1,2,-1,3,1 16 | 14,bag,gate,3,-1,3,-1,3,-1,3,-1,2,-42 17 | 15,brother,book,3,1,3,1,3,1,3,-1,3,1 18 | 16,way,road,2,-42,3,-42,2,0,2,0,2,0 19 | 17,back,something,2,-42,2,-42,2,-42,2,-42,2,-42 20 | 18,lady,car,3,-1,3,-1,3,-1,3,-1,3,-1 21 | 19,dinner,daughter,3,-1,3,-1,3,-1,1,-42,3,-1 22 | 20,person,lad,3,0,3,0,3,0,3,0,3,0 23 | 21,fist,hand,2,-1,3,0,2,0,2,0,2,0 24 | 22,ground,room,1,-42,2,1,2,1,1,-42,2,-42 25 | 23,child,doorway,3,-1,2,-1,2,-1,3,-1,3,1 26 | 24,victim,face,3,1,3,1,2,1,3,0,2,0 27 | 25,rain,light,1,-42,1,-42,2,-42,1,-42,3,-1 28 | 26,horse,coal,3,1,3,1,2,1,2,-1,3,1 29 | 27,poet,door,2,-1,2,1,1,-42,2,-1,2,-42 30 | 28,brother,ball,3,1,3,1,3,1,3,-1,2,-1 31 | 29,lady,direction,3,-42,2,-42,2,-42,2,-42,2,-42 32 | 30,house,sea,3,-1,2,-1,3,-42,2,-42,2,-1 33 | 31,coach,arm,3,1,3,1,3,1,2,0,2,0 34 | 32,lady,object,3,-42,3,-42,3,-42,3,-42,3,-42 35 | 33,something,hand,2,1,2,1,2,-42,2,-42,2,-42 36 | 34,father,seal,1,-42,1,-42,1,-42,2,-42,2,-42 37 | 35,edition,place,3,-42,3,-42,3,-42,3,-42,3,-42 38 | 36,room,wife,2,1,2,1,2,1,2,1,2,-42 39 | 37,messenger,camp,2,-1,2,-1,2,-42,2,-1,2,1 40 | 38,window,floor,3,-1,3,-1,3,-1,2,0,3,0 41 | 39,place,hand,3,1,2,1,2,1,2,1,3,-1 42 | 40,door,floor,3,-1,3,-1,2,0,3,0,2,1 43 | 41,bay,boat,3,1,2,-42,1,-42,1,-42,3,-1 44 | 42,food,way,3,-42,3,-42,3,-42,3,-42,3,-42 45 | 43,hat,back,3,-1,3,-1,3,-1,3,-1,2,-1 46 | 44,someone,dinner,3,1,3,1,2,1,1,-42,3,1 47 | 45,someone,fool,2,-42,1,-42,2,-42,2,-42,2,-42 48 | 46,stone,hand,1,-42,2,1,3,1,3,1,2,-1 49 | 47,ice,head,2,-42,1,-42,2,1,2,1,2,-42 50 | 48,coach,hat,3,1,3,1,3,1,3,1,3,1 51 | 49,ear,something,3,-42,3,-42,3,-42,3,-42,3,-42 52 | 
50,someone,boy,2,-42,2,-42,2,-42,2,0,1,-42 53 | 51,stone,bed,2,-1,2,-1,2,1,3,1,1,-42 54 | 52,person,daughter,2,1,2,1,2,1,1,-42,2,1 55 | 53,person,barn,3,-1,3,-1,3,-1,3,-1,3,1 56 | 54,sun,tree,3,1,3,1,2,1,2,-42,2,1 57 | 55,door,light,2,1,3,1,3,1,2,1,2,-1 58 | 56,ball,mouth,1,-42,1,-42,1,-42,2,1,1,-42 59 | 57,child,picture,3,1,3,1,3,1,3,-1,3,1 60 | 58,brother,hand,3,1,3,1,3,1,2,0,3,0 61 | 59,back,air,2,-42,2,1,2,1,2,1,2,-42 62 | 60,gentleman,ball,2,1,2,1,2,1,2,1,1,-42 63 | 61,window,end,2,-42,2,-42,2,-42,2,-42,2,-42 64 | 62,step,road,3,-1,3,-1,3,-1,2,-1,2,1 65 | 63,result,element,2,-42,1,-42,1,-42,2,-42,2,-42 66 | 64,parent,child,3,1,3,1,3,1,2,0,2,0 67 | 65,sun,sail,3,1,3,1,2,1,2,-42,2,1 68 | 66,river,breath,2,1,3,1,3,1,2,1,3,1 69 | 67,vessel,anchor,3,1,3,1,2,1,2,0,2,1 70 | 68,friend,newspaper,3,1,3,1,3,1,3,1,3,1 71 | 69,everything,master,3,1,3,1,2,1,2,-42,2,-42 72 | 70,coast,place,2,1,2,1,2,-42,2,-42,1,-42 73 | 71,state,way,2,-42,2,-42,2,-42,2,-42,3,-42 74 | 72,anchor,mouth,2,1,2,1,2,1,2,1,3,-42 75 | 73,hair,room,3,-1,3,-1,3,-1,3,-1,1,-42 76 | 74,sea,sail,3,1,3,1,2,1,2,-1,2,1 77 | 75,temple,something,3,-42,3,-42,3,-42,3,-42,3,-42 78 | 76,system,end,3,-42,3,-42,3,-42,3,-42,3,-42 79 | 77,stone,way,2,-42,2,-42,2,-42,2,-42,2,-42 80 | 78,sun,ear,3,1,3,1,3,1,2,1,2,1 81 | 79,anything,end,2,-42,3,-42,3,-42,3,-42,2,-42 82 | 80,father,truck,3,-1,3,-1,3,-1,3,-1,3,-1 83 | 81,head,ball,2,0,3,1,2,1,1,-42,3,-1 84 | 82,hip,hand,3,1,3,1,2,1,1,-42,2,-1 85 | 83,body,direction,2,-42,2,-42,2,-42,2,-42,2,-42 86 | 84,king,camp,3,-1,3,-1,2,-1,1,-42,3,1 87 | 85,bag,way,3,-42,3,-42,3,-42,3,-42,3,-42 88 | 86,person,wife,3,0,3,0,3,0,3,0,3,0 89 | 87,hair,floor,3,-1,3,-1,3,-1,3,-1,2,1 90 | 88,ball,light,1,-42,2,1,2,1,1,-42,3,-1 91 | 89,heaven,face,2,1,2,-42,2,-42,2,-42,2,-1 92 | 90,knife,throat,1,-42,2,-1,2,1,2,1,2,-42 93 | 91,someone,light,1,-42,3,1,2,1,2,-1,2,-1 94 | 92,chair,window,2,0,2,1,3,1,2,0,3,0 95 | 93,person,fox,3,1,3,1,2,1,2,0,3,-1 96 | 94,sea,middle,2,-42,3,-42,3,-42,3,-42,3,-42 97 | 95,messenger,master,1,-42,1,-42,1,-42,2,0,1,-42 98 | 96,system,something,3,-42,3,-42,3,-42,3,-42,3,-42 99 | 97,shirt,hand,3,1,2,-1,2,-1,1,-42,1,-42 100 | 98,person,ice,3,1,3,1,3,1,2,-1,2,1 101 | 99,step,flood,2,-1,2,-42,2,-42,2,-42,2,-42 102 | 100,daughter,call,2,-42,2,1,2,-42,2,-42,3,-42 103 | 101,eye,fist,3,-1,3,-1,3,-1,2,-1,2,-42 104 | 102,house,hill,3,-1,3,-1,3,-1,2,-1,2,0 105 | 103,stream,hand,3,1,2,1,2,-42,2,-1,3,1 106 | 104,current,shore,2,1,1,-42,2,1,2,-1,3,1 107 | 105,sea,call,2,-42,2,-42,2,-42,2,-42,2,-42 108 | 106,ship,hand,3,1,3,1,3,1,3,1,1,-42 109 | 107,child,glass,2,1,2,1,1,-42,2,-1,2,1 110 | 108,way,end,3,-42,3,-42,3,-42,3,-42,3,-42 111 | 109,lady,eye,2,1,2,1,2,1,2,1,1,-42 112 | 110,house,back,1,-42,1,-42,1,-42,1,-42,2,-1 113 | 111,fist,mouth,2,0,1,-42,2,1,2,1,1,-42 114 | 112,door,wife,2,1,1,-42,1,-42,2,1,2,-1 115 | 113,bay,way,3,-42,3,-42,3,-42,3,-42,3,-42 116 | 114,object,hand,3,-42,3,-42,3,-42,3,-42,3,-42 117 | 115,flood,end,3,-42,3,-42,3,-42,3,-42,3,-42 118 | 116,eye,direction,3,-42,3,-42,3,-42,2,-42,2,-42 119 | 117,river,boat,3,1,3,1,2,1,2,-1,2,1 120 | 118,brother,coal,3,1,2,1,3,1,2,-1,3,1 121 | 119,victim,house,3,-1,3,-1,2,-1,2,-1,3,1 122 | 120,brother,clothes,2,1,3,1,3,1,3,1,2,0 123 | 121,child,purse,3,1,3,1,3,1,2,-1,3,1 124 | 122,bank,flood,3,-42,3,-42,2,-42,2,-42,2,-1 125 | 123,house,farm,3,-1,3,-1,2,-1,1,-42,3,0 126 | 124,side,current,3,-42,3,-42,3,-42,2,-42,2,-42 127 | 125,gentleman,book,3,1,3,1,3,1,3,-1,3,1 128 | 126,ground,king,3,1,3,1,3,1,3,1,3,-1 129 | 127,father,world,3,-1,3,-1,3,-1,3,-1,2,1 130 | 
128,wall,hand,3,1,3,1,3,1,3,1,3,-1 131 | 129,grass,hand,3,-1,3,-1,3,-1,2,1,3,-1 132 | 130,bank,suit,3,1,3,1,3,1,3,1,1,-42 133 | 131,patient,glass,2,1,3,1,1,-42,2,-1,2,1 134 | 132,gentleman,train,3,-1,3,-1,3,-1,3,-1,3,-1 135 | 133,meal,piece,3,1,3,1,2,-42,2,-42,2,-42 136 | 134,sun,breath,3,1,3,1,3,1,1,-42,1,-42 137 | 135,everything,child,3,1,3,1,3,1,2,1,2,1 138 | 136,hat,response,2,-42,2,1,2,-42,1,-42,3,-1 139 | 137,torrent,mountain,2,-1,2,-1,1,-42,2,-42,3,1 140 | 138,boy,farm,3,-1,2,-1,3,-1,2,-1,3,1 141 | 139,office,picture,3,1,3,1,1,-42,1,-42,3,0 142 | 140,gentleman,stream,3,-1,3,-1,1,-42,3,1,2,-1 143 | 141,house,barn,1,-42,1,-42,2,0,2,0,2,0 144 | 142,bag,everything,2,-42,2,-42,2,-42,2,-42,2,-42 145 | 143,coach,bank,2,-1,2,-1,2,-1,2,-1,2,1 146 | 144,gentleman,eye,3,1,3,1,3,1,2,-1,2,1 147 | 145,person,ship,3,-1,3,-1,3,-1,3,-1,2,-1 148 | 146,someone,eye,2,1,2,1,2,1,1,-42,1,-42 149 | 147,father,light,2,-42,3,1,2,1,3,1,2,-1 150 | 148,river,sun,3,-1,3,-1,3,-1,2,-1,2,-42 151 | 149,sun,head,3,1,3,1,3,1,2,1,2,1 152 | 150,someone,piece,2,-42,2,-42,3,-42,3,-42,3,-42 153 | 151,gentleman,room,3,-1,3,-1,3,-1,3,-1,3,1 154 | 152,lady,stream,3,-1,2,-1,2,1,2,1,3,-1 155 | 153,foot,wall,3,-1,3,-1,3,-1,3,-1,3,1 156 | 154,breath,soul,2,-42,2,0,1,-42,2,0,1,-42 157 | 155,daughter,anything,2,-42,2,-42,3,-42,2,-42,3,-42 158 | 156,back,room,2,-42,2,-42,2,-42,2,-42,2,-42 159 | 157,scene,room,2,-42,2,-42,2,-42,2,-42,2,-42 160 | 158,hair,effect,3,-42,2,-42,2,-42,3,-42,2,-42 161 | 159,king,effect,1,-42,2,-42,2,-42,2,-42,2,-42 162 | 160,car,hand,3,1,3,1,3,1,3,1,3,1 163 | 161,town,picture,3,1,3,1,3,1,2,1,2,-42 164 | 162,lady,picture,3,1,3,1,3,1,2,-1,3,1 165 | 163,window,air,2,-1,3,1,2,1,3,1,3,-1 166 | 164,piano,suit,3,1,3,1,3,1,2,1,2,-42 167 | 165,father,bag,3,1,3,1,3,1,2,1,3,1 168 | 166,exile,end,2,-42,2,-42,2,-42,2,-42,2,-42 169 | 167,house,picture,3,1,3,1,3,1,3,1,2,-1 170 | 168,office,air,2,-1,2,1,2,-42,1,-42,2,-1 171 | 169,skirt,knee,1,-42,2,-1,1,-42,2,-1,2,-1 172 | 170,body,room,3,-1,2,-1,2,-1,3,-1,3,1 173 | 171,someone,child,3,1,3,1,3,1,2,0,1,-42 174 | 172,lady,hand,2,1,2,1,2,0,1,-42,1,-42 175 | 173,person,elbow,3,1,3,1,2,1,2,0,2,0 176 | 174,river,current,3,1,2,1,2,-1,2,0,2,-1 177 | 175,head,light,2,-1,3,1,3,1,3,1,2,-1 178 | 176,fox,goose,2,1,2,1,2,1,3,0,2,1 179 | 177,person,deck,3,-1,3,-1,3,-1,3,-1,3,1 180 | 178,boy,something,3,-42,3,-42,3,-42,3,-42,3,-42 181 | 179,phone,room,3,-1,3,-1,2,-1,2,-1,1,-42 182 | 180,call,way,2,-42,2,-42,2,-42,2,-42,2,-42 183 | 181,boy,face,3,1,3,1,3,1,1,-42,2,0 184 | 182,energy,hand,2,-42,2,-1,1,-42,2,-1,2,1 185 | 183,stone,direction,3,-42,3,-42,3,-42,3,-42,3,-42 186 | 184,state,step,3,1,2,1,2,1,2,1,1,-42 187 | 185,shoulder,light,3,-42,2,1,2,-42,2,1,2,-1 188 | 186,house,mouth,3,1,2,1,3,1,3,1,3,-1 189 | 187,father,bay,3,-1,2,-1,3,-1,2,1,2,1 190 | 188,side,soul,2,-42,2,1,2,-42,3,1,3,-42 191 | 189,front,light,3,-42,2,-42,2,-42,2,-42,2,-42 192 | 190,finger,grass,2,1,3,1,3,1,2,1,3,1 193 | 191,town,newspaper,3,1,3,1,2,1,3,1,2,-1 194 | 192,body,effect,3,-42,3,-42,3,-42,3,-42,3,-42 195 | 193,side,everything,2,-1,2,-1,3,-42,3,-42,3,-42 196 | 194,hat,way,3,-42,3,-42,3,-42,3,-42,3,-42 197 | 195,world,anything,2,1,2,1,2,-42,2,-42,2,1 198 | 196,piece,knee,2,-42,2,-42,2,-42,2,-42,2,-42 199 | 197,office,wind,1,-42,2,1,2,-1,1,-42,3,-1 200 | 198,magistrate,wife,2,1,2,1,1,-42,1,-42,1,-42 201 | 199,person,fool,3,0,3,0,3,0,3,0,3,0 202 | 200,master,road,2,-1,2,-1,2,-1,2,-1,2,1 203 | 201,father,coast,2,-1,2,-1,2,-42,2,-1,2,1 204 | 202,sea,city,2,1,2,-42,2,-42,2,-1,2,-42 205 | 
203,boy,lamp,3,1,3,1,3,1,3,-1,3,1 206 | 204,coach,door,2,-42,2,-42,2,-42,2,-42,2,-42 207 | 205,anchor,middle,3,-42,3,-42,3,-42,3,-42,3,-42 208 | 206,messenger,boy,2,0,2,0,2,0,3,0,2,0 209 | 207,father,step,3,1,2,1,1,-42,2,-1,2,1 210 | 208,meal,child,3,-1,3,-1,3,-1,2,-1,2,-1 211 | 209,block,wall,3,-1,2,-1,2,-1,2,0,2,0 212 | 210,home,city,3,-1,2,-1,2,0,2,-1,2,0 213 | 211,father,wife,2,1,2,1,2,1,2,0,2,0 214 | 212,side,beach,2,-1,2,-42,3,-42,2,-42,2,-42 215 | 213,goose,hill,3,-1,3,-1,3,-1,3,-1,3,1 216 | 214,horse,room,3,-1,3,-1,2,-1,3,-1,2,1 217 | 215,air,light,2,-42,1,-42,1,-42,2,-1,1,-42 218 | 216,person,spoon,3,1,3,1,2,1,3,-1,3,1 219 | 217,newspaper,floor,2,-1,2,-1,2,-1,3,-1,2,-42 220 | 218,car,end,3,-42,3,-42,3,-42,3,-42,3,-42 221 | 219,servant,room,3,-1,3,-1,3,-1,3,-1,3,1 222 | 220,eye,car,3,-1,3,-1,3,-1,3,-1,3,-1 223 | 221,base,hand,2,1,2,1,1,-42,2,0,2,-42 224 | 222,clock,direction,3,-42,2,-42,3,-42,3,-42,3,-42 225 | 223,father,clothes,3,1,3,1,2,1,2,1,2,1 226 | 224,wall,room,1,-42,1,-42,2,1,1,-42,2,-42 227 | 225,room,piece,2,-42,2,-42,2,-42,2,-42,2,-42 228 | 226,energy,room,2,-42,2,-1,2,-42,2,-1,2,1 229 | 227,sea,light,2,1,3,1,2,1,2,-42,3,-1 230 | 228,gentleman,everything,2,-1,2,-1,2,-1,2,-42,1,-42 231 | 229,clothes,bed,2,-1,2,-1,2,-1,2,-1,2,0 232 | 230,someone,grass,3,1,3,1,3,1,3,1,3,1 233 | 231,something,end,3,-42,3,-42,3,-42,3,-42,3,-42 234 | 232,city,impatient,2,-42,2,-42,2,-42,2,-42,3,-42 235 | 233,horse,direction,3,-42,2,-42,2,-42,2,-42,3,-42 236 | 234,king,ship,2,-1,2,-1,2,-1,2,-1,2,-1 237 | 235,head,direction,3,-42,3,-42,3,-42,3,-42,2,-42 238 | 236,city,dress,3,1,3,1,3,1,3,1,1,-42 239 | 237,current,end,3,-42,3,-42,3,-42,3,-42,3,-42 240 | 238,person,boat,3,-1,3,-1,3,-1,3,-1,2,-1 241 | 239,anything,vessel,3,-42,3,-42,3,-42,3,-42,3,-42 242 | 240,dinner,dress,3,-1,2,0,2,-1,2,-42,2,0 243 | 241,person,precipice,2,-1,2,-1,2,-1,2,-1,3,1 244 | 242,harlot,face,3,1,3,1,3,1,3,-1,2,1 245 | 243,master,violin,3,1,3,1,3,1,3,-1,2,1 246 | 244,call,door,3,-42,3,-42,3,-42,3,-42,3,-42 247 | 245,person,clothes,2,1,3,1,3,1,2,1,3,1 248 | 246,dress,face,3,1,2,-1,2,1,2,-1,2,-42 249 | 247,king,knee,3,1,3,1,2,1,2,-1,2,1 250 | 248,window,front,3,-42,3,-42,3,-42,3,-42,2,-42 251 | 249,stone,torrent,2,-1,2,-1,2,-42,2,-42,2,-1 252 | 250,friend,bed,3,-1,2,-1,2,-1,3,-1,2,1 253 | 251,seal,way,3,-42,3,-42,3,-42,3,-42,3,-42 254 | 252,brother,doorway,3,-1,1,-42,2,-1,3,-1,3,1 255 | 253,cup,air,2,-1,2,1,2,1,2,1,2,-1 256 | 254,state,city,3,1,2,1,1,-42,2,1,1,-42 257 | 255,glass,hand,1,-42,2,0,3,-1,3,1,2,-1 258 | 256,father,child,3,1,3,1,3,1,2,1,2,-1 259 | 257,ship,rain,2,1,3,1,2,1,3,1,1,-42 260 | 258,messenger,city,3,-1,3,-1,3,-1,3,-1,2,1 261 | 259,wife,mouth,3,1,3,1,3,1,2,1,3,1 262 | 260,state,piece,3,1,3,1,3,1,2,1,2,-1 263 | 261,state,book,3,1,2,1,2,1,2,1,2,-42 264 | 262,person,vial,3,1,3,1,3,1,3,-1,3,1 265 | 263,someone,step,3,1,3,1,2,-1,3,-1,3,1 266 | 264,hand,position,3,-42,2,-42,2,-42,3,-42,3,-42 267 | 265,brother,face,3,1,3,1,3,1,2,1,3,1 268 | 266,gentleman,bottle,3,1,3,1,3,1,3,-1,3,1 269 | 267,body,breath,3,1,3,1,3,1,3,1,1,-42 270 | 268,body,master,3,0,3,0,3,0,3,0,3,0 271 | 269,brother,daughter,2,1,2,1,3,1,2,0,2,0 272 | 270,something,mouth,2,-42,2,-42,2,-42,2,-42,2,-42 273 | 271,eye,road,3,-1,3,-1,3,-1,3,-1,2,1 274 | 272,coach,front,3,-42,3,-42,3,-42,3,-42,3,-42 275 | 273,watch,house,3,-1,3,-1,3,-1,2,-1,2,-42 276 | 274,ash,floor,3,-1,3,-1,3,-1,3,-1,2,1 277 | 275,horse,bed,2,1,2,1,2,1,2,-1,3,1 278 | 276,hair,body,3,-1,3,-1,3,-1,3,-1,3,0 279 | 277,head,doorway,3,-1,3,-1,2,-1,2,-1,2,1 280 | 
278,lady,fool,2,-42,2,-42,2,-42,2,-42,2,-42 281 | 279,gentleman,purse,3,1,3,1,3,1,1,-42,3,1 282 | 280,result,position,1,-42,2,1,2,1,2,1,2,-42 283 | 281,street,middle,3,-42,3,-42,3,-42,3,-42,3,-42 284 | 282,hair,wind,3,-1,2,1,2,-1,3,1,2,-1 285 | 283,train,effect,2,-42,1,-42,1,-42,2,1,2,1 286 | 284,lad,camp,3,-1,2,-1,2,-1,3,-1,2,1 287 | 285,place,ship,2,1,2,-42,2,-42,2,-42,2,-1 288 | 286,fox,hand,3,1,3,1,2,1,2,-1,3,1 289 | 287,meal,ground,2,-1,2,-1,2,-1,2,-1,2,-42 290 | 288,bank,hill,3,-1,3,-1,2,-1,2,0,3,0 291 | 289,boy,call,2,-42,2,-42,2,-42,2,-42,2,-42 292 | 290,father,pocket,3,1,3,1,3,1,2,1,3,1 293 | 291,food,car,3,-1,3,-1,3,-1,3,-1,3,-1 294 | 292,sea,basin,3,1,3,1,1,-42,3,-1,2,1 295 | 293,clothes,body,2,-1,3,-1,2,-1,3,-1,1,-42 296 | 294,arm,king,3,-1,3,-1,1,-42,2,0,2,0 297 | 295,boat,servant,3,1,3,1,3,1,3,1,3,1 298 | 296,train,servant,3,1,3,1,3,1,3,1,3,1 299 | 297,sun,eye,3,1,3,1,3,1,2,1,2,1 300 | 298,world,eye,3,1,3,1,3,1,2,1,2,-1 301 | 299,place,store,2,1,1,-42,2,-1,2,-1,1,-42 302 | 300,everything,ball,1,-42,1,-42,2,0,1,-42,2,-42 303 | 301,brother,back,2,1,3,1,1,-42,1,-42,2,0 304 | 302,coach,wife,2,0,2,0,1,-42,2,0,2,0 305 | 303,wife,book,3,1,3,1,2,1,2,-1,3,1 306 | 304,ground,road,2,1,2,1,1,-42,2,-1,1,-42 307 | 305,poet,picture,3,1,3,1,3,1,2,-1,3,1 308 | 306,hat,arm,3,-1,2,-1,3,-1,2,-1,2,-1 309 | 307,way,doorway,2,1,1,-42,1,-42,1,-42,1,-42 310 | 308,servant,floor,3,-1,2,-1,2,-1,3,-1,3,1 311 | 309,worker,wind,2,-1,3,1,1,-42,3,1,2,-1 312 | 310,way,wind,2,-42,2,-42,2,-42,3,-42,2,-42 313 | 311,system,picture,2,-42,2,-42,2,-42,2,-42,2,-42 314 | 312,button,eye,3,-1,3,-1,3,1,3,1,2,-1 315 | 313,back,light,1,-42,3,1,3,1,2,1,2,-1 316 | 314,daughter,picture,3,1,3,1,3,1,2,-1,3,1 317 | 315,knife,store,3,-1,3,-1,2,-1,2,0,2,-42 318 | 316,watch,face,3,-1,3,-1,2,1,3,1,2,-42 319 | 317,person,dinner,3,1,3,1,3,1,3,1,3,1 320 | 318,brother,hair,3,1,3,1,3,1,3,1,2,1 321 | 319,teacher,air,2,-1,2,1,1,-42,2,1,2,1 322 | 320,body,face,3,1,3,1,3,1,3,0,1,-42 323 | 321,daughter,bottle,3,1,3,1,3,1,2,-1,3,1 324 | 322,bag,mouth,3,1,3,-1,3,-1,3,-1,2,-42 325 | 323,state,door,3,1,2,1,2,-42,3,-42,2,-42 326 | 324,bay,direction,2,-42,2,-42,2,-42,2,-42,3,-42 327 | 325,ship,element,3,1,3,1,3,1,3,1,3,1 328 | 326,street,way,1,-42,1,-42,1,-42,1,-42,2,-42 329 | 327,father,strap,3,1,3,1,3,1,2,-1,2,1 330 | 328,father,coal,2,1,2,1,2,1,2,-1,3,1 331 | 329,corner,road,2,-1,2,-1,1,-42,1,-42,2,0 332 | 330,brother,room,3,-1,3,-1,2,-1,3,-1,3,1 333 | 331,ground,way,2,-42,2,-42,1,-42,1,-42,2,-42 334 | 332,chair,back,2,1,2,1,2,-1,3,1,2,-1 335 | 333,gentleman,picture,3,1,3,1,3,1,2,-1,3,1 336 | 334,servant,end,2,-42,2,-42,2,-42,2,-42,2,-42 337 | 335,gulp,throat,3,-1,2,-1,2,-1,3,-1,3,1 338 | 336,messenger,door,3,-1,2,1,3,-1,3,-1,2,1 339 | 337,edition,position,3,-42,3,-42,3,-42,3,-42,3,-42 340 | 338,parcel,mouth,2,1,2,1,1,-42,3,1,2,-1 341 | 339,air,shore,2,1,2,-1,2,-1,2,-1,2,1 342 | 340,magistrate,door,3,-1,3,1,3,-1,3,-1,3,1 343 | 341,lady,air,2,-1,3,1,3,1,2,1,2,-1 344 | 342,friend,piece,2,1,2,1,2,1,2,-42,2,1 345 | 343,king,middle,2,-42,2,-42,2,-42,3,-42,3,-42 346 | 344,lady,piece,2,1,2,1,2,1,2,1,2,1 347 | 345,beard,face,3,-1,3,-1,2,-1,3,-1,1,-42 348 | 346,poet,something,2,-42,2,-42,2,-42,2,-42,2,-42 349 | 347,friend,shore,3,-1,3,-1,3,-1,2,-1,1,-42 350 | 348,state,king,3,1,3,1,2,-42,2,1,1,-42 351 | 349,boat,face,3,1,3,1,3,1,3,1,3,1 352 | 350,hat,step,2,-1,2,-1,2,-1,2,-1,1,-42 353 | 351,servant,vial,2,1,2,1,2,1,2,-1,2,1 354 | 352,father,stone,3,1,2,1,3,-1,3,-1,3,1 355 | 353,boy,stream,2,-1,2,1,1,-42,3,1,1,-42 356 | 
354,brother,house,3,-1,3,-1,3,-1,3,-1,3,1 357 | 355,door,knee,3,1,3,1,2,1,3,1,3,-1 358 | 356,current,face,3,1,3,1,3,1,2,1,3,1 359 | 357,hedge,way,2,-42,2,-42,2,-42,2,-42,2,-42 360 | 358,train,shore,2,-1,1,-42,2,-42,3,1,3,1 361 | 359,end,road,2,-1,2,-42,2,-42,1,-42,2,-42 362 | 360,person,door,2,-1,1,-42,2,-1,3,-1,3,1 363 | 361,light,face,1,-42,2,-1,2,-1,2,-1,2,1 364 | 362,person,gulp,2,1,2,1,2,-42,3,-42,3,-42 365 | 363,friend,car,3,-1,3,-1,3,-1,3,-1,3,-1 366 | 364,bank,house,2,1,2,1,2,1,2,1,3,0 367 | 365,way,ship,2,-42,2,-42,3,-42,2,-1,3,-42 368 | 366,watch,camp,3,-1,3,-1,1,-42,2,-42,1,-42 369 | 367,wife,newspaper,3,1,3,1,3,1,2,1,3,1 370 | 368,ship,road,1,-42,2,1,2,-1,2,-1,3,1 371 | 369,servant,bottle,3,1,3,1,3,1,3,-1,3,1 372 | 370,daughter,wife,2,-1,2,-1,2,-1,2,-1,2,-1 373 | 371,daughter,exile,3,-42,3,-42,3,-42,3,-42,3,-42 374 | 372,bank,grass,2,1,3,1,3,1,3,1,1,-42 375 | 373,place,king,3,1,3,1,3,1,3,1,3,-1 376 | 374,object,mouth,2,1,2,1,2,1,1,-42,2,-42 377 | 375,finger,pocket,2,-1,2,1,2,1,2,-1,2,1 378 | 376,soul,ship,2,-1,2,-1,2,-1,2,-1,1,-42 379 | 377,person,something,2,-42,2,-42,2,-42,3,-42,2,-42 380 | 378,truck,road,2,1,2,1,2,1,2,-1,3,1 381 | 379,hill,middle,3,-42,3,-42,3,-42,3,-42,3,-42 382 | 380,patient,head,3,1,3,1,2,0,2,0,3,0 383 | 381,boy,grass,3,1,3,1,3,1,3,1,3,1 384 | 382,messenger,something,2,-42,2,-42,2,-42,2,-42,2,-42 385 | 383,step,light,2,-42,2,1,2,1,2,1,2,-42 386 | 384,clothes,floor,3,-1,3,-1,3,-1,3,-1,2,1 387 | 385,person,road,3,-1,3,-1,2,-1,3,-1,3,1 388 | 386,boy,head,3,1,3,1,3,1,2,-1,2,0 389 | 387,lady,boy,2,1,2,1,2,1,2,0,2,0 390 | 388,father,front,3,-42,3,-42,3,-42,3,-42,3,-42 391 | 389,person,everything,2,1,2,1,2,-42,2,-42,2,1 392 | 390,way,room,3,-42,3,-42,3,-42,3,-42,3,-42 393 | 391,person,sea,3,-1,3,-1,2,-42,3,1,2,-42 394 | 392,deck,room,2,-1,3,-1,2,-1,2,-1,2,0 395 | 393,someone,store,3,-1,3,-1,3,-1,3,-1,3,1 396 | 394,eye,breath,1,-42,3,1,2,1,3,1,2,-1 397 | 395,servant,dress,3,1,3,1,3,1,3,1,3,1 398 | 396,way,ball,3,-42,2,-42,3,-42,2,-42,2,-42 399 | 397,wife,picture,2,1,2,1,2,1,2,-1,3,1 400 | 398,gentleman,city,3,-1,3,-1,3,-1,3,-1,2,1 401 | 399,hair,boy,3,-1,3,-1,3,-1,3,-1,2,-42 402 | 400,messenger,way,2,-42,2,-42,2,-42,2,-42,2,-42 403 | 401,vessel,shore,2,-1,1,-42,1,-42,2,-1,2,1 404 | 402,knee,bed,3,-1,3,-1,3,-1,3,-1,3,1 405 | 403,block,room,3,-1,2,-1,2,-1,2,0,2,0 406 | 404,shop,factory,3,-1,3,-1,2,-1,2,0,3,0 407 | 405,someone,middle,2,1,2,1,2,-42,2,-42,2,1 408 | 406,fox,street,3,-1,3,-1,3,-1,3,-1,3,1 409 | 407,something,floor,3,-1,2,-1,3,-1,3,-1,3,1 410 | 408,chest,way,3,-42,3,-42,3,-42,3,-42,3,-42 411 | 409,eye,book,3,-1,3,-1,3,-1,3,-1,1,-42 412 | 410,bag,knife,3,1,2,-1,3,-1,3,-1,1,-42 413 | 411,sea,beach,3,1,2,-42,3,1,2,-1,3,1 414 | 412,servant,town,3,-1,3,-1,3,-1,3,-1,2,1 415 | 413,lady,home,3,-1,3,-1,3,-1,3,-1,3,1 416 | 414,person,floor,2,-1,3,-1,2,-1,3,-1,3,1 417 | 415,mouth,face,3,-1,3,-1,3,0,2,0,2,0 418 | 416,food,place,2,-1,2,-1,2,-42,2,-1,2,-42 419 | 417,stair,block,2,1,1,-42,2,-1,2,0,2,0 420 | 418,father,arm,3,1,3,1,3,1,2,1,2,0 421 | 419,bed,floor,3,-1,2,-1,3,-1,3,-1,2,-42 422 | 420,bed,face,3,1,3,1,2,1,2,1,2,-1 423 | 421,shirt,skirt,3,0,2,0,3,0,3,0,3,0 424 | 422,lady,boat,3,-1,3,-1,2,-1,3,-1,2,-1 425 | 423,boy,middle,2,-42,2,-42,2,-42,3,-42,2,-42 426 | 424,lady,scene,3,-1,2,-1,2,-1,2,-1,2,1 427 | 425,room,picture,3,1,3,1,3,1,3,1,2,0 428 | 426,lady,room,3,-1,3,-1,3,-1,2,-1,3,1 429 | 427,teacher,purse,3,1,3,1,3,1,2,-42,3,1 430 | 428,piece,coal,2,-42,2,-42,2,-42,2,-42,2,0 431 | 429,city,position,2,-42,2,-42,2,-42,2,-42,2,-42 432 | 
430,stair,house,3,-1,3,-1,2,-1,2,0,2,0 433 | 431,vessel,direction,3,-42,3,-42,3,-42,3,-42,3,-42 434 | 432,watch,pocket,3,-1,3,1,2,1,3,1,1,-42 435 | 433,friend,room,3,-1,2,-1,1,-42,3,-1,3,1 436 | 434,clothes,hand,3,1,2,1,3,-1,3,-1,2,-1 437 | 435,fox,back,2,1,2,1,2,1,2,-1,2,1 438 | 436,master,piece,2,1,2,-42,2,1,1,-42,2,-42 439 | 437,glass,floor,3,-1,3,-1,3,-1,2,0,2,0 440 | 438,sun,wall,3,1,3,1,3,1,1,-42,2,1 441 | 439,river,city,2,-1,3,-1,2,1,3,-1,3,1 442 | 440,messenger,farm,3,-1,3,-1,3,-1,3,-1,3,1 443 | 441,person,someone,3,0,3,0,3,0,3,0,3,0 444 | 442,boy,breath,2,1,2,1,2,1,2,1,2,1 445 | 443,gentleman,end,3,-42,3,-42,3,-42,3,-42,3,-42 446 | 444,body,floor,2,-1,2,-1,3,-1,3,-1,2,1 447 | 445,someone,floor,3,-1,3,-1,3,-1,3,-1,3,1 448 | 446,daughter,bow,3,1,3,1,3,1,2,-1,2,1 449 | 447,hair,temple,3,-1,3,-1,3,-1,3,-1,3,1 450 | 448,brother,city,3,-1,3,-1,3,-1,3,-1,3,1 451 | 449,seal,place,3,-1,3,-1,3,-1,3,-1,3,1 452 | 450,foot,street,3,-1,3,-1,2,-1,3,-1,2,1 453 | 451,person,gentleman,3,0,3,0,2,0,3,0,3,0 454 | 452,base,way,2,-42,2,-42,2,-42,2,-42,2,-42 455 | 453,step,air,2,-42,2,-42,2,-42,2,-42,2,-42 456 | 454,master,ball,3,1,3,1,3,1,2,-1,3,-1 457 | 455,foot,ground,3,-1,2,-1,2,-1,3,-1,3,1 458 | 456,brother,door,2,-1,2,1,3,-1,2,-1,3,1 459 | 457,house,floor,3,1,3,1,2,-1,2,-1,2,0 460 | 458,arm,position,2,-42,2,-42,2,-42,2,-42,2,-42 461 | 459,clock,wife,3,-1,3,-1,2,-1,2,1,3,-1 462 | 460,stone,lamp,2,-1,1,-42,3,1,3,1,2,1 463 | 461,child,farm,3,-1,3,-1,3,-1,3,-1,3,1 464 | 462,stone,light,2,-42,2,1,2,1,2,1,2,-42 465 | 463,button,side,2,-1,2,-1,2,-1,2,-42,2,-42 466 | 464,person,flood,3,-1,3,-1,3,-1,3,1,3,-1 467 | 465,patient,piece,2,-42,2,-42,3,-42,2,-42,2,-42 468 | 466,master,way,2,-42,2,-42,2,-42,2,-42,2,-42 469 | 467,chest,fist,3,1,3,1,2,-1,2,1,2,-1 470 | 468,magistrate,seal,3,1,3,1,2,1,2,1,2,1 471 | 469,place,city,1,-42,1,-42,2,0,3,0,2,0 472 | 470,brother,way,2,-42,3,-42,2,-42,2,-42,2,-42 473 | 471,office,position,2,1,2,1,2,1,2,1,2,-42 474 | 472,beard,air,2,-1,1,-42,3,-42,2,1,2,-1 475 | 473,person,result,2,-42,2,-42,2,-42,3,-42,3,-42 476 | 474,king,response,2,-42,2,-42,3,-42,3,-42,2,-42 477 | 475,block,road,2,-1,2,-1,1,-42,2,0,1,-42 478 | 476,cup,room,3,-1,3,-1,3,-1,2,0,2,0 479 | 477,parcel,piece,2,-42,2,-42,3,-42,2,-42,3,-42 480 | 478,stream,end,3,-42,3,-42,3,-42,3,-42,3,-42 481 | 479,meal,hand,3,1,2,1,3,-1,2,-1,2,-1 482 | 480,world,something,3,1,3,1,3,1,2,1,2,1 483 | 481,head,face,3,1,3,1,3,1,3,1,2,0 484 | 482,horse,piece,3,1,3,1,3,1,1,-42,3,1 485 | 483,sun,room,3,1,3,1,2,1,2,-1,2,1 486 | 484,horse,picture,3,1,3,1,2,1,2,-1,3,1 487 | 485,way,flood,3,-42,3,-42,3,-42,3,-42,3,-42 488 | 486,breath,shore,3,-1,3,-1,3,-1,3,-1,2,1 489 | 487,tear,side,2,-42,1,-42,2,-1,2,-1,2,1 490 | 488,bed,dress,3,1,3,1,3,1,3,1,2,-1 491 | 489,office,side,3,-42,3,-42,3,-42,3,-42,3,-42 492 | 490,person,world,3,-1,3,-1,3,-1,2,-1,2,-1 493 | 491,magistrate,hand,3,1,3,1,2,1,1,-42,2,0 494 | 492,lady,stone,3,1,3,1,3,-1,3,-1,2,1 495 | 493,boy,place,2,-1,2,-1,2,-1,2,-1,3,1 496 | 494,stream,direction,3,-42,3,-42,2,-42,2,-42,2,-42 497 | 495,head,grass,2,1,3,1,3,1,2,1,3,1 498 | 496,person,father,3,0,3,0,3,0,2,0,2,0 499 | 497,father,shore,3,-1,3,-1,2,-1,1,-42,2,-42 500 | 498,father,wall,3,-1,3,-1,2,-1,3,-1,3,1 501 | 499,foot,boat,3,-1,3,-1,3,-1,3,-1,2,-1 502 | 500,piece,door,2,-42,2,-42,3,-42,2,-42,2,-42 503 | 501,car,camp,2,-1,1,-42,2,-42,2,1,2,1 504 | 502,sink,something,3,-42,3,-42,3,-42,3,-42,3,-42 505 | 503,father,face,3,1,3,1,2,1,2,0,2,1 506 | 504,bank,position,2,-42,2,-42,2,-42,2,-42,2,-42 507 | 505,lady,step,2,1,2,1,2,1,2,-1,3,1 508 
| 506,watch,piece,3,-42,3,-42,3,-42,3,-42,3,-42 509 | 507,step,something,3,-42,3,-42,3,-42,3,-42,3,-42 510 | 508,daughter,floor,3,-1,3,-1,3,-1,3,-1,3,1 511 | 509,person,beard,3,1,3,1,3,1,2,1,2,0 512 | 510,child,wife,3,-1,3,-1,3,-1,2,0,2,1 513 | 511,boat,town,3,-1,3,-1,3,-1,1,-42,3,1 514 | 512,lady,elbow,3,1,3,1,3,1,1,-42,3,0 515 | 513,clock,bed,3,-1,3,-1,2,-1,2,1,3,0 516 | 514,father,rain,2,1,2,1,2,1,2,1,3,-1 517 | 515,nose,everything,3,-1,3,-1,2,-1,2,-1,2,-1 518 | 516,call,end,2,-42,2,-42,2,-42,2,-42,2,-42 519 | 517,cross,side,2,-1,2,-1,2,1,2,1,2,0 520 | 518,door,car,3,-1,3,-1,1,-42,2,0,3,-1 521 | 519,place,position,2,-42,2,-42,2,-42,2,-42,2,-42 522 | 520,breath,door,2,-1,2,-1,2,-1,2,-1,2,1 523 | 521,person,state,2,-1,2,-1,2,-1,2,-42,2,1 524 | 522,home,head,3,1,3,1,3,1,1,-42,3,-1 525 | 523,energy,arm,1,-42,2,-1,2,-42,2,-1,2,1 526 | 524,spoon,hand,3,-1,3,-1,3,-1,3,1,2,-1 527 | 525,king,direction,3,-42,2,-42,2,-42,1,-42,2,-42 528 | 526,room,bed,3,1,2,1,3,1,2,1,2,0 529 | 527,eye,master,2,-1,2,-1,2,-1,2,-1,1,-42 530 | 528,state,head,2,1,2,1,2,1,2,1,2,-1 531 | 529,foot,bottle,2,0,3,-42,1,-42,3,-1,3,1 532 | 530,friend,face,3,1,3,1,3,1,1,-42,2,0 533 | 531,energy,breath,2,-42,3,-42,2,-42,3,-42,3,-42 534 | 532,way,something,3,-42,3,-42,3,-42,3,-42,3,-42 535 | 533,mountain,end,2,-42,2,1,2,1,2,1,2,-42 536 | 534,source,way,3,-42,3,-42,3,-42,3,-42,3,-42 537 | 535,father,wind,2,-1,2,1,2,1,2,1,2,-1 538 | 536,lady,bag,3,1,3,1,3,1,2,1,3,1 539 | 537,heaven,light,2,1,2,-42,2,-42,1,-42,1,-42 540 | 538,person,room,3,-1,3,-1,3,-1,3,-1,3,1 541 | 539,lady,hat,3,1,3,1,3,1,2,-1,3,1 542 | 540,river,place,3,-42,3,-42,2,-42,2,-1,2,1 543 | 541,way,wife,3,-42,3,-42,3,-42,3,-42,3,-42 544 | 542,wife,air,2,-1,2,1,2,1,2,1,2,-42 545 | 543,stream,flood,3,-1,2,-1,3,-1,1,-42,3,-1 546 | 544,king,mouth,2,1,2,1,2,1,3,0,2,0 547 | 545,boat,shore,3,-1,2,-1,1,-42,3,1,3,1 548 | 546,head,throat,3,1,3,1,3,1,1,-42,2,0 549 | 547,father,parcel,2,1,2,1,2,1,2,-1,2,1 550 | 548,finger,everything,3,-1,3,-1,2,-1,2,-42,2,-42 551 | 549,ash,hand,3,-1,3,-1,2,-1,2,-1,2,-1 552 | 550,someone,wall,3,-1,3,-1,3,-1,3,-1,3,1 553 | 551,hair,something,2,-1,2,-1,2,-1,2,-1,2,-42 554 | 552,arm,strap,2,1,2,1,2,1,1,-42,2,1 555 | 553,coach,book,3,1,3,1,3,1,2,-1,2,1 556 | 554,office,piece,3,1,2,1,2,-42,2,-42,2,-1 557 | 555,ear,mouth,1,-42,1,-42,2,-1,2,-42,2,0 558 | 556,horse,position,2,-42,2,-42,2,-42,2,-42,2,-42 559 | 557,hat,something,2,-1,2,-1,2,-1,2,-1,1,-42 560 | 558,wife,wind,2,-42,1,-42,2,-42,1,-42,2,-1 561 | 559,foot,place,3,-1,3,-1,3,-1,3,-1,3,1 562 | 560,call,friend,2,-42,2,-42,2,-42,2,-42,2,-42 563 | 561,person,phone,3,1,3,1,3,1,2,-1,2,1 564 | 562,train,room,3,1,3,1,3,1,2,1,3,1 565 | 563,back,middle,2,-1,1,-42,1,-42,1,-42,2,0 566 | 564,king,abode,3,-1,3,-1,3,-1,3,-1,3,1 567 | 565,dress,skirt,2,1,2,0,2,0,2,0,2,0 568 | 566,someone,brick,3,1,3,1,2,-1,3,-1,3,1 569 | 567,king,exile,2,0,2,0,2,0,2,0,2,0 570 | 568,fist,mountain,3,-1,3,-1,2,-1,3,-1,3,1 571 | 569,cup,middle,3,-42,3,-42,3,-42,3,-42,3,-42 572 | 570,messenger,doorway,3,-1,2,-42,1,-42,3,-1,3,1 573 | 571,stone,position,2,-42,2,1,2,1,2,1,2,-42 574 | 572,river,eye,3,1,3,1,3,1,3,-1,2,1 575 | 573,messenger,gate,2,-1,2,-1,3,-1,3,-1,2,1 576 | 574,poet,glass,3,1,3,1,3,1,3,-1,2,1 577 | 575,someone,end,2,-42,2,-42,2,-42,2,-42,2,-42 578 | 576,rain,shore,3,-42,2,-42,2,-1,2,-1,2,1 579 | 577,chair,place,2,-1,2,-1,1,-42,1,-42,1,-42 580 | 578,place,head,2,1,2,-42,2,1,1,-42,2,-1 581 | 579,world,breath,3,1,3,1,3,1,2,1,3,-1 582 | 580,house,wall,3,1,3,1,2,1,2,0,3,0 583 | 581,door,bed,2,-1,3,-1,1,-42,2,1,1,-42 584 | 
582,watch,strap,1,-42,2,1,1,-42,2,1,2,-42 585 | 583,someone,object,2,1,2,1,1,-42,2,-1,2,1 586 | 584,wife,sip,2,1,2,1,2,1,2,1,1,-42 587 | 585,current,stream,2,-42,2,-42,2,-42,2,-42,2,-42 588 | 586,hair,middle,2,-42,2,-42,2,-42,2,-42,3,-42 589 | 587,person,sun,3,-1,3,-1,2,-1,2,-1,1,-42 590 | 588,river,wind,2,-42,2,-42,2,-42,2,-42,2,-42 591 | 589,child,hand,3,1,3,1,3,1,2,-1,1,-42 592 | 590,world,sun,3,-1,2,-1,2,-42,2,1,2,1 593 | 591,finger,glass,2,-1,3,-1,3,1,3,-1,3,1 594 | 592,place,friend,2,1,2,1,2,1,2,1,3,-1 595 | 593,master,room,3,-1,3,-1,3,-1,3,-1,3,1 596 | 594,gentleman,air,2,-1,3,1,1,-42,3,1,2,-1 597 | 595,river,side,2,-42,2,1,2,-42,1,-42,3,1 598 | 596,person,mouth,3,1,3,1,3,1,1,-42,2,0 599 | 597,door,doorway,2,-1,2,1,1,-42,2,0,3,1 600 | 598,result,effect,2,-42,3,-42,3,-42,3,-42,3,-42 601 | 599,watch,ship,3,-1,3,-1,2,-1,2,-1,2,-42 602 | 600,office,floor,3,1,3,1,2,0,2,0,3,0 603 | 601,person,heaven,2,-1,2,-1,2,-1,2,-1,2,-42 604 | 602,wife,glass,2,1,3,1,3,1,2,-1,3,1 605 | 603,king,air,2,-1,2,1,1,-42,2,1,2,-1 606 | 604,someone,hand,3,1,3,1,3,1,2,0,2,0 607 | 605,fool,hand,2,-42,2,-42,2,-42,2,-42,2,-42 608 | 606,house,call,2,-42,2,-42,2,-42,2,-42,2,-42 609 | 607,ice,way,3,-42,2,-42,2,-42,2,-42,2,-42 610 | 608,person,brick,3,1,3,1,2,1,3,-1,3,1 611 | 609,friend,direction,2,-42,2,-42,2,-42,2,-42,2,-42 612 | 610,piece,lip,1,-42,2,1,2,1,2,1,2,-42 613 | 611,someone,bottle,3,1,3,1,3,1,3,-1,3,1 614 | 612,person,purse,3,1,3,1,3,1,1,-42,3,1 615 | 613,victim,scene,3,-1,2,-1,2,-42,2,-42,3,1 616 | 614,cup,front,2,-1,2,-1,2,-1,1,-42,2,0 617 | 615,lady,nose,3,1,3,1,2,1,1,-42,2,0 618 | 616,town,shore,2,-42,3,-42,2,-42,2,-42,2,-42 619 | 617,friend,position,2,-42,2,-42,2,-42,2,-42,2,-42 620 | 618,brother,middle,2,-42,2,-42,2,-42,2,-42,2,-42 621 | 619,boat,lock,3,1,3,1,2,1,1,-42,3,1 622 | 620,person,sip,2,-42,2,-42,2,-42,3,-42,2,-42 623 | 621,element,hand,2,-42,2,-42,2,-42,3,-42,2,-42 624 | 622,lady,breath,3,1,3,1,3,1,2,1,1,-42 625 | 623,father,direction,3,-42,2,-42,3,-42,3,-42,2,-42 626 | 624,king,rope,3,1,3,1,2,1,3,1,3,1 627 | 625,door,book,3,1,3,1,3,1,3,1,2,0 628 | 626,town,position,2,-42,2,-42,2,-42,2,-42,2,-42 629 | 627,person,lung,3,1,3,1,3,1,2,1,2,0 630 | 628,sail,ship,3,-1,3,-1,3,-1,3,-1,2,0 631 | 629,abode,place,1,-42,2,0,2,0,2,0,2,0 632 | 630,window,effect,3,-42,2,-42,3,-42,2,-42,2,-42 633 | 631,hill,beach,2,-1,1,-42,1,-42,2,1,2,0 634 | 632,watch,friend,3,-1,3,-1,2,-42,2,1,1,-42 635 | 633,something,book,3,-42,3,-42,3,-42,3,-42,3,-42 636 | 634,something,light,1,-42,3,1,3,1,2,1,3,-1 637 | 635,person,hedge,3,-1,2,-1,3,1,3,1,3,1 638 | 636,person,gully,3,-1,3,-1,2,-1,3,-1,3,1 639 | 637,lady,call,2,-42,2,1,2,-42,2,-42,3,-42 640 | 638,person,way,3,-42,3,-42,2,-42,2,-42,2,-42 641 | 639,hat,hand,3,1,2,-1,2,-1,2,-1,2,-1 642 | 640,back,end,2,-42,2,-42,2,-42,2,-42,2,-42 643 | 641,world,mouth,3,1,3,1,3,1,2,1,2,1 644 | 642,father,doorway,3,-1,2,-1,2,-1,3,-1,3,1 645 | 643,brother,vessel,2,-1,2,-1,1,-42,3,-1,2,-1 646 | 644,knee,hand,2,0,1,-42,3,0,2,1,1,-42 647 | 645,phone,end,3,-42,3,-42,3,-42,3,-42,3,-42 648 | 646,lady,back,2,-42,2,1,2,1,1,-42,1,-42 649 | 647,chair,gate,2,-1,3,-1,2,-1,2,0,2,0 650 | 648,result,something,3,-42,3,-42,3,-42,3,-42,3,-42 651 | 649,knife,floor,3,-1,3,-1,2,-1,2,-1,2,0 652 | 650,person,edition,1,-42,1,-42,2,-42,2,-1,2,-42 653 | 651,sail,way,3,-42,3,-42,3,-42,2,-42,2,-42 654 | 652,factory,room,3,1,3,1,1,-42,1,-42,1,-42 655 | 653,brother,end,3,-42,3,-42,3,-42,3,-42,3,-42 656 | 654,hat,backwards,3,-42,3,-42,3,-42,3,-42,3,-42 657 | 655,lady,piano,3,-1,2,-1,2,-1,3,-1,3,1 658 | 
656,town,effect,2,-42,2,-42,2,-42,2,-42,2,-42 659 | 657,bank,way,3,-42,2,-42,3,-42,3,-42,3,-42 660 | 658,king,stream,3,-1,3,-1,3,1,2,1,2,1 661 | 659,coach,road,3,-1,2,-1,3,-1,3,-1,3,1 662 | 660,brother,dinner,3,1,3,1,3,1,2,-42,3,1 663 | 661,seed,knee,3,-1,3,-1,2,-1,2,1,2,-1 664 | 662,person,violin,3,1,3,1,3,1,3,-1,3,1 665 | 663,everything,ear,2,1,2,1,2,-42,2,-42,2,-42 666 | 664,person,call,2,1,2,-42,2,-42,2,1,1,-42 667 | 665,arm,way,3,-42,2,-42,2,-42,2,-42,2,-42 668 | 666,piece,light,3,-42,2,-42,3,-42,2,-42,2,-1 669 | 667,father,place,3,-1,3,-1,3,-1,3,-1,3,1 670 | 668,house,boat,3,1,3,1,3,1,2,0,3,-1 671 | 669,foot,middle,2,-42,2,-42,2,-42,2,-42,2,0 672 | 670,house,something,2,1,2,1,1,-42,2,1,2,-1 673 | 671,person,rain,2,1,2,-42,2,1,2,1,3,-1 674 | 672,place,train,2,-42,2,-42,2,-42,2,-42,2,-42 675 | 673,person,back,3,1,3,1,2,1,2,-1,2,1 676 | 674,victim,meal,3,1,3,1,3,1,3,1,3,1 677 | 675,piece,way,2,-1,2,-42,3,-42,3,-42,2,-42 678 | 676,boy,rope,3,1,3,1,3,1,3,1,3,1 679 | 677,body,train,3,-1,3,-1,3,-1,3,-1,3,-1 680 | 678,lady,chin,3,1,3,1,2,0,2,-1,2,0 681 | 679,body,grass,2,-1,2,1,2,1,2,-1,3,1 682 | 680,brother,breath,2,1,3,1,2,1,3,1,2,-1 683 | 681,room,middle,2,-42,2,-42,2,-42,2,-42,2,-42 684 | 682,child,soul,2,1,3,1,2,1,2,1,1,-42 685 | 683,heaven,horse,2,1,2,1,2,1,2,1,2,-42 686 | 684,foot,arm,3,-1,3,-1,2,0,1,-42,3,0 687 | 685,daughter,lamp,3,1,3,1,3,1,3,-1,3,1 688 | 686,poet,position,2,-42,2,-42,2,-42,2,-1,2,-42 689 | 687,father,call,2,1,2,1,2,1,1,-42,2,-1 690 | 688,sea,mouth,3,1,3,1,3,1,3,-1,3,1 691 | 689,person,bay,3,-1,2,-1,2,-1,2,-1,2,1 692 | 690,servant,mouth,3,1,3,1,3,1,2,0,3,1 693 | 691,everything,piece,3,1,3,1,2,1,1,-42,2,-42 694 | 692,father,boy,3,1,3,1,2,1,2,1,2,1 695 | 693,father,knee,3,1,3,1,3,1,2,-1,2,1 696 | 694,patient,meal,3,1,3,1,2,1,1,-42,3,1 697 | 695,king,dress,2,1,2,1,2,1,2,1,3,1 698 | 696,sun,train,3,1,3,1,3,1,2,1,2,1 699 | 697,house,street,2,-1,3,-1,3,-42,1,-42,3,-42 700 | 698,exile,element,2,1,2,1,1,-42,2,-1,2,1 701 | 699,ice,everything,3,-1,3,-1,2,-1,2,-42,2,-42 702 | 700,room,gate,2,1,2,1,2,-42,2,1,2,-42 703 | 701,wife,bottle,2,1,3,1,3,1,2,-1,3,1 704 | 702,beard,lip,3,1,1,-42,2,-42,1,-42,2,-1 705 | 703,king,wife,3,1,2,1,2,1,2,1,1,-42 706 | 704,door,stream,3,-1,2,-1,1,-42,3,1,3,-1 707 | 705,daughter,bed,3,-1,2,-1,2,-1,3,-1,3,1 708 | 706,victim,doorway,2,-1,2,-42,2,-1,2,-1,2,1 709 | 707,arm,something,2,-42,2,-42,2,-42,2,-42,2,-42 710 | 708,piece,tree,2,-1,2,-1,2,-42,2,-42,2,-42 711 | 709,eye,ball,3,-1,3,-1,3,-1,3,-1,3,-1 712 | 710,horse,air,2,-1,3,1,1,-42,2,1,2,0 713 | 711,foot,bed,3,-1,3,-1,3,-1,3,-1,3,1 714 | 712,boat,way,2,-42,2,-42,2,-42,2,-42,2,-42 715 | 713,master,fist,3,1,3,1,3,1,2,-1,2,0 716 | 714,hat,head,3,-1,3,-1,3,-1,3,-1,1,-42 717 | 715,chin,face,3,-1,3,-1,1,-42,2,1,2,0 718 | 716,lock,throat,3,-1,2,-1,3,1,2,1,2,-1 719 | 717,heaven,rain,2,1,2,1,2,1,1,-42,2,-1 720 | 718,father,position,2,-42,2,-42,2,-42,2,-42,2,-42 721 | 719,person,lover,3,0,3,0,3,0,3,0,2,0 722 | 720,stone,breath,3,1,3,1,3,1,3,1,3,-1 723 | 721,someone,picture,2,1,2,1,2,1,2,-42,2,1 724 | 722,end,position,3,-42,3,-42,3,-42,3,-42,3,-42 725 | 723,ice,vessel,1,-42,1,-42,1,-42,2,1,2,-1 726 | 724,window,object,3,-42,3,-42,2,-42,2,-42,3,-42 727 | 725,house,tree,3,1,3,1,3,1,2,0,2,0 728 | 726,effect,end,3,-42,3,-42,3,-42,3,-42,3,-42 729 | 727,fox,stone,3,1,3,1,3,-1,3,-1,3,1 730 | 728,master,door,2,-42,2,-42,2,-42,2,-42,2,-42 731 | 729,anything,mouth,2,1,1,-42,3,-42,1,-42,3,-42 732 | 730,knee,way,3,-42,3,-42,3,-42,3,-42,3,-42 733 | 731,end,face,2,-42,3,-42,3,-42,3,-42,2,-42 734 | 
-------------------------------------------------------------------------------- /src/data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for querying ngramdb and managing local cache(s) of query results. 3 | 4 | Author: mbforbes 5 | """ 6 | 7 | # IMPORTS 8 | # ------------------------------------------------------------------------------ 9 | 10 | from __future__ import division 11 | 12 | # builtins 13 | import code # code.interact(local=dict(globals(), **locals())) 14 | from collections import Counter 15 | import cPickle as pickle 16 | import glob 17 | import math 18 | import os 19 | import sys 20 | 21 | # 3rd party 22 | from nltk.corpus import wordnet as wn 23 | from nltk.stem.wordnet import WordNetLemmatizer 24 | from tqdm import tqdm 25 | 26 | # local 27 | from ngramdb import NgramDb 28 | from ngramdb.util import pprint_ngram_list 29 | 30 | 31 | # CONSTANTS 32 | # ------------------------------------------------------------------------------ 33 | 34 | # ngramdb cache info 35 | CACHE_SPREAD_DIR = 'data/ngramdb/queries/' 36 | CACHE_SPREAD_EXT = '.cache' 37 | PMI_CACHE_FN = 'data/ngramdb/pmi/pmi.cache' 38 | 39 | QUIT = 'q' 40 | 41 | # Wordnet stuff 42 | # The following synset names are synsets given by 'abstraction' 43 | ABSTRACT_SS_NAMES = [ 44 | 'abstraction.n.01', 45 | 'abstraction.n.02', 46 | 'abstraction.n.03', 47 | 'abstraction.n.04', 48 | 'abstractedness.n.01', 49 | 'abstraction.n.06', 50 | ] 51 | ABSTRACT_SS = [wn.synset(x) for x in ABSTRACT_SS_NAMES] 52 | 53 | # Verb endings 54 | SUBS = ['_d', '_p', '_dp', '_op', '_od'] 55 | 56 | # For PMI and use in system. 57 | 58 | # We deal with people separately because we assume that all nominal subjects 59 | # refer to the same physical-propertied object (roughly the same size, weight, 60 | # etc.) Here show all nouns that we assume refer to a person. (I only saw 'man' 61 | # but I'm adding more in case others show up.) We'll remove all of these and 62 | # include only HUMAN_NOUN. 63 | PERSON_NOUNS = ['man', 'woman', 'he', 'she', 'I', 'you', 'human', 'person'] 64 | 65 | # The replacement for all PERSON_NOUNS. 66 | HUMAN_NOUN = 'PERSON' 67 | 68 | 69 | # TOP LEVEL FUNCTIONS 70 | # ------------------------------------------------------------------------------ 71 | 72 | def attr_filter(attr, val): 73 | """ 74 | General filter constructor: ensures obj's attr == val. Example attrs: 75 | - 'deprel' 76 | - 'postag' 77 | 78 | Takes: 79 | attr (str) 80 | val (str) 81 | 82 | Returns: 83 | f(obj) -> bool 84 | """ 85 | return lambda o: o.__dict__[attr] == val 86 | 87 | 88 | def passes_filters(token, filters): 89 | """ 90 | Returns whether token passes all filters. 91 | 92 | Takes: 93 | token (Token) 94 | filters ([f(Token) -> bool]) 95 | 96 | """ 97 | for f in filters: 98 | if not f(token): 99 | return False 100 | return True 101 | 102 | 103 | def filter_count_n(ngrams, fs): 104 | """ 105 | Args: 106 | ngrams ([Ngram] (I think)) 107 | fs ([[filter]]): Critical: *LIST* of filter lists. 108 | 109 | Returns: 110 | Counter[tuple(str)] 111 | """ 112 | c = Counter() 113 | for idx, ng in enumerate(ngrams): 114 | # Grab words via filters. 115 | wlists = [] 116 | for f in fs: 117 | w = [x for x in ng if passes_filters(x, f)] 118 | wlists.append(w) 119 | 120 | # Don't add this ngram if any word has multiple matches. 
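# (Illustrative example, not from the data: with a filter list like
# [('deprel', 'dobj')], an ngram containing two 'dobj' tokens matches
# ambiguously, so it gets skipped.)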
121 | mul = False 122 | for w in wlists: 123 | if len(w) != 1: 124 | mul = True 125 | break 126 | if mul: 127 | continue 128 | 129 | # Flatten the list 130 | ws = [l[0] for l in wlists] 131 | 132 | # Check the positions are increasing. 133 | noninc = False 134 | for i in range(0, len(ws) - 1): 135 | if ws[i].position >= ws[i+1].position: 136 | noninc = True 137 | break 138 | if noninc: 139 | continue 140 | 141 | # Turn into a tuple (to be a key of the Counter). 142 | tup = tuple([w.surface for w in ws]) 143 | c[tup] += ng.freq 144 | return c 145 | 146 | 147 | def is_abstract(noun): 148 | """ 149 | Try to (heuristically) filter abstract nouns. 150 | 151 | Args: 152 | noun (str|unicode) 153 | """ 154 | assert type(noun) in [str, unicode], 'bad noun type: %r. noun: %s' % ( 155 | type(noun), noun) 156 | noun_ss = wn.synsets(noun, pos=wn.NOUN) 157 | 158 | # if wordnet doesn't know about the noun, let it fly 159 | if len(noun_ss) == 0: 160 | return False 161 | 162 | # Checking just the first (most common?) synset for the noun, but checking 163 | # all hypernym paths for that synset, and all paths must be clean of any 164 | # abstraction. 165 | 166 | # previously, looped over all with: 167 | # for n_ss in noun_ss: 168 | # but that proved not restrictive enough 169 | n_ss = noun_ss[0] 170 | paths = n_ss.hypernym_paths() 171 | # all ps must pass 172 | paths_good = True 173 | for p in paths: 174 | for a in ABSTRACT_SS: 175 | if a in p: 176 | paths_good = False 177 | break 178 | if not paths_good: 179 | break 180 | if paths_good: 181 | # debug 182 | # print paths 183 | return False 184 | return True 185 | 186 | 187 | def filter_abstract_from_counter(c): 188 | """ 189 | Try to (heuristically) filter abstract nouns. 190 | 191 | Args: 192 | c (Counter) Frequency counts of nouns 193 | """ 194 | for n in c.keys(): 195 | if is_abstract(n): 196 | del c[n] 197 | 198 | 199 | # CLASSES 200 | # ------------------------------------------------------------------------------ 201 | 202 | class SizeQueryN(object): 203 | """Keys to the n-obj cache""" 204 | 205 | def __init__(self, query, raw_f_list): 206 | """ 207 | Takes: 208 | query (NgramDbQuery) 209 | raw_f_list ([[(str, str)]]) 210 | """ 211 | self.query = query 212 | self.raw_f_list = raw_f_list 213 | 214 | def __eq__(self, other): 215 | return (self.query == other.query and 216 | self.raw_f_list == other.raw_f_list) 217 | 218 | def __hash__(self): 219 | f_hash = hash(tuple([tuple(rf) for rf in self.raw_f_list])) 220 | return hash((hash(self.query), f_hash)) 221 | 222 | 223 | class DBWrapper(object): 224 | def __init__(self, cxn_id, fn_n): 225 | """ 226 | Args: 227 | cxn_id (str) connection ID for the DB 228 | fn_n (str) directory of the spread cache for n-obj query results 229 | """ 230 | self.db = NgramDb(cxn_id) 231 | self.fn_n = fn_n 232 | self.cache_n = {} 233 | 234 | def load_caches(self): 235 | if os.path.isdir(self.fn_n): 236 | # Turning off for now; should have logging framework. 237 | # print '' 238 | files = glob.glob(self.fn_n + '*' + CACHE_SPREAD_EXT) 239 | # hashes = [os.path.split(f)[1].rstrip(CACHE_SPREAD_EXT) for f in files] 240 | for f in tqdm(files): 241 | k, v = self.load_spreadfile(f) 242 | self.cache_n[k] = v 243 | # Turning off for now; should have logging framework. 244 | # print '' % (len(self.cache_n.keys())) 245 | 246 | def print_cache_stats(self): 247 | """ 248 | Prints aggregated verb stats for cache.
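Output format (illustrative only): a '100 most common verbs:' header,
then one tab-separated verb/count line per verb.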
249 | """ 250 | n = 100 251 | c = Counter() 252 | for k, v in self.cache_n.iteritems(): 253 | w = [w for w in k.query.words if w is not None] 254 | w = w[0] 255 | c[w] += sum(v.values()) 256 | print '%d most common verbs:' & (n) 257 | for k,v in c.most_common(n): 258 | print '\t', k, '\t', v 259 | 260 | def write_caches(self): 261 | """ 262 | Writes (all) caches to files. 263 | """ 264 | print '' 265 | self.write_spread(self.cache_n, CACHE_SPREAD_DIR) 266 | 267 | def write_kv(self, fn, k, v): 268 | """ 269 | Writes a single k, v to file name fn. 270 | 271 | Args: 272 | fn (str) 273 | k (Object) 274 | v (Object) 275 | """ 276 | with open(fn, 'w') as f: 277 | pickle.dump(k, f) 278 | pickle.dump(v, f) 279 | 280 | def load_spreadfile(self, fn): 281 | """ 282 | utility to load a single spreadfile and return its contents as k, v 283 | 284 | Args: 285 | fn (str) 286 | """ 287 | with open(fn, 'r') as f: 288 | k = pickle.load(f) 289 | v = pickle.load(f) 290 | return k, v 291 | 292 | def check_cache_files(self, d): 293 | """ 294 | Checks d to find any malformed cache entries. Does not require cache to 295 | be loaded in advance. 296 | 297 | Args: 298 | d (str): Directory. 299 | """ 300 | files = glob.glob(d + '*' + CACHE_SPREAD_EXT) 301 | n_good, n_bad, n_total = 0, 0, 0 302 | for f in files: 303 | good = True 304 | try: 305 | k, v = self.load_spreadfile(f) 306 | if type(k) != SizeQueryN: 307 | good = False 308 | print 'ERRR: cache file key is not type SizeQueryN; is %r (%s)' % (type(k), f) 309 | if type(v) != Counter: 310 | good = False 311 | print 'ERRR: cache file value is not type Counter; is %r (%s)' % (type(v), f) 312 | except: 313 | good = False 314 | print 'ERRR: Problem loading cache file (%s)' % (f) 315 | if good: 316 | n_good += 1 317 | else: 318 | n_bad += 1 319 | n_total += 1 320 | if n_good + n_bad != n_total: 321 | print 'ERRR: cache checking error: good (%d) + bad (%d) != total (%d)' % (n_good, n_bad, n_total) 322 | print '%d/%d/%d good/bad/total' % (n_good, n_bad, n_total) 323 | 324 | def write_spread_item(self, k, v, d): 325 | """ 326 | Writes a single item (k, v) to d. 327 | 328 | Args: 329 | k (SizeQueryN): Cache key. 330 | v (Counter): Cache value. 331 | d (str): Directory. 332 | 333 | Returns: 334 | bool: True if the item was written, False if it was found and 335 | didn't need to be written. 336 | """ 337 | orig = str(hash(k)) 338 | written, done = False, False 339 | postfix, postfix_n = '', 0 340 | while not done: 341 | fn = os.path.join(d, orig) + postfix + CACHE_SPREAD_EXT 342 | 343 | # no collision! just write. 344 | if not os.path.isfile(fn): 345 | self.write_kv(fn, k, v) 346 | written = True 347 | done = True 348 | else: 349 | # existing key, existing value 350 | ek, ev = self.load_spreadfile(fn) 351 | if ek == k and ev == v: 352 | # best case scenario: collision and it's what we want! 353 | # no need to write. 354 | done = True 355 | else: 356 | # a collision AND it's not what we want. try the next 357 | # one. 358 | postfix_n += 1 359 | postfix = '-%d' % (postfix_n) 360 | return written 361 | 362 | def write_spread(self, c, d): 363 | """ 364 | Lazily writes out c across d, only writing what is necessary. 365 | 366 | Args: 367 | c (dict): Cache. 368 | d (str): Directory. 
369 | """ 370 | # tracking 371 | n_total, n_written = len(c.keys()), 0 372 | 373 | # write me maybe 374 | for k,v in c.iteritems(): 375 | written = self.write_spread_item(k, v, d) 376 | n_written = n_written + 1 if written else n_written 377 | print '<%d/%d/%d written/skipped/total>' % (n_written, n_total - n_written, n_total) 378 | 379 | def run(self, sq, fetch=False): 380 | """ 381 | Cache-aware query runner. 382 | 383 | If fetch=True, always fetches the result and returns it as the second 384 | value. Otherwise, the second value returned will be None. 385 | 386 | Takes: 387 | sq (SizeQueryN) 388 | 389 | Returns: 390 | Counter[tuple(str)], (result|None) 391 | """ 392 | # Cache getting layer. 393 | cache = self.cache_n 394 | in_cache = sq in cache.keys() 395 | if (not fetch) and in_cache: 396 | # Turning off for now; should have logging framework. 397 | # print '' 398 | return cache[sq], None 399 | if fetch: 400 | print '' 401 | else: 402 | print '' 403 | 404 | # Running layer. 405 | res = self.db.run_query(sq.query) 406 | f_list = [] 407 | for raw_fs in sq.raw_f_list: 408 | fs = [attr_filter(f[0], f[1]) for f in raw_fs] 409 | f_list.append(fs) 410 | count = filter_count_n(res, f_list) 411 | 412 | # Cache writing layer. 413 | if not in_cache: 414 | cache[sq] = count 415 | # In case things go wrong: always write the cache(s) after a 416 | # successful query. We only need to write this one query. 417 | self.write_spread_item(sq, count, self.fn_n) 418 | 419 | # Returning layer. We could just always return res, but this will keep 420 | # memory freer and make exploration more explicit. 421 | if fetch: 422 | return count, res 423 | return count, None 424 | 425 | 426 | class Data(object): 427 | """ 428 | This is the API for how to interact with the data programmatically. 429 | 430 | This class aims to ease the transition of this code base along three axes: 431 | 432 | - (a) interactive --> programmatic 433 | - (b) database-focused --> data-focused 434 | - (c) CLI-focused --> API-focused 435 | """ 436 | 437 | def __init__(self, w=None): 438 | """ 439 | NOTE(mbforbes): Main consideration is whether I want this to be a 440 | lighter-weight init than loading the DB. 441 | 442 | Args: 443 | w (DBWrapper, optional): Default is None, in which case one is 444 | loaded from the cache. If w is provided, it should have the 445 | caches loaded already. 446 | 447 | """ 448 | if w is None: 449 | w = DBWrapper('max data API', CACHE_SPREAD_DIR) 450 | w.load_caches() 451 | self._w = w 452 | 453 | # This is also maybe used below---init once. 454 | self.lmtz = WordNetLemmatizer() 455 | 456 | def get_queries_for_verb(self, verb): 457 | """ 458 | Gets queries, noun indexes, and preposition indexes for a verb. 459 | 460 | Args: 461 | verb (str): Verb to get queries for. 462 | 463 | Returns: 464 | ( 465 | SizeQueryN, -- queries 466 | [[int]], -- corresponding noun indexes 467 | [[int]], -- corresponding preposition indexes 468 | ) 469 | """ 470 | qs = [ 471 | # e.g. I threw the ball 472 | # saving: obj 473 | SizeQueryN( 474 | # Query 475 | self._w.db.create_query( 476 | words=[None, verb, None], 477 | postags=['PRP', 'VBD', 'NN|NNS'], 478 | deprels=['nsubj', None, 'dobj'] 479 | ), 480 | # List of filter lists. 481 | [ 482 | [('deprel', 'nsubj')], 483 | [('deprel', 'dobj')], 484 | ] 485 | ), 486 | # e.g. 
I walked into the room 487 | # saving: prep, obj 488 | SizeQueryN( 489 | # Query 490 | self._w.db.create_query( 491 | words=[None, verb, None, None], 492 | postags=['PRP', 'VBD', 'IN', 'NN|NNS'], 493 | deprels=['nsubj', None, None, 'pobj'] 494 | ), 495 | # List of filter lists. 496 | [ 497 | [('deprel', 'nsubj')], 498 | [('postag', 'IN')], 499 | [('deprel', 'pobj')], 500 | ] 501 | ), 502 | # e.g. I put it (in / inside / on / under) the cupboard 503 | # e.g. I put it over the cupboard 504 | # saving: obj1, prep, obj2 505 | SizeQueryN( 506 | # Query 507 | self._w.db.create_query( 508 | words=[None, verb, None, None, None], 509 | postags=['PRP', 'VBD', 'NN|NNS', 'IN', 'NN|NNS'], 510 | deprels=['nsubj', None, 'dobj', None, 'pobj'] 511 | ), 512 | # List of filter lists. 513 | [ 514 | [('deprel', 'nsubj')], 515 | [('deprel', 'dobj')], 516 | [('postag', 'IN')], 517 | [('deprel', 'pobj')], 518 | ] 519 | ), 520 | # ------------------------------------------------------------------ 521 | # META NOTES: 522 | # The following two are pretty interesting, but they give 523 | # absolute info rather than relative info. (Philosophically, a 524 | # sentence is actually relative to "normal" experiences, but it's 525 | # much harder to figure this reference frame out than with a 526 | # direct comparison.) 527 | # 528 | # Thus, it's probably OK to put these on the back-burner for now. 529 | # ------------------------------------------------------------------ 530 | # # e.g. the plane flew 531 | # # saving: obj 532 | # SizeQueryN( 533 | # # Query 534 | # self._w.db.create_query( 535 | # words=[None, verb], 536 | # postags=['NN|NNS', 'VBD'], 537 | # deprels=['nsubj', 'ROOT'] 538 | # ), 539 | # # List of filter lists. 540 | # [ 541 | # [('deprel', 'nsubj')], 542 | # ] 543 | # ), 544 | # # e.g. the plane flew by 545 | # # saving: obj, prep 546 | # SizeQueryN( 547 | # # Query 548 | # self._w.db.create_query( 549 | # words=[None, verb, None], 550 | # postags=['NN|NNS', 'VBD', 'IN'], 551 | # deprels=['nsubj', 'ROOT', None] 552 | # ), 553 | # # List of filter lists. 554 | # [ 555 | # [('deprel', 'nsubj')], 556 | # [('postag', 'IN')], 557 | # ] 558 | # ), 559 | 560 | # ------------------------------------------------------------------ 561 | # META NOTES: 562 | # The following two are good analogs to the nsubj being PRP; if 563 | # we talk about objects (nouns) doing things to other objects 564 | # (nouns), this should capture that. 565 | # 566 | # So, these, I think, should be kept. 567 | # ------------------------------------------------------------------ 568 | # e.g. the plane flew by the blimp 569 | # saving: obj, prep, obj 570 | SizeQueryN( 571 | # Query 572 | self._w.db.create_query( 573 | words=[None, verb, None, None], 574 | postags=['NN|NNS', 'VBD', 'IN', 'NN|NNS'], 575 | deprels=['nsubj', 'ROOT', None, 'pobj'] 576 | ), 577 | # List of filter lists. 578 | [ 579 | [('deprel', 'nsubj')], 580 | [('postag', 'IN')], 581 | [('deprel', 'pobj')], 582 | ] 583 | ), 584 | # e.g. the boot squashed the bug 585 | # saving: obj, obj 586 | SizeQueryN( 587 | # Query 588 | self._w.db.create_query( 589 | words=[None, verb, None], 590 | postags=['NN|NNS', 'VBD', 'NN|NNS'], 591 | deprels=['nsubj', 'ROOT', 'dobj'] 592 | ), 593 | # List of filter lists.
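# (Each inner filter list must match exactly one token in an ngram;
# see filter_count_n above.)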
594 | [ 595 | [('deprel', 'nsubj')], 596 | [('deprel', 'dobj')], 597 | ] 598 | ), 599 | 600 | # ------------------------------------------------------------------ 601 | # META NOTES: 602 | # The following query is interesting, but it's a three-way 603 | # comparison, which will take extra work to integrate. 604 | # 605 | # Thus: back-burner for now. 606 | # ------------------------------------------------------------------ 607 | # # e.g. the man squashed the bug with his shoe 608 | # # saving: obj, prep, obj 609 | # SizeQueryN( 610 | # # Query 611 | # self._w.db.create_query( 612 | # words=[None, verb, None, None, None], 613 | # postags=['NN|NNS', 'VBD', 'NN|NNS', 'IN', 'NN|NNS'], 614 | # deprels=['nsubj', 'ROOT', 'dobj', None, 'pobj'] 615 | # ), 616 | # # List of filter lists. 617 | # [ 618 | # [('deprel', 'nsubj')], 619 | # [('deprel', 'dobj')], 620 | # [('postag', 'IN')], 621 | # [('deprel', 'pobj')], 622 | # ] 623 | # ), 624 | ] 625 | noun_idxes = [ 626 | [1], 627 | [2], 628 | [1, 3], 629 | # [0], 630 | # [0], 631 | [0, 2], 632 | [0, 1], 633 | # [0, 1, 3], 634 | ] 635 | prep_idxes = [ 636 | [], 637 | [1], 638 | [2], 639 | # [], 640 | # [1], 641 | [1], 642 | [], 643 | # [2], 644 | ] 645 | return qs, noun_idxes, prep_idxes 646 | 647 | def get_freq_nouns(self, v, s, p, cutoff=1000): 648 | """ 649 | Get frequent nouns for v_sub occurring at or above cutoff. 650 | 651 | Args: 652 | v (str): Verb 653 | s (str): Sub ('_d', '_p', etc.) 654 | p (str|None): Preposition (or None if sub doesn't use one) 655 | cutoff (int): Frequency cutoff below which nouns will not be 656 | returned. 657 | 658 | Returns: 659 | [str] | [(str, str)] 660 | """ 661 | c, n_idxes = self._get_cache_res_prep(v, s, p) 662 | fc = Counter() 663 | for surface, count in c.iteritems(): 664 | il = list(surface) 665 | # NOTE: Assuming at most 2 nouns per query. 666 | o = il[n_idxes[0]] if len(n_idxes) == 1 else (il[n_idxes[0]], il[n_idxes[1]]) 667 | fc[o] += count 668 | res = [] 669 | for o, freq in fc.most_common(): 670 | if freq < cutoff: 671 | break 672 | res += [o] 673 | return res 674 | 675 | def get_prep_freqs_agg(self, v): 676 | """ 677 | Gets the counter of prepositions for v, aggregating across all of its 678 | subs (and their query results). 679 | 680 | Args: 681 | v (str) verb 682 | 683 | Returns: 684 | Counter 685 | """ 686 | # gotta iterate over the various subs 687 | 688 | cs, _, p_idxes = self._get_cache_res_verb(v) 689 | # aggregating over different subs 690 | ac = Counter() 691 | for i, c in enumerate(cs): 692 | # Only aggregate prepositions if we saved any 693 | ps = p_idxes[i] 694 | if len(ps) == 0: 695 | continue 696 | # Consider most general case that we could have n prepositions 697 | # saved. 698 | for p in ps: 699 | for item, count in c.iteritems(): 700 | prep = list(item)[p] 701 | ac[prep] += count 702 | return ac 703 | 704 | def get_prep_freqs(self, v): 705 | """ 706 | Gets a counter of prepositions for v for each of its subs. 707 | 708 | Args: 709 | v (str) verb 710 | 711 | Returns: 712 | {str -> Counter(str)} --- {sub -> Counter(prep)} 713 | """ 714 | cs, _, p_idxes = self._get_cache_res_verb(v) 715 | res = {} 716 | for i, c in enumerate(cs): 717 | # Only count prepositions if any exist in this particular query 718 | ps = p_idxes[i] 719 | if len(ps) == 0: 720 | continue 721 | 722 | sub = SUBS[i] 723 | res[sub] = Counter() 724 | # Consider most general case that we could have n prepositions 725 | # saved.
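# (E.g. for the '_dp' frame the saved tuple is (subj, dobj, prep,
# pobj), so ps here would be [2].)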
727 |             for p in ps:
728 |                 for item, count in c.iteritems():
729 |                     prep = list(item)[p]
730 |                     res[sub][prep] += count
731 |         return res
732 | 
733 |     def get_top_nouns(self, v, s, p, filter_abstract, lemmatize):
734 |         """
735 |         Gets the counter for nouns in v, s, p.
736 | 
737 |         Args:
738 |             v (str): Verb
739 |             s (str): Sub
740 |             p (str): Prep
741 |             filter_abstract (bool): Whether to filter abstract nouns out of the
742 |                 returned counter.
743 |             lemmatize (bool): Whether to compress nouns into their lemmatized
744 |                 form before returning.
745 |         Returns:
746 |             Counter[str]
747 |         """
748 |         c, n_idxes = self._get_cache_res_prep(v, s, p)
749 | 
750 |         # aggregating
751 |         ac = Counter()
752 |         for i in n_idxes:
753 |             for surface, count in c.iteritems():
754 |                 noun = list(surface)[i]
755 |                 ac[noun] += count
756 | 
757 |         # debugging
758 |         # print '... '
759 |         # code.interact(local=dict(globals(), **locals()))
760 | 
761 |         # Maybe filter
762 |         if filter_abstract:
763 |             filter_abstract_from_counter(ac)
764 | 
765 |         # debugging
766 |         # print '... '
767 |         # code.interact(local=dict(globals(), **locals()))
768 | 
769 |         # Maybe lemmatize
770 |         if lemmatize:
771 |             self.compress_lemmas_in_counter(ac)
772 | 
773 |         return ac
774 | 
775 |     def compress_lemmas_in_counter(self, c):
776 |         """
777 |         Compresses forms to their lemma in (keys of) c (by adding counts).
778 | 
779 |         Args:
780 |             c (Counter)
781 | 
782 |         Modifies c in-place.
783 |         """
784 |         for k in c.keys():
785 |             l = self.lmtz.lemmatize(k)
786 |             if l != k:
787 |                 c[l] += c[k]
788 |                 del c[k]
789 | 
790 |     def get_verb_freq(self, v):
791 |         """
792 |         Get frequency statistics for verb.
793 | 
794 |         Args:
795 |             v (str): Verb
796 | 
797 |         Returns:
798 |             int: Sum of occurrences of the verb in currently active queries.
799 |         """
800 |         cs, _, _ = self._get_cache_res_verb(v)
801 |         total = 0
802 |         for c in cs:
803 |             total += sum(c.values())
804 |         return total
805 | 
806 |     def _get_cache_res_verb(self, v):
807 |         """
808 |         Gets verb cache result for all subs.
809 | 
810 |         Args:
811 |             v (str): Verb
812 | 
813 |         Returns (each list in the returned tuple is of length `len(SUBS)`):
814 |             (
815 |                 [Counter],  -- results for each sub
816 |                 [[int]],    -- noun indexes for each sub
817 |                 [[int]],    -- preposition indexes for each sub
818 |             )
819 |         """
820 |         qs, noun_idxes, prep_idxes = self.get_queries_for_verb(v)
821 |         cs = []
822 |         for q in qs:
823 |             c, _ = self._w.run(q)
824 |             cs += [c]
825 |         return cs, noun_idxes, prep_idxes
826 | 
827 |     def _get_cache_res_sub(self, v, s):
828 |         """
829 |         Returns the cached result for v_sub along with the indexes of its
830 |         nouns and prepositions. Helper function for querying APIs.
831 | 
832 |         Args:
833 |             v (str): Verb
834 |             s (str): Sub
835 | 
836 |         Returns:
837 |             Counter[tuple(str)]
838 |             [int]: Noun indexes
839 |             [int]: Prep indexes
840 |         """
841 |         # TODO(mbforbes): Should consolidate w/ constant in system.py. Then
842 |         # again, much code (e.g. query code) assumes just these exist...
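        # (Illustrative note --- SUBS is defined elsewhere in this module; if
        # it were, say, ['_d', '_p', '_dp', '_op', '_o'] and s == '_p', then
        # idx below would be 1, and we would run and return the second
        # query's results. The example list is hypothetical.)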
843 | 
844 |         # Figure out what we're looking for
845 |         idx = SUBS.index(s)
846 | 
847 |         # Get all data
848 |         qs, noun_idxes, prep_idxes = self.get_queries_for_verb(v)
849 | 
850 |         # Return the piece we want from each
851 |         c, _ = self._w.run(qs[idx])
852 |         n = noun_idxes[idx]
853 |         p = prep_idxes[idx]
854 |         return c, n, p
855 | 
856 |     def _get_cache_res_prep(self, v, s, p):
857 |         """
858 |         Args:
859 |             v (str): Verb
860 |             s (str): Sub
861 |             p (str): Prep
862 | 
863 |         Returns:
864 |             Counter[tuple(str)]
865 |             [int]: Noun indexes
866 |         """
867 |         c, nidxes, pidxes = self._get_cache_res_sub(v, s)
868 | 
869 |         # Sanity-check prep/sub agreement; a sub without a prep returns directly.
870 |         if (p is None and len(pidxes) > 0) or (p is not None and len(pidxes) == 0):
871 |             assert False, 'verb %s sub %s has prep mismatch: prep %r, idxes %r' % (v, s, p, pidxes)
872 |         if p is None and len(pidxes) == 0:
873 |             return c, nidxes
874 | 
875 |         # Else, we select only results which match the preposition.
876 |         res = Counter()
877 |         # NOTE: Assuming that there's at most one preposition per query as
878 |         # that's how things are currently structured. Change the argument above
879 |         # to be a [str] (and Turk new data to match) to allow more than one.
880 |         pidx = pidxes[0]
881 |         for surface, cnt in c.iteritems():
882 |             if list(surface)[pidx] != p:
883 |                 continue
884 |             res[surface] += cnt
885 |         return res, nidxes
886 | 
887 | 
888 | class PMI(object):
889 | 
890 |     def __init__(self):
891 |         """
892 |         Must have run PMI.compute() first.
893 |         """
894 |         with open(PMI_CACHE_FN, 'r') as f:
895 |             self.frame_counter = pickle.load(f)
896 |             self.frame_total = pickle.load(f)
897 |             self.nounpair_counter = pickle.load(f)
898 |             self.nounpair_total = pickle.load(f)
899 |             self.joint_counter = pickle.load(f)
900 |             self.joint_total = pickle.load(f)
901 | 
902 |     def query(self, frame, nounpair):
903 |         """
904 |         For the given frame (verb_sub[_prep]) and nounpair (noun1, noun2),
905 |         determines the PMI score between them (according to the ngramdb data)
906 |         and returns it.
907 | 
908 |         Args:
909 |             frame (string): verb_sub[_prep]
910 |             nounpair (string, string): (noun1, noun2)
911 | 
912 |         Returns:
913 |             float: The PMI between the frame and the noun pair.
914 |         """
915 |         res = self._get_pmi(frame, nounpair)
916 |         # if HUMAN_NOUN in nounpair and res >= 0:
917 |         #     print 'GOT HUMAN PMI:', frame, nounpair, res
918 |         return res
919 | 
920 |     def _get_pmi(self, frame, nounpair):
921 |         """
922 |         Factored out of query() to allow for multiple trials if desired. See
923 |         the docs there.
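
        A rough worked example (illustrative numbers, not from any real
        cache): with a joint count of 2, a frame count of 10, and a noun pair
        count of 20, each out of a total of 1000, the score is
        log(2/1000) - log(10/1000) - log(20/1000) = log(10) ~= 2.3.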
924 | 
925 |         Args:
926 |             frame (string)
927 |             nounpair (string, string)
928 | 
929 |         Returns:
930 |             float
931 |         """
932 |         joint = (frame, nounpair)
933 |         # log(joint / (x*y)) = log(joint) - log(x*y) = log(joint) - log(x) - log(y)
934 |         #
935 |         # for any prob p_x, p_x = count(x) / total(x)
936 |         #
937 |         # log(p_x) = log(count(x) / total(x)) = log(count(x)) - log(total(x))
938 | 
939 |         # sanity checking
940 |         if self.joint_counter[joint] == 0 or self.frame_counter[frame] == 0 or \
941 |                 self.nounpair_counter[nounpair] == 0:
942 |             return float('-inf')
943 | 
944 |         lj = math.log(self.joint_counter[joint]) - math.log(self.joint_total)
945 |         lf = math.log(self.frame_counter[frame]) - math.log(self.frame_total)
946 |         lnp = math.log(self.nounpair_counter[nounpair]) - math.log(self.nounpair_total)
947 |         return lj - lf - lnp
948 | 
949 |     @staticmethod
950 |     def compute():
951 |         """
952 |         Computes PMI counts over the cached query results and saves them to disk.
953 |         """
954 |         # get caches
955 |         w = DBWrapper('max print verb info', CACHE_SPREAD_DIR)
956 |         w.load_caches()
957 |         api = Data(w)
958 | 
959 |         # load verbs
960 |         print '[pmi] loading verbs...'
961 |         basedir = 'data/turk/hardcore/'
962 |         # NOTE: This is fine because this isn't the corpus we are training /
963 |         # testing on --- these are just the verbs that we might encounter, so
964 |         # we're precomputing PMI for everything we might want and caching it.
965 |         # (i.e. not actually touching test data here).
966 |         fnames = ['train.txt', 'dev.txt', 'test.txt']
967 |         verbs = []
968 |         for fname in fnames:
969 |             with open(basedir + fname, 'r') as f:
970 |                 verbs += [v.strip() for v in f.readlines()]
971 | 
972 |         # these are our variables for aggregating pmi counts
973 |         frame_counter = Counter()
974 |         nounpair_counter = Counter()
975 |         joint_counter = Counter()
976 | 
977 |         print '[pmi] counting query results...'
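        # (Illustrative sketch of what the loop below accumulates, with
        # made-up numbers: seeing the frame 'threw_p_over' with the noun pair
        # ('boy', 'fence') 12 times adds 12 to frame_counter['threw_p_over'],
        # 12 to nounpair_counter[('boy', 'fence')], and 12 to
        # joint_counter[('threw_p_over', ('boy', 'fence'))].)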
978 |         for verb in tqdm(verbs):
979 |             counters, noun_idxes_lst, prep_idxes_lst = api._get_cache_res_verb(verb)
980 |             assert len(SUBS) == len(counters), 'Should get 1 set of results back for each frame type (sub)'
981 |             for i in range(len(SUBS)):
982 |                 counter = counters[i]
983 |                 noun_idxes = noun_idxes_lst[i]
984 |                 prep_idxes = prep_idxes_lst[i]
985 | 
986 |                 for surface_forms, freq in counter.iteritems():
987 |                     # Compute frame
988 |                     frame = verb + SUBS[i]
989 |                     if len(prep_idxes) > 0:
990 |                         # NOTE: assuming at most 1 prep for now
991 |                         frame += '_' + surface_forms[prep_idxes[0]]
992 | 
993 |                     # Compute noun pair
994 |                     # NOTE: Assuming 1 or 2 nouns
995 |                     if len(noun_idxes) == 1:
996 |                         nouns = [HUMAN_NOUN, surface_forms[noun_idxes[0]]]
997 |                     else:
998 |                         nouns = [surface_forms[noun_idxes[0]], surface_forms[noun_idxes[1]]]
999 | 
1000 |                     for j in range(2):
1001 |                         if nouns[j] in PERSON_NOUNS:
1002 |                             nouns[j] = HUMAN_NOUN
1003 | 
1004 |                     nounpair = tuple(nouns)
1005 | 
1006 |                     frame_counter[frame] += freq
1007 |                     nounpair_counter[nounpair] += freq
1008 |                     joint_counter[(frame, nounpair)] += freq
1009 | 
1010 |         # easy to get actual probs so why not
1011 |         frame_total = sum(frame_counter.values())
1012 |         nounpair_total = sum(nounpair_counter.values())
1013 |         joint_total = sum(joint_counter.values())
1014 | 
1015 |         with open(PMI_CACHE_FN, 'w') as f:
1016 |             pickle.dump(frame_counter, f)
1017 |             pickle.dump(frame_total, f)
1018 |             pickle.dump(nounpair_counter, f)
1019 |             pickle.dump(nounpair_total, f)
1020 |             pickle.dump(joint_counter, f)
1021 |             pickle.dump(joint_total, f)
1022 | 
1023 | 
1024 | # TOP-LEVEL COMMAND-LINE FUNCS
1025 | # ------------------------------------------------------------------------------
1026 | 
1027 | def explore():
1028 |     w = DBWrapper('max explore', CACHE_SPREAD_DIR)
1029 |     w.load_caches()
1030 |     api = Data(w)
1031 |     code.interact(local=dict(globals(), **locals()))
1032 | 
1033 | 
1034 | def run_for_verb(verb, w=None):
1035 |     """
1036 |     Run queries for a verb.
1037 | 
1038 |     Args:
1039 |         verb (str): Verb to run queries for.
1040 |         w (DBWrapper, optional): Default is None, in which case one is loaded
1041 |             from the cache. If w is provided, it should have the caches loaded
1042 |             already.
1043 |     """
1044 |     print '[run_for_verb] %s' % (verb)
1045 |     if w is None:
1046 |         w = DBWrapper('max verb run', CACHE_SPREAD_DIR)
1047 |         w.load_caches()
1048 |     api = Data(w)
1049 |     qs, _, _ = api.get_queries_for_verb(verb)
1050 |     for q in qs:
1051 |         w.run(q)
1052 | 
1053 | 
1054 | def run_for_file(fname):
1055 |     """
1056 |     Run queries for verbs in a file.
1057 | 
1058 |     Args:
1059 |         fname (str)
1060 |     """
1061 |     # Prep the DB.
1062 |     w = DBWrapper('max file run', CACHE_SPREAD_DIR)
1063 |     w.load_caches()
1064 | 
1065 |     # Read the verbs.
1066 |     with open(fname) as f:
1067 |         lines = f.readlines()
1068 |     verbs = [line.strip() for line in lines]
1069 | 
1070 |     # Run the queries.
1071 |     for v in verbs:
1072 |         run_for_verb(v, w)
1073 | 
1074 | 
1075 | def ping():
1076 |     """
1077 |     Really just want to see if the database is actually up.
1078 |     """
1079 |     w = DBWrapper('max ping', CACHE_SPREAD_DIR)
1080 |     w.load_caches()
1081 |     res = w.db.create_and_run_query(
1082 |         words=['cat', None, 'dog'],
1083 |         postags=['NN', 'VBD', 'NN'],
1084 |         deprels=['nsubj', None, 'dobj'],
1085 |     )
1086 |     pprint_ngram_list(res[:10])
1087 | 
1088 | 
1089 | def check_cache():
1090 |     """
1091 |     Checks the cache files on disk.
1092 |     """
1093 |     w = DBWrapper('max check cache', CACHE_SPREAD_DIR)
1094 |     w.check_cache_files(CACHE_SPREAD_DIR)
1095 | 
1096 | 
1097 | def print_cache_stats():
1098 |     """
1099 |     Prints (verb) stats of cache.
1100 |     """
1101 |     w = DBWrapper('max print cache', CACHE_SPREAD_DIR)
1102 |     w.load_caches()
1103 |     w.print_cache_stats()
1104 | 
1105 | 
1106 | def interact_verb_info():
1107 |     """
1108 |     Prints info about verbs.
1109 | 
1110 |     Specifically, prints cached verb data in sets that would be partitioned
1111 |     into nodes in the factor graph.
1112 |     """
1113 |     w = DBWrapper('max interact verb info', CACHE_SPREAD_DIR)
1114 |     w.load_caches()
1115 |     api = Data(w)
1116 | 
1117 |     while True:
1118 |         verb = raw_input('enter a verb (%s to quit): ' % (QUIT))
1119 |         if verb == QUIT:
1120 |             break
1121 |         _print_single_verb_info(w, api, verb)
1122 | 
1123 | 
1124 | def print_top_preps_interact():
1125 |     """
1126 |     Interactively prints the most frequent prepositions for a verb.
1127 |     """
1128 |     w = DBWrapper('max interact verb preps', CACHE_SPREAD_DIR)
1129 |     w.load_caches()
1130 |     api = Data(w)
1131 | 
1132 |     while True:
1133 |         verb = raw_input('enter a verb (%s to quit): ' % (QUIT))
1134 |         if verb == QUIT:
1135 |             break
1136 |         preps = api.get_prep_freqs_agg(verb)
1137 |         for p, count in preps.most_common(20):
1138 |             print '%d\t%s' % (count, p)
1139 | 
1140 | def print_verb_info(verbs):
1141 |     """
1142 |     Prints info about verbs.
1143 | 
1144 |     Args:
1145 |         verbs ([str])
1146 |     """
1147 |     w = DBWrapper('max print verb info', CACHE_SPREAD_DIR)
1148 |     w.load_caches()
1149 |     api = Data(w)
1150 |     for verb in verbs:
1151 |         _print_single_verb_info(w, api, verb)
1152 | 
1153 | 
1154 | def _print_single_verb_info(w, api, verb):
1155 |     """
1156 |     Args:
1157 |         w (DBWrapper)
1158 |         api (Data)
1159 |         verb (str)
1160 |     """
1161 |     qs, _, _ = api.get_queries_for_verb(verb)
1162 |     descs = [
1163 |         '(1) PRP %s dobj' % (verb),
1164 |         '(2) PRP %s IN pobj' % (verb),
1165 |         '(3) PRP %s dobj IN pobj' % (verb),
1166 |         # '(4) NN(S) %s' % (verb),
1167 |         # '(5) NN(S) %s IN' % (verb),
1168 |         '(6) NN(S) %s IN pobj' % (verb),
1169 |         '(7) NN(S) %s dobj' % (verb),
1170 |         # '(8) NN(S) %s dobj IN pobj' % (verb),
1171 |     ]
1172 |     # These index the noun positions in the queries. They are defined assuming
1173 |     # the PRPs---where applicable---have already been stripped off. Because of
1174 |     # this, we don't use the noun indexes returned from the
1175 |     # get_*queries_for_verb functions.
1176 |     idxes = [
1177 |         [0],
1178 |         [1],
1179 |         [0, 2],
1180 |         # [0],
1181 |         # [0],
1182 |         [0, 2],
1183 |         [0, 1],
1184 |         # [0, 2, 4],
1185 |     ]
1186 | 
1187 |     # Sanity check
1188 |     if len(qs) != len(descs) or len(qs) != len(idxes):
1189 |         print 'ERRR: Code out-of-date: qs, descs, idxes must match query #s.'
1190 |         return
1191 | 
1192 |     for i, q in enumerate(qs):
1193 |         noun_idxes = idxes[i]
1194 |         c, _ = w.run(q)
1195 |         print descs[i]
1196 | 
1197 |         # maybe compress PRPs
1198 |         compress = True  # Change to False to not compress.
1199 |         if compress and i < 3:
1200 |             # compress PRP
1201 |             cp = Counter()
1202 |             for item, count in c.iteritems():
1203 |                 minus_prp = tuple(list(item)[1:])
1204 |                 cp[minus_prp] += count
1205 |         else:
1206 |             # no PRP; can't compress (or just disabled)
1207 |             cp = c
1208 | 
1209 |         # NOTE: In other functions, abstract nouns may be filtered. May want to
1210 |         # enable here.
1211 |         # filter_abstract_from_counter(cp, noun_idxes)
1212 |         for r, f in cp.most_common(20):
1213 |             print '\t %s\t %d' % (r, f)
1214 | 
1215 | 
1216 | def compute_pmi():
1217 |     PMI.compute()
1218 | 
1219 | 
1220 | def query_pmi():
1221 |     pmi = PMI()
1222 |     print 'Entering interactive python shell'
1223 |     print 'Query pmi with `pmi.query(frame, nounpair)`'
1224 |     print 'Example:'
1225 |     print ">>> pmi.query('looked_op_as', ('children', 'friend'))"
1226 |     print pmi.query('looked_op_as', ('children', 'friend'))
1227 |     code.interact(local=dict(globals(), **locals()))
1228 | 
1229 | 
1230 | def main():
1231 |     """
1232 |     NOTE: To any reader of this code, apologies for the hacked-together
1233 |     command line parsing. I should have just used `argparse`. If you're seeing
1234 |     this and care, open an issue and send a PR and we'll make this better :-)
1235 |     """
1236 |     # sanity checking
1237 |     if len(sys.argv) < 2:
1238 |         print 'USAGE: python data.py --command [args]'
1239 |         print 'Possible commands:'
1240 |         print '\t', '--explore \t\t run interactive exploration'
1241 |         print '\t', '--ping \t\t\t ping the Myria DB'
1242 |         print '\t', '--check-cache \t\t check local cache for soundness'
1243 |         print '\t', '--print-cache-stats \t print (verb) cache stats'
1244 |         print '\t', '--interact-verb-info \t print cached verb info (interact)'
1245 |         print '\t', '--query-pmi \t\t query precomputed PMI results'
1246 |         print '\t', '--compute-pmi \t\t compute PMI over ngramdb w/ turked verbs'
1247 |         print '\t', '--print-verb-info <verb> [<verb> ...] \t print cached verb info (verb(s) provided)'
1248 |         print '\t', '--print-top-preps-interact \t print cached info on top preps for verbs (interact)'
1249 |         print '\t', '--verb <verb> \t\t run query and cache results for <verb>'
1250 |         print '\t', '--file <file> \t\t run queries for all words in <file>'
1251 |         return 1
1252 | 
1253 |     if sys.argv[1] == '--explore':
1254 |         explore()
1255 |     elif sys.argv[1] == '--ping':
1256 |         ping()
1257 |     elif sys.argv[1] == '--check-cache':
1258 |         check_cache()
1259 |     elif sys.argv[1] == '--print-cache-stats':
1260 |         print_cache_stats()
1261 |     elif sys.argv[1] == '--interact-verb-info':
1262 |         interact_verb_info()
1263 |     elif sys.argv[1] == '--compute-pmi':
1264 |         compute_pmi()
1265 |     elif sys.argv[1] == '--query-pmi':
1266 |         query_pmi()
1267 |     elif sys.argv[1] == '--print-top-preps-interact':
1268 |         print_top_preps_interact()
1269 |     elif sys.argv[1] == '--print-verb-info':
1270 |         if len(sys.argv) < 3:
1271 |             print 'ERRR: Command "--print-verb-info" requires at least one verb'
1272 |             return 1
1273 |         print_verb_info(sys.argv[2:])
1274 |     elif sys.argv[1] == '--verb':
1275 |         if len(sys.argv) < 3:
1276 |             print 'ERRR: Command "--verb" requires a verb'
1277 |             return 1
1278 |         run_for_verb(sys.argv[2])
1279 |     elif sys.argv[1] == '--file':
1280 |         if len(sys.argv) < 3:
1281 |             print 'ERRR: Command "--file" requires a filename'
1282 |             return 1
1283 |         run_for_file(sys.argv[2])
1284 |     else:
1285 |         print 'ERRR: Command "%s" unrecognized' % (sys.argv[1])
1286 |         return 1
1287 | 
1288 |     return 0
1289 | 
1290 | 
1291 | if __name__ == '__main__':
1292 |     sys.exit(main())
1293 | 
--------------------------------------------------------------------------------