├── images
│   └── bob_dylan.png
├── LICENSE
├── mapper.py
├── stats.py
├── README.md
├── constant.py
├── requirements.txt
├── .gitignore
├── extract.py
├── corpus
│   ├── adverbs.txt
│   └── english-adjectives.txt
├── process.py
└── utils.py

/images/bob_dylan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nidharap/language-models-are-knowledge-graphs-pytorch/HEAD/images/bob_dylan.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Ray Tam
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/mapper.py:
--------------------------------------------------------------------------------
1 | from constant import invalid_relations_set
2 | 
3 | from REL.db.generic import GenericLookup
4 | sqlite_path = "../../Documents/wiki_2020/generated"  # REL-generated wiki database
5 | emb = GenericLookup("entity_word_embedding", save_dir=sqlite_path, table_name="embeddings")
6 | 
7 | 
8 | 
9 | def Map(head, relations, tail, top_first=True, best_scores=True):
10 |     if head is None or tail is None or relations is None:
11 |         return {}
12 |     head_p_e_m = emb.wiki(str(head), 'wiki')
13 |     if head_p_e_m is None:
14 |         return {}
15 |     tail_p_e_m = emb.wiki(str(tail), 'wiki')
16 |     if tail_p_e_m is None:
17 |         return {}
18 |     tail_p_e_m = tail_p_e_m[0]
19 |     head_p_e_m = head_p_e_m[0]
20 |     valid_relations = [r for r in relations if r not in invalid_relations_set and r.isalpha() and len(r) > 1]  # drop stop words, auxiliaries, adjectives and adverbs
21 |     if len(valid_relations) == 0:
22 |         return {}
23 | 
24 |     return {'h': head_p_e_m[0], 't': tail_p_e_m[0], 'r': '_'.join(valid_relations)}
25 | 
26 | def deduplication(triplets):
27 |     unique_pairs = []
28 |     pair_confidence = []
29 |     for t in triplets:
30 |         key = '{}\t{}\t{}'.format(t['h'], t['r'], t['t'])
31 |         conf = t['c']
32 |         if key not in unique_pairs:
33 |             unique_pairs.append(key)
34 |             pair_confidence.append(conf)
35 | 
36 |     unique_triplets = []
37 |     for idx, unique_pair in enumerate(unique_pairs):
38 |         h, r, t = unique_pair.split('\t')
39 |         unique_triplets.append({'h': h, 'r': r, 't': t, 'c': pair_confidence[idx]})
40 | 
41 |     return unique_triplets
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     emb = GenericLookup("entity_word_embedding", save_dir=sqlite_path, table_name="embeddings")
46 |     p_e_m = emb.wiki("Bob", 'wiki')[:10]
47 |     print(p_e_m)
48 | 
--------------------------------------------------------------------------------
/stats.py:
--------------------------------------------------------------------------------
1 | from utils import compress_attention, create_mapping, BFS, build_graph, is_word
2 | from multiprocessing import Pool
3 | import spacy
4 | import en_core_web_sm
5 | import torch
6 | from transformers import AutoTokenizer, BertModel
7 | 
8 | nlp = en_core_web_sm.load()
9 | 
10 | if __name__ == '__main__':
11 |     import json
12 |     from tqdm import tqdm
13 | 
14 |     target_files = [
15 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/wiki_dev_2020-18.json',
16 |         '../../Documents/KGERT-v2/datasets/squad_v1/dev-v1.1.json',
17 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/train-v1.1.json',
18 |     ]
19 | 
20 |     with open('stats.txt', 'a') as g:
21 |         for target_file in target_files:
22 |             with open(target_file, 'r') as f:
23 |                 dataset = json.load(f)
24 |             print(target_file)
25 | 
26 |             sentence_cnt = 0
27 |             word_cnt = 0
28 |             for data in tqdm(dataset['data'], dynamic_ncols=True):
29 |                 for para in data['paragraphs']:
30 |                     context = para['context']
31 |                     doc = nlp(context)
32 |                     sentence_cnt += len(list(doc.sents))
33 |                     word_cnt += len(list(doc))
34 | 
35 |                     for question in para['qas']:
36 |                         question = question['question']
37 |                         doc = nlp(question)
38 |                         sentence_cnt += len(list(doc.sents))
39 |                         word_cnt += len(list(doc))
40 | 
41 |             print('sentence : %d' % sentence_cnt)
42 |             print('word : %d' % word_cnt)
43 | 
44 |             g.write(target_file + '\n')
45 |             g.write('sentence : %d\n' % sentence_cnt)
46 |             g.write('word : %d\n' % word_cnt)
47 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # language-models-are-knowledge-graphs-pytorch
2 | Language models are open knowledge graphs (work in progress)
3 | 
4 | An unofficial reimplementation of [Language models are open knowledge graphs](https://arxiv.org/abs/2010.11967).
5 | 
6 | The implementation of Match is in process.py.
7 | 
8 | ![example bob dylan](https://raw.githubusercontent.com/theblackcat102/language-models-are-knowledge-graphs-pytorch/main/images/bob_dylan.png)
9 | 
10 | ### Execute the MAMA (Match and Map) section
11 | 
12 | Do note that the extracted results are still quite noisy and should afterwards be filtered based on the frequency of each unique relation pair.
13 | 
14 | ```
15 | python extract.py examples/bob_dylan.txt bert-large-cased-bob_dylan.jsonl --language_model bert-large-cased --use_cuda true
16 | ```
17 | 
18 | ## Map
19 | 
20 | 1. Entity linking
21 | 
22 | The original download link for the Stanford entity-linking data, [nlp.stanford.edu/pubs/crosswikis-data.tar.bz2](http://nlp.stanford.edu/pubs/crosswikis-data.tar.bz2), has been removed. I will use [REL](https://github.com/informagi/REL) as the entity disambiguation model (supervised, instead of the original unsupervised approach) to achieve the same task.
23 | 
24 | 2. Relations linking (page 5, 2.2.1)
25 | 
26 | Lemmatization is done in the previous step ([process.py](process.py)); in this stage we remove inflected forms, auxiliary verbs, adjectives and adverbs. A minimal sketch of this filter is shown below.
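27 | 
28 | For illustration, here is a minimal sketch of that relation filter, mirroring what `mapper.py` does with `constant.invalid_relations_set` (the input lemmas below are just a made-up example):
29 | 
30 | ```python
31 | # Keep only alphabetic, multi-character lemmas that are not listed in
32 | # invalid_relations_set (stop words, auxiliary lemmas, adjectives and
33 | # adverbs loaded from corpus/*.txt in constant.py), then join the
34 | # survivors into a single relation string.
35 | from constant import invalid_relations_set
36 | 
37 | def link_relation(relations):
38 |     valid = [r for r in relations
39 |              if r not in invalid_relations_set and r.isalpha() and len(r) > 1]
40 |     return '_'.join(valid) if valid else None
41 | 
42 | print(link_relation(['quickly', 'sign']))  # -> 'sign' ('quickly' is an adverb)
43 | ```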
44 | 
45 | Adjectives extracted from here: [https://gist.github.com/hugsy/8910dc78d208e40de42deb29e62df913](https://gist.github.com/hugsy/8910dc78d208e40de42deb29e62df913)
46 | 
47 | Adverbs extracted from here: [https://raw.githubusercontent.com/janester/mad_libs/master/List%20of%20Adverbs.txt](https://raw.githubusercontent.com/janester/mad_libs/master/List%20of%20Adverbs.txt)
48 | 
49 | ### Environment setup
50 | 
51 | This repo is run using virtualenv:
52 | 
53 | ```
54 | virtualenv -p python3 env
55 | source env/bin/activate
56 | pip install -r requirements.txt
57 | ```
58 | 
--------------------------------------------------------------------------------
/constant.py:
--------------------------------------------------------------------------------
1 | # Lemmas that may never serve as a relation: noisy tokens observed during
2 | # extraction, plus closed-class words and numbers.
3 | found_invalid = [
4 |     'and', 'of', 'in', 'to', ',', 'for', 'be', 'by', 'with', 'on', 'as', 'that', 'from', 'be', ')', '(', 'which',
5 |     'at', 'be', 'be', 'be', ';', 'or', 'but', 'have', 'have', 'the', 'have', 'not', 'after', '"', 'include', 'also',
6 |     'be', 'into', 'between', 'such', ':', 'do', 'while', 'when', 'during', 'would', 'over', 'since', '2019',
7 |     'well', 'than', '2020', 'under', 'where', 'one', 'be', 'hold', '2018', 'can', 'through', '-',
8 |     'make', 'out', 'there', 'know', 'due', 'a', 'take', 'up', 'begin', 'before', 'about',
9 |     "'", '4', '10', '3', '11', '&', '$', '12', '2015', '2008', '–', 'will',
10 |     'so', 'do', 'follow', 'most', 'although', 'cause', 'only', '—', '2007', '2014', 'mostly', '5', 'say', '2017', '20',
11 |     '2009',
12 | ]
13 | 
14 | invalid_relations = [
15 |     'and', 'but', 'or', 'so', 'because', 'when', 'before', 'although',  # conjunction
16 |     'oh', 'wow', 'ouch', 'ah', 'oops',
17 |     'what', 'how', 'where', 'when', 'who', 'whom',
18 |     'a', 'and', 'the', 'there',
19 |     'them', 'he', 'she', 'him', 'her', 'it',  # pronoun
20 |     'ten', 'hundred', 'thousand', 'million', 'billion',  # unit
21 |     'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',  # number
22 |     'year', 'month', 'day', 'daily',
23 | ] + found_invalid
24 | 
25 | 
26 | 
27 | 
28 | auxiliaries = [
29 |     'be', 'can', 'have', 'dare', 'may', 'will', 'would', 'should',
30 |     'need', 'ought', 'shall', 'might', 'do', 'does', 'did',
31 |     'be able to', 'had better', 'have to', 'need to', 'ought to', 'used to',
32 | ]
33 | 
34 | with open('corpus/english-adjectives.txt', 'r') as f:
35 |     adjectives = [line.strip().lower() for line in f if line.strip()]  # skip blank lines
36 | 
37 | with open('corpus/adverbs.txt', 'r') as f:
38 |     adverbs = [line.strip().lower() for line in f if line.strip()]  # skip blank lines
39 | 
40 | # with open('corpus/Wordlist-Verbs-All.txt', 'r') as f:
41 | #     verbs = [line.strip().lower() for line in f if line.strip()]
42 | 
43 | invalid_relations += adjectives
44 | invalid_relations += adverbs
45 | # invalid_relations += verbs
46 | 
47 | invalid_relations_set = set(invalid_relations)
48 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | argon2-cffi==20.1.0
2 | async-generator==1.10
3 | attrs==20.3.0
4 | backcall==0.2.0
5 | bleach==3.2.1
6 | blis==0.4.1
7 | catalogue==1.0.0
8 | certifi==2020.11.8
9 | cffi==1.14.3
10 | chardet==3.0.4
11 | click==7.1.2
12 | cycler==0.10.0
13 | cymem==2.0.4
14 | dataclasses==0.6
15 | decorator==4.4.2
16 | defusedxml==0.6.0
17 | en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.3.1/en_core_web_lg-2.3.1.tar.gz
18 | en-core-web-md @ 
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz 19 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz 20 | entrypoints==0.3 21 | filelock==3.0.12 22 | idna==2.10 23 | importlib-metadata==2.0.0 24 | ipykernel==5.3.4 25 | ipython==7.16.1 26 | ipython-genutils==0.2.0 27 | ipywidgets==7.5.1 28 | jedi==0.17.2 29 | Jinja2==2.11.2 30 | joblib==0.17.0 31 | jsonschema==3.2.0 32 | jupyter==1.0.0 33 | jupyter-client==6.1.7 34 | jupyter-console==6.2.0 35 | jupyter-core==4.6.3 36 | jupyterlab-pygments==0.1.2 37 | kiwisolver==1.3.1 38 | MarkupSafe==1.1.1 39 | matplotlib==3.3.3 40 | mistune==0.8.4 41 | murmurhash==1.0.4 42 | nbclient==0.5.1 43 | nbconvert==6.0.7 44 | nbformat==5.0.8 45 | nest-asyncio==1.4.3 46 | nltk==3.5 47 | notebook==6.1.5 48 | numpy==1.19.4 49 | packaging==20.4 50 | pandas==1.1.4 51 | pandocfilters==1.4.3 52 | parso==0.7.1 53 | pexpect==4.8.0 54 | pickleshare==0.7.5 55 | Pillow==8.0.1 56 | plac==1.1.3 57 | preshed==3.0.4 58 | prometheus-client==0.8.0 59 | prompt-toolkit==3.0.8 60 | protobuf==3.13.0 61 | ptyprocess==0.6.0 62 | pycparser==2.20 63 | Pygments==2.7.2 64 | pyparsing==2.4.7 65 | pyrsistent==0.17.3 66 | python-dateutil==2.8.1 67 | pytz==2020.4 68 | pyzmq==19.0.2 69 | qtconsole==4.7.7 70 | QtPy==1.9.0 71 | regex==2020.11.11 72 | requests==2.25.0 73 | sacremoses==0.0.43 74 | scipy==1.5.4 75 | seaborn==0.11.0 76 | Send2Trash==1.5.0 77 | sentencepiece==0.1.90 78 | six==1.15.0 79 | spacy==2.3.2 80 | srsly==1.0.4 81 | terminado==0.9.1 82 | testpath==0.4.4 83 | thinc==7.4.1 84 | tokenizers==0.8.1rc2 85 | torch>=1.5 86 | tornado==6.1 87 | tqdm==4.51.0 88 | traitlets==4.3.3 89 | transformers==3.3.1 90 | typing-extensions==3.7.4.3 91 | urllib3==1.26.1 92 | wasabi==0.8.0 93 | wcwidth==0.2.5 94 | webencodings==0.5.1 95 | widgetsnbextension==3.5.1 96 | zipp==3.4.0 97 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | target/
76 | 
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 | 
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 | 
84 | # pyenv
85 | .python-version
86 | 
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 | 
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
--------------------------------------------------------------------------------
/extract.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | from process import parse_sentence
3 | from mapper import Map, deduplication
4 | from transformers import AutoTokenizer, BertModel, GPT2Model
5 | import argparse
6 | import en_core_web_md
7 | from tqdm import tqdm
8 | import json
9 | 
10 | def str2bool(v):
11 |     if isinstance(v, bool):
12 |         return v
13 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
14 |         return True
15 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
16 |         return False
17 |     else:
18 |         raise argparse.ArgumentTypeError('Boolean value expected.')
19 | 
20 | parser = argparse.ArgumentParser(description='Process lines of a text corpus into a knowledge graph')
21 | parser.add_argument('input_filename', type=str, help='text file as input')
22 | parser.add_argument('output_filename', type=str, help='output text file')
23 | parser.add_argument('--language_model', default='bert-base-cased',
24 |                     choices=['bert-large-uncased', 'bert-large-cased', 'bert-base-uncased', 'bert-base-cased', 'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl'],
25 |                     help='which language model to use')
26 | parser.add_argument('--use_cuda', default=True,
27 |                     type=str2bool, nargs='?',
28 |                     help="Use cuda?")
29 | parser.add_argument('--include_text_output', default=False,
30 |                     type=str2bool, nargs='?',
31 |                     help="Include original sentence in output")
32 | parser.add_argument('--threshold', default=0.003,
33 |                     type=float, help="Any attention score lower than this is removed")
34 | 
35 | args = parser.parse_args()
36 | 
37 | use_cuda = args.use_cuda
38 | nlp = en_core_web_md.load()
39 | 
40 | '''
41 | Tested language models:
42 | 
43 | 1. bert-base-cased
44 | 
45 | 2. gpt2-medium
46 | 
47 | Basically any model that belongs to one of these model families should work.
48 | 
49 | '''
50 | 
51 | language_model = args.language_model
52 | 
53 | 
54 | if __name__ == '__main__':
55 |     tokenizer = AutoTokenizer.from_pretrained(language_model)
56 |     if 'gpt2' in language_model:
57 |         encoder = GPT2Model.from_pretrained(language_model)
58 |     else:
59 |         encoder = BertModel.from_pretrained(language_model)
60 |     encoder.eval()
61 |     if use_cuda:
62 |         encoder = encoder.cuda()
63 |     input_filename = args.input_filename
64 |     output_filename = args.output_filename
65 |     include_sentence = args.include_text_output
66 | 
67 |     with open(input_filename, 'r') as f, open(output_filename, 'w') as g:
68 |         for idx, line in enumerate(tqdm(f)):
69 |             sentence = line.strip()
70 |             if len(sentence):
71 |                 valid_triplets = []
72 |                 for sent in nlp(sentence).sents:
73 |                     # Match
74 |                     for triplets in parse_sentence(sent.text, tokenizer, encoder, nlp, use_cuda=use_cuda):
75 |                         valid_triplets.append(triplets)
76 |                 if len(valid_triplets) > 0:
77 |                     # Map
78 |                     mapped_triplets = []
79 |                     for triplet in valid_triplets:
80 |                         head = triplet['h']
81 |                         tail = triplet['t']
82 |                         relations = triplet['r']
83 |                         conf = triplet['c']
84 |                         if conf < args.threshold:
85 |                             continue
86 |                         mapped_triplet = Map(head, relations, tail)
87 |                         if 'h' in mapped_triplet:
88 |                             mapped_triplet['c'] = conf
89 |                             mapped_triplets.append(mapped_triplet)
90 |                     output = {'line': idx, 'tri': deduplication(mapped_triplets)}
91 | 
92 |                     if include_sentence:
93 |                         output['sent'] = sentence
94 |                     if len(output['tri']) > 0:
95 |                         g.write(json.dumps(output) + '\n')
--------------------------------------------------------------------------------
/corpus/adverbs.txt:
--------------------------------------------------------------------------------
1 | 
2 | abnormally
3 | absentmindedly
4 | accidentally
5 | acidly
6 | actually
7 | adventurously
8 | afterwards
9 | almost
10 | always
11 | angrily
12 | annually
13 | anxiously
14 | arrogantly
15 | awkwardly
16 | badly
17 | bashfully
18 | beautifully
19 | bitterly
20 | bleakly
21 | blindly
22 | blissfully
23 | boastfully
24 | boldly
25 | bravely
26 | briefly
27 | brightly
28 | briskly
29 | broadly
30 | busily
31 | calmly
32 | carefully
33 | carelessly
34 | cautiously
35 | certainly
36 | cheerfully
37 | clearly
38 | cleverly
39 | closely
40 | coaxingly
41 | colorfully
42 | commonly
43 | continually
44 | coolly
45 | correctly
46 | courageously
47 | crossly
48 | cruelly
49 | curiously
50 | daily
51 | daintily
52 | dearly
53 | deceivingly
54 | delightfully
55 | deeply
56 | defiantly
57 | deliberately
58 | delightfully
59 | diligently
60 | dimly
61 | doubtfully
62 | dreamily
63 | easily
64 | elegantly
65 | energetically
66 | enormously
67 | enthusiastically
68 | equally
69 | especially
70 | even
71 | evenly
72 | eventually
73 | exactly
74 | excitedly
75 | extremely
76 | fairly
77 | faithfully
78 | famously
79 | far
80 | fast
81 | fatally
82 | ferociously
83 | fervently
84 | fiercely
85 | fondly
86 | foolishly
87 | fortunately
88 | frankly
89 | frantically
90 | freely
91 | frenetically
92 | frightfully
93 | fully
94 | furiously
95 | generally
96 | generously
97 | gently
98 | gladly
99 | gleefully
100 | gracefully
101 | gratefully
102 | greatly
103 | greedily
104 | happily
105 | hastily
106 | healthily
107 | heavily
108 | helpfully
109 | helplessly
110 | highly
111 | honestly
112 | hopelessly
113 | hourly
114 | hungrily
115 | immediately
116 | innocently
117 | 
inquisitively 118 | instantly 119 | intensely 120 | intently 121 | interestingly 122 | inwardly 123 | irritably 124 | jaggedly 125 | jealously 126 | joshingly 127 | joyfully 128 | joyously 129 | jovially 130 | jubilantly 131 | judgmentally 132 | justly 133 | keenly 134 | kiddingly 135 | kindheartedly 136 | kindly 137 | knavishly 138 | knottily 139 | knowingly 140 | knowledgeably 141 | kookily 142 | lazily 143 | less 144 | lightly 145 | likely 146 | limply 147 | lively 148 | loftily 149 | longingly 150 | loosely 151 | lovingly 152 | loudly 153 | loyally 154 | madly 155 | majestically 156 | meaningfully 157 | mechanically 158 | merrily 159 | miserably 160 | mockingly 161 | monthly 162 | more 163 | mortally 164 | mostly 165 | mysteriously 166 | naturally 167 | nearly 168 | neatly 169 | needily 170 | nervously 171 | never 172 | nicely 173 | noisily 174 | not 175 | obediently 176 | obnoxiously 177 | oddly 178 | offensively 179 | officially 180 | often 181 | only 182 | openly 183 | optimistically 184 | overconfidently 185 | owlishly 186 | painfully 187 | partially 188 | patiently 189 | perfectly 190 | physically 191 | playfully 192 | politely 193 | poorly 194 | positively 195 | potentially 196 | powerfully 197 | promptly 198 | properly 199 | punctually 200 | quaintly 201 | quarrelsomely 202 | queasily 203 | queerly 204 | questionably 205 | questioningly 206 | quicker 207 | quickly 208 | quietly 209 | quirkily 210 | quizzically 211 | rapidly 212 | rarely 213 | readily 214 | really 215 | reassuringly 216 | recklessly 217 | regularly 218 | reluctantly 219 | repeatedly 220 | reproachfully 221 | restfully 222 | righteously 223 | rightfully 224 | rigidly 225 | roughly 226 | rudely 227 | sadly 228 | safely 229 | scarcely 230 | scarily 231 | searchingly 232 | sedately 233 | seemingly 234 | seldom 235 | selfishly 236 | separately 237 | seriously 238 | shakily 239 | sharply 240 | sheepishly 241 | shrilly 242 | shyly 243 | silently 244 | sleepily 245 | slowly 246 | smoothly 247 | softly 248 | solemnly 249 | solidly 250 | sometimes 251 | soon 252 | speedily 253 | stealthily 254 | sternly 255 | strictly 256 | successfully 257 | suddenly 258 | surprisingly 259 | suspiciously 260 | sweetly 261 | swiftly 262 | sympathetically 263 | tenderly 264 | tensely 265 | terribly 266 | thankfully 267 | thoroughly 268 | thoughtfully 269 | tightly 270 | tomorrow 271 | too 272 | tremendously 273 | triumphantly 274 | truly 275 | truthfully 276 | ultimately 277 | unabashedly 278 | unaccountably 279 | unbearably 280 | unethically 281 | unexpectedly 282 | unfortunately 283 | unimpressively 284 | unnaturally 285 | unnecessarily 286 | utterly 287 | upbeat 288 | upliftingly 289 | upright 290 | upside-down 291 | upward 292 | upwardly 293 | urgently 294 | usefully 295 | uselessly 296 | usually 297 | utterly 298 | vacantly 299 | vaguely 300 | vainly 301 | valiantly 302 | vastly 303 | verbally 304 | very 305 | viciously 306 | victoriously 307 | violently 308 | vivaciously 309 | voluntarily 310 | warmly 311 | weakly 312 | wearily 313 | well 314 | wetly 315 | wholly 316 | wildly 317 | willfully 318 | wisely 319 | woefully 320 | wonderfully 321 | worriedly 322 | wrongly 323 | yawningly 324 | yearly 325 | yearningly 326 | yesterday 327 | yieldingly 328 | youthfully 329 | zealously 330 | zestfully 331 | zestily 332 | yet 333 | else 334 | just -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | from utils import 
compress_attention, create_mapping, BFS, build_graph, is_word
2 | from multiprocessing import Pool
3 | import spacy
4 | import en_core_web_md
5 | import torch
6 | from transformers import AutoTokenizer, BertModel, GPT2Model
7 | from constant import invalid_relations_set
8 | 
9 | 
10 | def process_matrix(attentions, layer_idx=-1, head_num=0, avg_head=False, trim=True, use_cuda=True):
11 |     if avg_head:
12 |         if use_cuda:
13 |             attn = torch.mean(attentions[layer_idx][0], 0).cpu()  # attentions: one (batch, heads, seq, seq) tensor per layer
14 |         else:
15 |             attn = torch.mean(attentions[layer_idx][0], 0)
16 |         attention_matrix = attn.detach().numpy()
17 |     else:
18 |         attn = attentions[layer_idx][0][head_num]
19 |         if use_cuda:
20 |             attn = attn.cpu()
21 |         attention_matrix = attn.detach().numpy()
22 | 
23 |     if trim:  # strip the [CLS]/[SEP] rows and columns (BERT-style inputs only)
24 |         attention_matrix = attention_matrix[1:-1, 1:-1]
25 |     return attention_matrix
26 | 
27 | def bfs(args):
28 |     s, end, graph, max_size, black_list_relation = args
29 |     return BFS(s, end, graph, max_size, black_list_relation)
30 | 
31 | 
32 | def check_relations_validity(relations):
33 |     for rel in relations:
34 |         if rel.lower() in invalid_relations_set or rel.isnumeric():
35 |             return False
36 |     return True
37 | 
38 | def global_initializer(nlp_object):
39 |     global spacy_nlp
40 |     spacy_nlp = nlp_object
41 | 
42 | def filter_relation_sets(params):
43 |     triplet, id2token = params
44 | 
45 |     triplet_idx = triplet[0]
46 |     confidence = triplet[1]
47 |     head, tail = triplet_idx[0], triplet_idx[-1]
48 |     if head in id2token and tail in id2token:
49 |         head = id2token[head]
50 |         tail = id2token[tail]
51 |         relations = [spacy_nlp(id2token[idx])[0].lemma_ for idx in triplet_idx[1:-1] if idx in id2token]
52 |         if len(relations) > 0 and check_relations_validity(relations) and head.lower() not in invalid_relations_set and tail.lower() not in invalid_relations_set:
53 |             return {'h': head, 't': tail, 'r': relations, 'c': confidence}
54 |     return {}
55 | 
56 | def parse_sentence(sentence, tokenizer, encoder, nlp, use_cuda=True):
57 |     '''Implement the Match part of MAMA.
58 | 
59 |     '''
60 |     tokenizer_name = type(tokenizer).__name__  # e.g. 'BertTokenizer' or 'GPT2Tokenizer'
61 | 
62 |     inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(sentence, return_pt=True, nlp=nlp, tokenizer=tokenizer)
63 | 
64 |     with torch.no_grad():
65 |         if use_cuda:
66 |             for key in inputs.keys():
67 |                 inputs[key] = inputs[key].cuda()
68 |         outputs = encoder(**inputs, output_attentions=True)
69 |     trim = True
70 |     if 'GPT2' in tokenizer_name:
71 |         trim = False
72 | 
73 |     '''
74 |     Use the average of the last layer's attention: page 6, section 3.1.2
75 |     '''
76 |     attention = process_matrix(outputs[2], avg_head=True, trim=trim, use_cuda=use_cuda)
77 | 
78 |     merged_attention = compress_attention(attention, tokenid2word_mapping)
79 |     attn_graph = build_graph(merged_attention)
80 | 
81 |     tail_head_pairs = []
82 |     for head in noun_chunks:
83 |         for tail in noun_chunks:
84 |             if head != tail:
85 |                 tail_head_pairs.append((token2id[head], token2id[tail]))
86 | 
87 |     black_list_relation = set([token2id[n] for n in noun_chunks])
88 | 
89 |     all_relation_pairs = []
90 |     id2token = {value: key for key, value in token2id.items()}
91 | 
92 |     with Pool(10) as pool:
93 |         params = [(pair[0], pair[1], attn_graph, max(tokenid2word_mapping), black_list_relation,) for pair in tail_head_pairs]
94 |         for output in pool.imap_unordered(bfs, params):
95 |             if len(output):
96 |                 all_relation_pairs += [(o, id2token) for o in output]
97 | 
98 |     triplet_text = []
99 |     with Pool(10, global_initializer, (nlp,)) as pool:
100 |         for triplet in pool.imap_unordered(filter_relation_sets, all_relation_pairs):
101 |             if len(triplet) > 0:
102 |                 triplet_text.append(triplet)
103 |     return triplet_text
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     import json
108 |     from tqdm import tqdm
109 | 
110 |     nlp = en_core_web_md.load()
111 |     selected_model = 'gpt2-medium'
112 | 
113 |     use_cuda = True
114 | 
115 | 
116 |     tokenizer = AutoTokenizer.from_pretrained(selected_model)
117 |     encoder = GPT2Model.from_pretrained(selected_model)
118 |     encoder.eval()
119 |     if use_cuda:
120 |         encoder = encoder.cuda()
121 | 
122 |     target_files = [
123 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/train-v1.1.json',
124 |         # '../../Documents/KGERT-v2/datasets/squad_v1.1/wiki_dev_2020-18.json',
125 |         # '../../Documents/KGERT-v2/datasets/squad_v1/dev-v1.1.json',
126 |     ]
127 | 
128 |     output_filenames = [
129 |         'train_v1.1.jsonl',
130 |         # 'wiki_2020-18.jsonl',
131 |         # 'dev-v1.1.jsonl',
132 |     ]
133 | 
134 |     for target_file, output_filename in zip(target_files, output_filenames):
135 |         with open(target_file, 'r') as f:
136 |             dataset = json.load(f)
137 | 
138 |         output_filename = selected_model + '_' + output_filename
139 | 
140 |         print(target_file, output_filename)
141 | 
142 |         f = open(output_filename, 'w')
143 |         for data in tqdm(dataset['data'], dynamic_ncols=True):
144 |             for para in data['paragraphs']:
145 |                 context = para['context']
146 |                 for sent in nlp(context).sents:
147 |                     for output in parse_sentence(sent.text, tokenizer, encoder, nlp, use_cuda=use_cuda):
148 |                         f.write(json.dumps(output) + '\n')
149 |                 f.flush()
150 | 
151 |                 for question in para['qas']:
152 |                     question = question['question']
153 |                     for output in parse_sentence(question, tokenizer, encoder, nlp, use_cuda=use_cuda):
154 |                         f.write(json.dumps(output) + '\n')
155 |                     f.flush()
156 |         f.close()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import re
4 | 
5 | alphabet = re.compile(r'^[a-zA-Z]+$')
6 | 
7 | from copy import copy
8 | from collections import defaultdict
9 | 
10 | def build_graph(matrix):
11 |     graph = defaultdict(list)
12 | 
13 |     for idx in range(0, len(matrix)):
14 |         for col in range(idx + 1, len(matrix)):
15 |             graph[idx].append((col, matrix[idx][col]))
16 |     return graph
17 | 
18 | def BFS(s, end, graph, max_size=-1, black_list_relation=()):
19 |     visited = [False] * (max(graph.keys()) + 100)
20 | 
21 |     # Create a queue for BFS
22 |     queue = []
23 | 
24 |     # Mark the source node as
25 |     # visited and enqueue it
26 |     queue.append((s, [(s, 0)]))
27 | 
28 |     found_paths = []
29 | 
30 |     visited[s] = True
31 | 
32 |     while queue:
33 | 
34 |         s, path = queue.pop(0)
35 | 
36 |         # Get all adjacent vertices of the
37 |         # dequeued vertex s. If an adjacent vertex
38 |         # has not been visited, then mark it
39 |         # visited and enqueue it
40 |         for i, conf in graph[s]:
41 |             if i == end:
42 |                 found_paths.append(path + [(i, conf)])
43 |                 break
44 |             if not visited[i]:
45 |                 queue.append((i, copy(path) + [(i, conf)]))
46 |                 visited[i] = True
47 | 
48 |     candidate_facts = []
49 |     for path_pairs in found_paths:
50 |         if len(path_pairs) < 3:
51 |             continue
52 |         path = []
53 |         cum_conf = 0
54 |         for (node, conf) in path_pairs:
55 |             path.append(node)
56 |             cum_conf += conf
57 | 
58 |         if path[1] in black_list_relation:
59 |             continue
60 | 
61 |         candidate_facts.append((path, cum_conf))
62 | 
63 |     candidate_facts = sorted(candidate_facts, key=lambda x: x[1], reverse=True)
64 |     return candidate_facts
65 | 
66 | def is_word(token):
67 |     if len(token) == 1 and alphabet.match(token) is None:
68 |         return False
69 |     return True
70 | 
71 | def create_mapping(sentence, return_pt=False, nlp=None, tokenizer=None):
72 |     '''Create a mapping
73 |     nlp: spacy model
74 |     tokenizer: huggingface tokenizer
75 |     '''
76 |     doc = nlp(sentence)
77 | 
78 |     tokens = list(doc)
79 | 
80 |     chunk2id = {}
81 | 
82 |     start_chunk = []
83 |     end_chunk = []
84 |     noun_chunks = []
85 |     for chunk in doc.noun_chunks:
86 |         noun_chunks.append(chunk.text)
87 |         start_chunk.append(chunk.start)
88 |         end_chunk.append(chunk.end)
89 | 
90 |     sentence_mapping = []
91 |     token2id = {}
92 |     mode = 0  # 1: in chunk, 0: not in chunk
93 |     chunk_id = 0
94 |     for idx, token in enumerate(doc):
95 |         if idx in start_chunk:
96 |             mode = 1
97 |             sentence_mapping.append(noun_chunks[chunk_id])
98 |             token2id[sentence_mapping[-1]] = len(token2id)
99 |             chunk_id += 1
100 |         elif idx in end_chunk:
101 |             mode = 0
102 | 
103 |         if mode == 0:
104 |             sentence_mapping.append(token.text)
105 |             token2id[sentence_mapping[-1]] = len(token2id)
106 | 
107 | 
108 |     token_ids = []
109 |     tokenid2word_mapping = []
110 | 
111 |     for token in sentence_mapping:
112 |         subtoken_ids = tokenizer(str(token), add_special_tokens=False)['input_ids']
113 |         tokenid2word_mapping += [token2id[token]] * len(subtoken_ids)
114 |         token_ids += subtoken_ids
115 | 
116 |     tokenizer_name = type(tokenizer).__name__  # e.g. 'BertTokenizer' or 'GPT2Tokenizer'
117 |     if 'GPT2' in tokenizer_name:
118 |         outputs = {
119 |             'input_ids': token_ids,
120 |             'attention_mask': [1] * len(token_ids),
121 |         }
122 | 
123 |     else:
124 |         outputs = {
125 |             'input_ids': [tokenizer.cls_token_id] + token_ids + [tokenizer.sep_token_id],
126 |             'attention_mask': [1] * (len(token_ids) + 2),
127 |             'token_type_ids': [0] * (len(token_ids) + 2)
128 |         }
129 | 
130 |     if return_pt:
131 |         for key, value in outputs.items():
132 |             outputs[key] = torch.from_numpy(np.array(value)).long().unsqueeze(0)
133 | 
134 |     return outputs, tokenid2word_mapping, token2id, noun_chunks
135 | 
136 | def compress_attention(attention, tokenid2word_mapping, operator=np.mean):
137 | 
138 |     new_index = []
139 | 
140 |     prev = -1
141 |     for idx, row in enumerate(attention):
142 |         token_id = tokenid2word_mapping[idx]
143 |         if token_id != prev:
144 |             new_index.append([row])
145 |             prev = token_id
146 |         else:
147 |             new_index[-1].append(row)
148 | 
149 |     new_matrix = []
150 |     for row in new_index:
151 |         new_matrix.append(operator(np.array(row), 0))
152 | 
153 |     new_matrix = np.array(new_matrix)
154 | 
155 |     attention = np.array(new_matrix).T
156 | 
157 |     prev = -1
158 |     new_index = []
159 |     for idx, row in enumerate(attention):
160 |         token_id = tokenid2word_mapping[idx]
161 |         if token_id != prev:
162 |             new_index.append([row])
163 |             prev = token_id
164 |         else:
165 |             new_index[-1].append(row)
166 | 
167 | 
168 |     new_matrix = 
[] 169 | for row in new_index: 170 | new_matrix.append(operator(np.array(row), 0)) 171 | 172 | new_matrix = np.array(new_matrix) 173 | 174 | return new_matrix.T 175 | 176 | def index2word(tokenid2word_mapping, token2id): 177 | tokens = [] 178 | prev = -1 179 | for token_id in tokenid2word_mapping: 180 | if token_id == prev: 181 | continue 182 | 183 | tokens.append(token2id[token_id]) 184 | prev = token_id 185 | 186 | return tokens 187 | 188 | 189 | 190 | if __name__ == '__main__': 191 | import en_core_web_sm 192 | from transformers import AutoTokenizer, BertModel 193 | tokenizer = AutoTokenizer.from_pretrained('bert-base-cased') 194 | encoder = BertModel.from_pretrained('bert-base-cased') 195 | nlp = en_core_web_sm.load() 196 | 197 | sentence = 'Rolling Stone wrote: “No other pop song has so thoroughly challenged artistic conventions”' 198 | sentence = 'Dylan sing "Time They Are Changing"' 199 | inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(sentence, return_pt=True, nlp=nlp, tokenizer=tokenizer) 200 | 201 | outputs = encoder(**inputs, output_attentions=True) 202 | print(noun_chunks, tokenid2word_mapping, token2id) 203 | -------------------------------------------------------------------------------- /corpus/english-adjectives.txt: -------------------------------------------------------------------------------- 1 | abandoned 2 | able 3 | absolute 4 | adorable 5 | adventurous 6 | academic 7 | acceptable 8 | acclaimed 9 | accomplished 10 | accurate 11 | aching 12 | acidic 13 | acrobatic 14 | active 15 | actual 16 | adept 17 | admirable 18 | admired 19 | adolescent 20 | adorable 21 | adored 22 | advanced 23 | afraid 24 | affectionate 25 | aged 26 | aggravating 27 | aggressive 28 | agile 29 | agitated 30 | agonizing 31 | agreeable 32 | ajar 33 | alarmed 34 | alarming 35 | alert 36 | alienated 37 | alive 38 | all 39 | altruistic 40 | amazing 41 | ambitious 42 | ample 43 | amused 44 | amusing 45 | anchored 46 | ancient 47 | angelic 48 | angry 49 | anguished 50 | animated 51 | annual 52 | another 53 | antique 54 | anxious 55 | any 56 | apprehensive 57 | appropriate 58 | apt 59 | arctic 60 | arid 61 | aromatic 62 | artistic 63 | ashamed 64 | assured 65 | astonishing 66 | athletic 67 | attached 68 | attentive 69 | attractive 70 | austere 71 | authentic 72 | authorized 73 | automatic 74 | avaricious 75 | average 76 | aware 77 | awesome 78 | awful 79 | awkward 80 | babyish 81 | bad 82 | back 83 | baggy 84 | bare 85 | barren 86 | basic 87 | beautiful 88 | belated 89 | beloved 90 | beneficial 91 | better 92 | best 93 | bewitched 94 | big 95 | big-hearted 96 | biodegradable 97 | bite-sized 98 | bitter 99 | black 100 | black-and-white 101 | bland 102 | blank 103 | blaring 104 | bleak 105 | blind 106 | blissful 107 | blond 108 | blue 109 | blushing 110 | bogus 111 | boiling 112 | bold 113 | bony 114 | boring 115 | bossy 116 | both 117 | bouncy 118 | bountiful 119 | bowed 120 | brave 121 | breakable 122 | brief 123 | bright 124 | brilliant 125 | brisk 126 | broken 127 | bronze 128 | brown 129 | bruised 130 | bubbly 131 | bulky 132 | bumpy 133 | buoyant 134 | burdensome 135 | burly 136 | bustling 137 | busy 138 | buttery 139 | buzzing 140 | calculating 141 | calm 142 | candid 143 | canine 144 | capital 145 | carefree 146 | careful 147 | careless 148 | caring 149 | cautious 150 | cavernous 151 | celebrated 152 | charming 153 | cheap 154 | cheerful 155 | cheery 156 | chief 157 | chilly 158 | chubby 159 | circular 160 | classic 161 | clean 162 | clear 163 | clear-cut 164 | clever 165 | 
close 166 | closed 167 | cloudy 168 | clueless 169 | clumsy 170 | cluttered 171 | coarse 172 | cold 173 | colorful 174 | colorless 175 | colossal 176 | comfortable 177 | common 178 | compassionate 179 | competent 180 | complete 181 | complex 182 | complicated 183 | composed 184 | concerned 185 | concrete 186 | confused 187 | conscious 188 | considerate 189 | constant 190 | content 191 | conventional 192 | cooked 193 | cool 194 | cooperative 195 | coordinated 196 | corny 197 | corrupt 198 | costly 199 | courageous 200 | courteous 201 | crafty 202 | crazy 203 | creamy 204 | creative 205 | creepy 206 | criminal 207 | crisp 208 | critical 209 | crooked 210 | crowded 211 | cruel 212 | crushing 213 | cuddly 214 | cultivated 215 | cultured 216 | cumbersome 217 | curly 218 | curvy 219 | cute 220 | cylindrical 221 | damaged 222 | damp 223 | dangerous 224 | dapper 225 | daring 226 | darling 227 | dark 228 | dazzling 229 | dead 230 | deadly 231 | deafening 232 | dear 233 | dearest 234 | decent 235 | decimal 236 | decisive 237 | deep 238 | defenseless 239 | defensive 240 | defiant 241 | deficient 242 | definite 243 | definitive 244 | delayed 245 | delectable 246 | delicious 247 | delightful 248 | delirious 249 | demanding 250 | dense 251 | dental 252 | dependable 253 | dependent 254 | descriptive 255 | deserted 256 | detailed 257 | determined 258 | devoted 259 | different 260 | difficult 261 | digital 262 | diligent 263 | dim 264 | dimpled 265 | dimwitted 266 | direct 267 | disastrous 268 | discrete 269 | disfigured 270 | disgusting 271 | disloyal 272 | dismal 273 | distant 274 | downright 275 | dreary 276 | dirty 277 | disguised 278 | dishonest 279 | dismal 280 | distant 281 | distinct 282 | distorted 283 | dizzy 284 | dopey 285 | doting 286 | double 287 | downright 288 | drab 289 | drafty 290 | dramatic 291 | dreary 292 | droopy 293 | dry 294 | dual 295 | dull 296 | dutiful 297 | each 298 | eager 299 | earnest 300 | early 301 | easy 302 | easy-going 303 | ecstatic 304 | edible 305 | educated 306 | elaborate 307 | elastic 308 | elated 309 | elderly 310 | electric 311 | elegant 312 | elementary 313 | elliptical 314 | embarrassed 315 | embellished 316 | eminent 317 | emotional 318 | empty 319 | enchanted 320 | enchanting 321 | energetic 322 | enlightened 323 | enormous 324 | enraged 325 | entire 326 | envious 327 | equal 328 | equatorial 329 | essential 330 | esteemed 331 | ethical 332 | euphoric 333 | even 334 | evergreen 335 | everlasting 336 | every 337 | evil 338 | exalted 339 | excellent 340 | exemplary 341 | exhausted 342 | excitable 343 | excited 344 | exciting 345 | exotic 346 | expensive 347 | experienced 348 | expert 349 | extraneous 350 | extroverted 351 | extra-large 352 | extra-small 353 | fabulous 354 | failing 355 | faint 356 | fair 357 | faithful 358 | fake 359 | false 360 | familiar 361 | famous 362 | fancy 363 | fantastic 364 | far 365 | faraway 366 | far-flung 367 | far-off 368 | fast 369 | fat 370 | fatal 371 | fatherly 372 | favorable 373 | favorite 374 | fearful 375 | fearless 376 | feisty 377 | feline 378 | female 379 | feminine 380 | few 381 | fickle 382 | filthy 383 | fine 384 | finished 385 | firm 386 | first 387 | firsthand 388 | fitting 389 | fixed 390 | flaky 391 | flamboyant 392 | flashy 393 | flat 394 | flawed 395 | flawless 396 | flickering 397 | flimsy 398 | flippant 399 | flowery 400 | fluffy 401 | fluid 402 | flustered 403 | focused 404 | fond 405 | foolhardy 406 | foolish 407 | forceful 408 | forked 409 | formal 410 | forsaken 411 | forthright 412 | fortunate 413 | 
fragrant 414 | frail 415 | frank 416 | frayed 417 | free 418 | French 419 | fresh 420 | frequent 421 | friendly 422 | frightened 423 | frightening 424 | frigid 425 | frilly 426 | frizzy 427 | frivolous 428 | front 429 | frosty 430 | frozen 431 | frugal 432 | fruitful 433 | full 434 | fumbling 435 | functional 436 | funny 437 | fussy 438 | fuzzy 439 | gargantuan 440 | gaseous 441 | general 442 | generous 443 | gentle 444 | genuine 445 | giant 446 | giddy 447 | gigantic 448 | gifted 449 | giving 450 | glamorous 451 | glaring 452 | glass 453 | gleaming 454 | gleeful 455 | glistening 456 | glittering 457 | gloomy 458 | glorious 459 | glossy 460 | glum 461 | golden 462 | good 463 | good-natured 464 | gorgeous 465 | graceful 466 | gracious 467 | grand 468 | grandiose 469 | granular 470 | grateful 471 | grave 472 | gray 473 | great 474 | greedy 475 | green 476 | gregarious 477 | grim 478 | grimy 479 | gripping 480 | grizzled 481 | gross 482 | grotesque 483 | grouchy 484 | grounded 485 | growing 486 | growling 487 | grown 488 | grubby 489 | gruesome 490 | grumpy 491 | guilty 492 | gullible 493 | gummy 494 | hairy 495 | half 496 | handmade 497 | handsome 498 | handy 499 | happy 500 | happy-go-lucky 501 | hard 502 | hard-to-find 503 | harmful 504 | harmless 505 | harmonious 506 | harsh 507 | hasty 508 | hateful 509 | haunting 510 | healthy 511 | heartfelt 512 | hearty 513 | heavenly 514 | heavy 515 | hefty 516 | helpful 517 | helpless 518 | hidden 519 | hideous 520 | high 521 | high-level 522 | hilarious 523 | hoarse 524 | hollow 525 | homely 526 | honest 527 | honorable 528 | honored 529 | hopeful 530 | horrible 531 | hospitable 532 | hot 533 | huge 534 | humble 535 | humiliating 536 | humming 537 | humongous 538 | hungry 539 | hurtful 540 | husky 541 | icky 542 | icy 543 | ideal 544 | idealistic 545 | identical 546 | idle 547 | idiotic 548 | idolized 549 | ignorant 550 | ill 551 | illegal 552 | ill-fated 553 | ill-informed 554 | illiterate 555 | illustrious 556 | imaginary 557 | imaginative 558 | immaculate 559 | immaterial 560 | immediate 561 | immense 562 | impassioned 563 | impeccable 564 | impartial 565 | imperfect 566 | imperturbable 567 | impish 568 | impolite 569 | important 570 | impossible 571 | impractical 572 | impressionable 573 | impressive 574 | improbable 575 | impure 576 | inborn 577 | incomparable 578 | incompatible 579 | incomplete 580 | inconsequential 581 | incredible 582 | indelible 583 | inexperienced 584 | indolent 585 | infamous 586 | infantile 587 | infatuated 588 | inferior 589 | infinite 590 | informal 591 | innocent 592 | insecure 593 | insidious 594 | insignificant 595 | insistent 596 | instructive 597 | insubstantial 598 | intelligent 599 | intent 600 | intentional 601 | interesting 602 | internal 603 | international 604 | intrepid 605 | ironclad 606 | irresponsible 607 | irritating 608 | itchy 609 | jaded 610 | jagged 611 | jam-packed 612 | jaunty 613 | jealous 614 | jittery 615 | joint 616 | jolly 617 | jovial 618 | joyful 619 | joyous 620 | jubilant 621 | judicious 622 | juicy 623 | jumbo 624 | junior 625 | jumpy 626 | juvenile 627 | kaleidoscopic 628 | keen 629 | key 630 | kind 631 | kindhearted 632 | kindly 633 | klutzy 634 | knobby 635 | knotty 636 | knowledgeable 637 | knowing 638 | known 639 | kooky 640 | kosher 641 | lame 642 | lanky 643 | large 644 | last 645 | lasting 646 | late 647 | lavish 648 | lawful 649 | lazy 650 | leading 651 | lean 652 | leafy 653 | left 654 | legal 655 | legitimate 656 | light 657 | lighthearted 658 | likable 659 | likely 660 | 
limited 661 | limp 662 | limping 663 | linear 664 | lined 665 | liquid 666 | little 667 | live 668 | lively 669 | livid 670 | loathsome 671 | lone 672 | lonely 673 | long 674 | long-term 675 | loose 676 | lopsided 677 | lost 678 | loud 679 | lovable 680 | lovely 681 | loving 682 | low 683 | loyal 684 | lucky 685 | lumbering 686 | luminous 687 | lumpy 688 | lustrous 689 | luxurious 690 | mad 691 | made-up 692 | magnificent 693 | majestic 694 | major 695 | male 696 | mammoth 697 | married 698 | marvelous 699 | masculine 700 | massive 701 | mature 702 | meager 703 | mealy 704 | mean 705 | measly 706 | meaty 707 | medical 708 | mediocre 709 | medium 710 | meek 711 | mellow 712 | melodic 713 | memorable 714 | menacing 715 | merry 716 | messy 717 | metallic 718 | mild 719 | milky 720 | mindless 721 | miniature 722 | minor 723 | minty 724 | miserable 725 | miserly 726 | misguided 727 | misty 728 | mixed 729 | modern 730 | modest 731 | moist 732 | monstrous 733 | monthly 734 | monumental 735 | moral 736 | mortified 737 | motherly 738 | motionless 739 | mountainous 740 | muddy 741 | muffled 742 | multicolored 743 | mundane 744 | murky 745 | mushy 746 | musty 747 | muted 748 | mysterious 749 | naive 750 | narrow 751 | nasty 752 | natural 753 | naughty 754 | nautical 755 | near 756 | neat 757 | necessary 758 | needy 759 | negative 760 | neglected 761 | negligible 762 | neighboring 763 | nervous 764 | new 765 | next 766 | nice 767 | nifty 768 | nimble 769 | nippy 770 | nocturnal 771 | noisy 772 | nonstop 773 | normal 774 | notable 775 | noted 776 | noteworthy 777 | novel 778 | noxious 779 | numb 780 | nutritious 781 | nutty 782 | obedient 783 | obese 784 | oblong 785 | oily 786 | oblong 787 | obvious 788 | occasional 789 | odd 790 | oddball 791 | offbeat 792 | offensive 793 | official 794 | old 795 | old-fashioned 796 | only 797 | open 798 | optimal 799 | optimistic 800 | opulent 801 | orange 802 | orderly 803 | organic 804 | ornate 805 | ornery 806 | ordinary 807 | original 808 | other 809 | our 810 | outlying 811 | outgoing 812 | outlandish 813 | outrageous 814 | outstanding 815 | oval 816 | overcooked 817 | overdue 818 | overjoyed 819 | overlooked 820 | palatable 821 | pale 822 | paltry 823 | parallel 824 | parched 825 | partial 826 | passionate 827 | past 828 | pastel 829 | peaceful 830 | peppery 831 | perfect 832 | perfumed 833 | periodic 834 | perky 835 | personal 836 | pertinent 837 | pesky 838 | pessimistic 839 | petty 840 | phony 841 | physical 842 | piercing 843 | pink 844 | pitiful 845 | plain 846 | plaintive 847 | plastic 848 | playful 849 | pleasant 850 | pleased 851 | pleasing 852 | plump 853 | plush 854 | polished 855 | polite 856 | political 857 | pointed 858 | pointless 859 | poised 860 | poor 861 | popular 862 | portly 863 | posh 864 | positive 865 | possible 866 | potable 867 | powerful 868 | powerless 869 | practical 870 | precious 871 | present 872 | prestigious 873 | pretty 874 | precious 875 | previous 876 | pricey 877 | prickly 878 | primary 879 | prime 880 | pristine 881 | private 882 | prize 883 | probable 884 | productive 885 | profitable 886 | profuse 887 | proper 888 | proud 889 | prudent 890 | punctual 891 | pungent 892 | puny 893 | pure 894 | purple 895 | pushy 896 | putrid 897 | puzzled 898 | puzzling 899 | quaint 900 | qualified 901 | quarrelsome 902 | quarterly 903 | queasy 904 | querulous 905 | questionable 906 | quick 907 | quick-witted 908 | quiet 909 | quintessential 910 | quirky 911 | quixotic 912 | quizzical 913 | radiant 914 | ragged 915 | rapid 916 | rare 917 
| rash 918 | raw 919 | recent 920 | reckless 921 | rectangular 922 | ready 923 | real 924 | realistic 925 | reasonable 926 | red 927 | reflecting 928 | regal 929 | regular 930 | reliable 931 | relieved 932 | remarkable 933 | remorseful 934 | remote 935 | repentant 936 | required 937 | respectful 938 | responsible 939 | repulsive 940 | revolving 941 | rewarding 942 | rich 943 | rigid 944 | right 945 | ringed 946 | ripe 947 | roasted 948 | robust 949 | rosy 950 | rotating 951 | rotten 952 | rough 953 | round 954 | rowdy 955 | royal 956 | rubbery 957 | rundown 958 | ruddy 959 | rude 960 | runny 961 | rural 962 | rusty 963 | sad 964 | safe 965 | salty 966 | same 967 | sandy 968 | sane 969 | sarcastic 970 | sardonic 971 | satisfied 972 | scaly 973 | scarce 974 | scared 975 | scary 976 | scented 977 | scholarly 978 | scientific 979 | scornful 980 | scratchy 981 | scrawny 982 | second 983 | secondary 984 | second-hand 985 | secret 986 | self-assured 987 | self-reliant 988 | selfish 989 | sentimental 990 | separate 991 | serene 992 | serious 993 | serpentine 994 | several 995 | severe 996 | shabby 997 | shadowy 998 | shady 999 | shallow 1000 | shameful 1001 | shameless 1002 | sharp 1003 | shimmering 1004 | shiny 1005 | shocked 1006 | shocking 1007 | shoddy 1008 | short 1009 | short-term 1010 | showy 1011 | shrill 1012 | shy 1013 | sick 1014 | silent 1015 | silky 1016 | silly 1017 | silver 1018 | similar 1019 | simple 1020 | simplistic 1021 | sinful 1022 | single 1023 | sizzling 1024 | skeletal 1025 | skinny 1026 | sleepy 1027 | slight 1028 | slim 1029 | slimy 1030 | slippery 1031 | slow 1032 | slushy 1033 | small 1034 | smart 1035 | smoggy 1036 | smooth 1037 | smug 1038 | snappy 1039 | snarling 1040 | sneaky 1041 | sniveling 1042 | snoopy 1043 | sociable 1044 | soft 1045 | soggy 1046 | solid 1047 | somber 1048 | some 1049 | spherical 1050 | sophisticated 1051 | sore 1052 | sorrowful 1053 | soulful 1054 | soupy 1055 | sour 1056 | Spanish 1057 | sparkling 1058 | sparse 1059 | specific 1060 | spectacular 1061 | speedy 1062 | spicy 1063 | spiffy 1064 | spirited 1065 | spiteful 1066 | splendid 1067 | spotless 1068 | spotted 1069 | spry 1070 | square 1071 | squeaky 1072 | squiggly 1073 | stable 1074 | staid 1075 | stained 1076 | stale 1077 | standard 1078 | starchy 1079 | stark 1080 | starry 1081 | steep 1082 | sticky 1083 | stiff 1084 | stimulating 1085 | stingy 1086 | stormy 1087 | straight 1088 | strange 1089 | steel 1090 | strict 1091 | strident 1092 | striking 1093 | striped 1094 | strong 1095 | studious 1096 | stunning 1097 | stupendous 1098 | stupid 1099 | sturdy 1100 | stylish 1101 | subdued 1102 | submissive 1103 | substantial 1104 | subtle 1105 | suburban 1106 | sudden 1107 | sugary 1108 | sunny 1109 | super 1110 | superb 1111 | superficial 1112 | superior 1113 | supportive 1114 | sure-footed 1115 | surprised 1116 | suspicious 1117 | svelte 1118 | sweaty 1119 | sweet 1120 | sweltering 1121 | swift 1122 | sympathetic 1123 | tall 1124 | talkative 1125 | tame 1126 | tan 1127 | tangible 1128 | tart 1129 | tasty 1130 | tattered 1131 | taut 1132 | tedious 1133 | teeming 1134 | tempting 1135 | tender 1136 | tense 1137 | tepid 1138 | terrible 1139 | terrific 1140 | testy 1141 | thankful 1142 | that 1143 | these 1144 | thick 1145 | thin 1146 | third 1147 | thirsty 1148 | this 1149 | thorough 1150 | thorny 1151 | those 1152 | thoughtful 1153 | threadbare 1154 | thrifty 1155 | thunderous 1156 | tidy 1157 | tight 1158 | timely 1159 | tinted 1160 | tiny 1161 | tired 1162 | torn 1163 | total 1164 | tough 
1165 | traumatic 1166 | treasured 1167 | tremendous 1168 | tragic 1169 | trained 1170 | tremendous 1171 | triangular 1172 | tricky 1173 | trifling 1174 | trim 1175 | trivial 1176 | troubled 1177 | true 1178 | trusting 1179 | trustworthy 1180 | trusty 1181 | truthful 1182 | tubby 1183 | turbulent 1184 | twin 1185 | ugly 1186 | ultimate 1187 | unacceptable 1188 | unaware 1189 | uncomfortable 1190 | uncommon 1191 | unconscious 1192 | understated 1193 | unequaled 1194 | uneven 1195 | unfinished 1196 | unfit 1197 | unfolded 1198 | unfortunate 1199 | unhappy 1200 | unhealthy 1201 | uniform 1202 | unimportant 1203 | unique 1204 | united 1205 | unkempt 1206 | unknown 1207 | unlawful 1208 | unlined 1209 | unlucky 1210 | unnatural 1211 | unpleasant 1212 | unrealistic 1213 | unripe 1214 | unruly 1215 | unselfish 1216 | unsightly 1217 | unsteady 1218 | unsung 1219 | untidy 1220 | untimely 1221 | untried 1222 | untrue 1223 | unused 1224 | unusual 1225 | unwelcome 1226 | unwieldy 1227 | unwilling 1228 | unwitting 1229 | unwritten 1230 | upbeat 1231 | upright 1232 | upset 1233 | urban 1234 | usable 1235 | used 1236 | useful 1237 | useless 1238 | utilized 1239 | utter 1240 | vacant 1241 | vague 1242 | vain 1243 | valid 1244 | valuable 1245 | vapid 1246 | variable 1247 | vast 1248 | velvety 1249 | venerated 1250 | vengeful 1251 | verifiable 1252 | vibrant 1253 | vicious 1254 | victorious 1255 | vigilant 1256 | vigorous 1257 | villainous 1258 | violet 1259 | violent 1260 | virtual 1261 | virtuous 1262 | visible 1263 | vital 1264 | vivacious 1265 | vivid 1266 | voluminous 1267 | wan 1268 | warlike 1269 | warm 1270 | warmhearted 1271 | warped 1272 | wary 1273 | wasteful 1274 | watchful 1275 | waterlogged 1276 | watery 1277 | wavy 1278 | wealthy 1279 | weak 1280 | weary 1281 | webbed 1282 | wee 1283 | weekly 1284 | weepy 1285 | weighty 1286 | weird 1287 | welcome 1288 | well-documented 1289 | well-groomed 1290 | well-informed 1291 | well-lit 1292 | well-made 1293 | well-off 1294 | well-to-do 1295 | well-worn 1296 | wet 1297 | which 1298 | whimsical 1299 | whirlwind 1300 | whispered 1301 | white 1302 | whole 1303 | whopping 1304 | wicked 1305 | wide 1306 | wide-eyed 1307 | wiggly 1308 | wild 1309 | willing 1310 | wilted 1311 | winding 1312 | windy 1313 | winged 1314 | wiry 1315 | wise 1316 | witty 1317 | wobbly 1318 | woeful 1319 | wonderful 1320 | wooden 1321 | woozy 1322 | wordy 1323 | worldly 1324 | worn 1325 | worried 1326 | worrisome 1327 | worse 1328 | worst 1329 | worthless 1330 | worthwhile 1331 | worthy 1332 | wrathful 1333 | wretched 1334 | writhing 1335 | wrong 1336 | wry 1337 | yawning 1338 | yearly 1339 | yellow 1340 | yellowish 1341 | young 1342 | youthful 1343 | yummy 1344 | zany 1345 | zealous 1346 | zesty 1347 | zigzag 1348 | --------------------------------------------------------------------------------