├── images
│   └── bob_dylan.png
├── LICENSE
├── mapper.py
├── stats.py
├── README.md
├── constant.py
├── requirements.txt
├── .gitignore
├── extract.py
├── corpus
│   ├── adverbs.txt
│   └── english-adjectives.txt
├── process.py
└── utils.py

/images/bob_dylan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nidharap/language-models-are-knowledge-graphs-pytorch/HEAD/images/bob_dylan.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Ray Tam
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/mapper.py:
--------------------------------------------------------------------------------
1 | from constant import invalid_relations_set
2 | 
3 | from REL.db.generic import GenericLookup
4 | sqlite_path = "../../Documents/wiki_2020/generated"  # REL-generated wiki database
5 | emb = GenericLookup("entity_word_embedding", save_dir=sqlite_path, table_name="embeddings")
6 | 
7 | 
8 | 
9 | def Map(head, relations, tail, top_first=True, best_scores=True):
10 |     if head is None or tail is None or relations is None:
11 |         return {}
12 |     head_p_e_m = emb.wiki(str(head), 'wiki')
13 |     if head_p_e_m is None:
14 |         return {}
15 |     tail_p_e_m = emb.wiki(str(tail), 'wiki')
16 |     if tail_p_e_m is None:
17 |         return {}
18 |     tail_p_e_m = tail_p_e_m[0]
19 |     head_p_e_m = head_p_e_m[0]
20 |     valid_relations = [r for r in relations if r not in invalid_relations_set and r.isalpha() and len(r) > 1]  # drop stop words, auxiliaries, adjectives and adverbs
21 |     if len(valid_relations) == 0:
22 |         return {}
23 | 
24 |     return {'h': head_p_e_m[0], 't': tail_p_e_m[0], 'r': '_'.join(valid_relations)}
25 | 
26 | def deduplication(triplets):
27 |     unique_pairs = []
28 |     pair_confidence = []
29 |     for t in triplets:
30 |         key = '{}\t{}\t{}'.format(t['h'], t['r'], t['t'])
31 |         conf = t['c']
32 |         if key not in unique_pairs:
33 |             unique_pairs.append(key)
34 |             pair_confidence.append(conf)
35 | 
36 |     unique_triplets = []
37 |     for idx, unique_pair in enumerate(unique_pairs):
38 |         h, r, t = unique_pair.split('\t')
39 |         unique_triplets.append({'h': h, 'r': r, 't': t, 'c': pair_confidence[idx]})
40 | 
41 |     return unique_triplets
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     emb = GenericLookup("entity_word_embedding", save_dir=sqlite_path, table_name="embeddings")
46 |     p_e_m = emb.wiki("Bob", 'wiki')[:10]
47 |     print(p_e_m)
48 | 
--------------------------------------------------------------------------------
/stats.py:
--------------------------------------------------------------------------------
1 | from utils import compress_attention, create_mapping, BFS, build_graph, is_word
2 | from multiprocessing import Pool
3 | import spacy
4 | import en_core_web_sm
5 | import torch
6 | from transformers import AutoTokenizer, BertModel
7 | 
8 | nlp = en_core_web_sm.load()
9 | 
10 | if __name__ == '__main__':
11 |     import json
12 |     from tqdm import tqdm
13 | 
14 |     target_files = [
15 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/wiki_dev_2020-18.json',
16 |         '../../Documents/KGERT-v2/datasets/squad_v1/dev-v1.1.json',
17 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/train-v1.1.json',
18 |     ]
19 | 
20 |     with open('stats.txt', 'a') as g:
21 |         for target_file in target_files:
22 |             with open(target_file, 'r') as f:
23 |                 dataset = json.load(f)
24 |             print(target_file)
25 | 
26 |             sentence_cnt = 0
27 |             word_cnt = 0
28 |             for data in tqdm(dataset['data'], dynamic_ncols=True):
29 |                 for para in data['paragraphs']:
30 |                     context = para['context']
31 |                     doc = nlp(context)
32 |                     sentence_cnt += len(list(doc.sents))
33 |                     word_cnt += len(list(doc))
34 | 
35 |                     for question in para['qas']:
36 |                         question = question['question']
37 |                         doc = nlp(question)
38 |                         sentence_cnt += len(list(doc.sents))
39 |                         word_cnt += len(list(doc))
40 | 
41 |             print('sentence : %d' % sentence_cnt)
42 |             print('word : %d' % word_cnt)
43 | 
44 |             g.write(target_file + '\n')
45 |             g.write('sentence : %d\n' % sentence_cnt)
46 |             g.write('word : %d\n' % word_cnt)
47 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # language-models-are-knowledge-graphs-pytorch
2 | Language models are open knowledge graphs (work in progress)
3 | 
4 | An unofficial reimplementation of [Language models are open knowledge graphs](https://arxiv.org/abs/2010.11967).
5 | 
6 | The implementation of Match is in process.py.
7 | 
8 | ![example bob dylan](https://raw.githubusercontent.com/theblackcat102/language-models-are-knowledge-graphs-pytorch/main/images/bob_dylan.png)
9 | 
10 | ### Execute the MAMA (Match and Map) section
11 | 
12 | Do note that the extracted results are still quite noisy and should afterwards be filtered based on the frequency of each unique relation pair.
13 | 
14 | ```
15 | python extract.py examples/bob_dylan.txt bert-large-cased-bob_dylan.jsonl --language_model bert-large-cased --use_cuda true
16 | ```
17 | 
18 | ## Map
19 | 
20 | 1. Entity linking
21 | 
22 | The original download link for the Stanford entity-linking data, [nlp.stanford.edu/pubs/crosswikis-data.tar.bz2](http://nlp.stanford.edu/pubs/crosswikis-data.tar.bz2), has been removed. I will use [REL](https://github.com/informagi/REL) as the entity disambiguation model (supervised, instead of the original unsupervised approach) to achieve the same task.
23 | 
24 | 2. Relations linking (page 5, 2.2.1)
25 | 
26 | Lemmatization is done in the previous step ([process.py](process.py)); in this stage we remove inflected forms, auxiliary verbs, adjectives and adverbs. A minimal sketch of this filter is shown below.
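27 | 
28 | For illustration, here is a minimal sketch of that relation filter, mirroring what `mapper.py` does with `constant.invalid_relations_set` (the input lemmas below are just a made-up example):
29 | 
30 | ```python
31 | # Keep only alphabetic, multi-character lemmas that are not listed in
32 | # invalid_relations_set (stop words, auxiliary lemmas, adjectives and
33 | # adverbs loaded from corpus/*.txt in constant.py), then join the
34 | # survivors into a single relation string.
35 | from constant import invalid_relations_set
36 | 
37 | def link_relation(relations):
38 |     valid = [r for r in relations
39 |              if r not in invalid_relations_set and r.isalpha() and len(r) > 1]
40 |     return '_'.join(valid) if valid else None
41 | 
42 | print(link_relation(['quickly', 'sign']))  # -> 'sign' ('quickly' is an adverb)
43 | ```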
44 | 
45 | Adjectives extracted from here: [https://gist.github.com/hugsy/8910dc78d208e40de42deb29e62df913](https://gist.github.com/hugsy/8910dc78d208e40de42deb29e62df913)
46 | 
47 | Adverbs extracted from here: [https://raw.githubusercontent.com/janester/mad_libs/master/List%20of%20Adverbs.txt](https://raw.githubusercontent.com/janester/mad_libs/master/List%20of%20Adverbs.txt)
48 | 
49 | ### Environment setup
50 | 
51 | This repo is run using virtualenv:
52 | 
53 | ```
54 | virtualenv -p python3 env
55 | source env/bin/activate
56 | pip install -r requirements.txt
57 | ```
58 | 
--------------------------------------------------------------------------------
/constant.py:
--------------------------------------------------------------------------------
1 | # Lemmas that may never serve as a relation: noisy tokens observed during
2 | # extraction, plus closed-class words and numbers.
3 | found_invalid = [
4 |     'and', 'of', 'in', 'to', ',', 'for', 'be', 'by', 'with', 'on', 'as', 'that', 'from', 'be', ')', '(', 'which',
5 |     'at', 'be', 'be', 'be', ';', 'or', 'but', 'have', 'have', 'the', 'have', 'not', 'after', '"', 'include', 'also',
6 |     'be', 'into', 'between', 'such', ':', 'do', 'while', 'when', 'during', 'would', 'over', 'since', '2019',
7 |     'well', 'than', '2020', 'under', 'where', 'one', 'be', 'hold', '2018', 'can', 'through', '-',
8 |     'make', 'out', 'there', 'know', 'due', 'a', 'take', 'up', 'begin', 'before', 'about',
9 |     "'", '4', '10', '3', '11', '&', '$', '12', '2015', '2008', '–', 'will',
10 |     'so', 'do', 'follow', 'most', 'although', 'cause', 'only', '—', '2007', '2014', 'mostly', '5', 'say', '2017', '20',
11 |     '2009',
12 | ]
13 | 
14 | invalid_relations = [
15 |     'and', 'but', 'or', 'so', 'because', 'when', 'before', 'although',  # conjunction
16 |     'oh', 'wow', 'ouch', 'ah', 'oops',
17 |     'what', 'how', 'where', 'when', 'who', 'whom',
18 |     'a', 'and', 'the', 'there',
19 |     'them', 'he', 'she', 'him', 'her', 'it',  # pronoun
20 |     'ten', 'hundred', 'thousand', 'million', 'billion',  # unit
21 |     'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine',  # number
22 |     'year', 'month', 'day', 'daily',
23 | ] + found_invalid
24 | 
25 | 
26 | 
27 | 
28 | auxiliaries = [
29 |     'be', 'can', 'have', 'dare', 'may', 'will', 'would', 'should',
30 |     'need', 'ought', 'shall', 'might', 'do', 'does', 'did',
31 |     'be able to', 'had better', 'have to', 'need to', 'ought to', 'used to',
32 | ]
33 | 
34 | with open('corpus/english-adjectives.txt', 'r') as f:
35 |     adjectives = [line.strip().lower() for line in f if line.strip()]  # skip blank lines
36 | 
37 | with open('corpus/adverbs.txt', 'r') as f:
38 |     adverbs = [line.strip().lower() for line in f if line.strip()]  # skip blank lines
39 | 
40 | # with open('corpus/Wordlist-Verbs-All.txt', 'r') as f:
41 | #     verbs = [line.strip().lower() for line in f if line.strip()]
42 | 
43 | invalid_relations += adjectives
44 | invalid_relations += adverbs
45 | # invalid_relations += verbs
46 | 
47 | invalid_relations_set = set(invalid_relations)
48 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | argon2-cffi==20.1.0
2 | async-generator==1.10
3 | attrs==20.3.0
4 | backcall==0.2.0
5 | bleach==3.2.1
6 | blis==0.4.1
7 | catalogue==1.0.0
8 | certifi==2020.11.8
9 | cffi==1.14.3
10 | chardet==3.0.4
11 | click==7.1.2
12 | cycler==0.10.0
13 | cymem==2.0.4
14 | dataclasses==0.6
15 | decorator==4.4.2
16 | defusedxml==0.6.0
17 | en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.3.1/en_core_web_lg-2.3.1.tar.gz
18 | en-core-web-md @ 
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz 19 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz 20 | entrypoints==0.3 21 | filelock==3.0.12 22 | idna==2.10 23 | importlib-metadata==2.0.0 24 | ipykernel==5.3.4 25 | ipython==7.16.1 26 | ipython-genutils==0.2.0 27 | ipywidgets==7.5.1 28 | jedi==0.17.2 29 | Jinja2==2.11.2 30 | joblib==0.17.0 31 | jsonschema==3.2.0 32 | jupyter==1.0.0 33 | jupyter-client==6.1.7 34 | jupyter-console==6.2.0 35 | jupyter-core==4.6.3 36 | jupyterlab-pygments==0.1.2 37 | kiwisolver==1.3.1 38 | MarkupSafe==1.1.1 39 | matplotlib==3.3.3 40 | mistune==0.8.4 41 | murmurhash==1.0.4 42 | nbclient==0.5.1 43 | nbconvert==6.0.7 44 | nbformat==5.0.8 45 | nest-asyncio==1.4.3 46 | nltk==3.5 47 | notebook==6.1.5 48 | numpy==1.19.4 49 | packaging==20.4 50 | pandas==1.1.4 51 | pandocfilters==1.4.3 52 | parso==0.7.1 53 | pexpect==4.8.0 54 | pickleshare==0.7.5 55 | Pillow==8.0.1 56 | plac==1.1.3 57 | preshed==3.0.4 58 | prometheus-client==0.8.0 59 | prompt-toolkit==3.0.8 60 | protobuf==3.13.0 61 | ptyprocess==0.6.0 62 | pycparser==2.20 63 | Pygments==2.7.2 64 | pyparsing==2.4.7 65 | pyrsistent==0.17.3 66 | python-dateutil==2.8.1 67 | pytz==2020.4 68 | pyzmq==19.0.2 69 | qtconsole==4.7.7 70 | QtPy==1.9.0 71 | regex==2020.11.11 72 | requests==2.25.0 73 | sacremoses==0.0.43 74 | scipy==1.5.4 75 | seaborn==0.11.0 76 | Send2Trash==1.5.0 77 | sentencepiece==0.1.90 78 | six==1.15.0 79 | spacy==2.3.2 80 | srsly==1.0.4 81 | terminado==0.9.1 82 | testpath==0.4.4 83 | thinc==7.4.1 84 | tokenizers==0.8.1rc2 85 | torch>=1.5 86 | tornado==6.1 87 | tqdm==4.51.0 88 | traitlets==4.3.3 89 | transformers==3.3.1 90 | typing-extensions==3.7.4.3 91 | urllib3==1.26.1 92 | wasabi==0.8.0 93 | wcwidth==0.2.5 94 | webencodings==0.5.1 95 | widgetsnbextension==3.5.1 96 | zipp==3.4.0 97 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | target/
76 | 
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 | 
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 | 
84 | # pyenv
85 | .python-version
86 | 
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 | 
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
--------------------------------------------------------------------------------
/extract.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | from process import parse_sentence
3 | from mapper import Map, deduplication
4 | from transformers import AutoTokenizer, BertModel, GPT2Model
5 | import argparse
6 | import en_core_web_md
7 | from tqdm import tqdm
8 | import json
9 | 
10 | def str2bool(v):
11 |     if isinstance(v, bool):
12 |         return v
13 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
14 |         return True
15 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
16 |         return False
17 |     else:
18 |         raise argparse.ArgumentTypeError('Boolean value expected.')
19 | 
20 | parser = argparse.ArgumentParser(description='Process lines of a text corpus into a knowledge graph')
21 | parser.add_argument('input_filename', type=str, help='text file as input')
22 | parser.add_argument('output_filename', type=str, help='output text file')
23 | parser.add_argument('--language_model', default='bert-base-cased',
24 |                     choices=['bert-large-uncased', 'bert-large-cased', 'bert-base-uncased', 'bert-base-cased', 'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl'],
25 |                     help='which language model to use')
26 | parser.add_argument('--use_cuda', default=True,
27 |                     type=str2bool, nargs='?',
28 |                     help="Use cuda?")
29 | parser.add_argument('--include_text_output', default=False,
30 |                     type=str2bool, nargs='?',
31 |                     help="Include original sentence in output")
32 | parser.add_argument('--threshold', default=0.003,
33 |                     type=float, help="Any attention score lower than this is removed")
34 | 
35 | args = parser.parse_args()
36 | 
37 | use_cuda = args.use_cuda
38 | nlp = en_core_web_md.load()
39 | 
40 | '''
41 | Tested language models:
42 | 
43 | 1. bert-base-cased
44 | 
45 | 2. gpt2-medium
46 | 
47 | Basically any model that belongs to one of these model families should work.
48 | 
49 | '''
50 | 
51 | language_model = args.language_model
52 | 
53 | 
54 | if __name__ == '__main__':
55 |     tokenizer = AutoTokenizer.from_pretrained(language_model)
56 |     if 'gpt2' in language_model:
57 |         encoder = GPT2Model.from_pretrained(language_model)
58 |     else:
59 |         encoder = BertModel.from_pretrained(language_model)
60 |     encoder.eval()
61 |     if use_cuda:
62 |         encoder = encoder.cuda()
63 |     input_filename = args.input_filename
64 |     output_filename = args.output_filename
65 |     include_sentence = args.include_text_output
66 | 
67 |     with open(input_filename, 'r') as f, open(output_filename, 'w') as g:
68 |         for idx, line in enumerate(tqdm(f)):
69 |             sentence = line.strip()
70 |             if len(sentence):
71 |                 valid_triplets = []
72 |                 for sent in nlp(sentence).sents:
73 |                     # Match
74 |                     for triplets in parse_sentence(sent.text, tokenizer, encoder, nlp, use_cuda=use_cuda):
75 |                         valid_triplets.append(triplets)
76 |                 if len(valid_triplets) > 0:
77 |                     # Map
78 |                     mapped_triplets = []
79 |                     for triplet in valid_triplets:
80 |                         head = triplet['h']
81 |                         tail = triplet['t']
82 |                         relations = triplet['r']
83 |                         conf = triplet['c']
84 |                         if conf < args.threshold:
85 |                             continue
86 |                         mapped_triplet = Map(head, relations, tail)
87 |                         if 'h' in mapped_triplet:
88 |                             mapped_triplet['c'] = conf
89 |                             mapped_triplets.append(mapped_triplet)
90 |                     output = {'line': idx, 'tri': deduplication(mapped_triplets)}
91 | 
92 |                     if include_sentence:
93 |                         output['sent'] = sentence
94 |                     if len(output['tri']) > 0:
95 |                         g.write(json.dumps(output) + '\n')
--------------------------------------------------------------------------------
/corpus/adverbs.txt:
--------------------------------------------------------------------------------
1 | 
2 | abnormally
3 | absentmindedly
4 | accidentally
5 | acidly
6 | actually
7 | adventurously
8 | afterwards
9 | almost
10 | always
11 | angrily
12 | annually
13 | anxiously
14 | arrogantly
15 | awkwardly
16 | badly
17 | bashfully
18 | beautifully
19 | bitterly
20 | bleakly
21 | blindly
22 | blissfully
23 | boastfully
24 | boldly
25 | bravely
26 | briefly
27 | brightly
28 | briskly
29 | broadly
30 | busily
31 | calmly
32 | carefully
33 | carelessly
34 | cautiously
35 | certainly
36 | cheerfully
37 | clearly
38 | cleverly
39 | closely
40 | coaxingly
41 | colorfully
42 | commonly
43 | continually
44 | coolly
45 | correctly
46 | courageously
47 | crossly
48 | cruelly
49 | curiously
50 | daily
51 | daintily
52 | dearly
53 | deceivingly
54 | delightfully
55 | deeply
56 | defiantly
57 | deliberately
58 | delightfully
59 | diligently
60 | dimly
61 | doubtfully
62 | dreamily
63 | easily
64 | elegantly
65 | energetically
66 | enormously
67 | enthusiastically
68 | equally
69 | especially
70 | even
71 | evenly
72 | eventually
73 | exactly
74 | excitedly
75 | extremely
76 | fairly
77 | faithfully
78 | famously
79 | far
80 | fast
81 | fatally
82 | ferociously
83 | fervently
84 | fiercely
85 | fondly
86 | foolishly
87 | fortunately
88 | frankly
89 | frantically
90 | freely
91 | frenetically
92 | frightfully
93 | fully
94 | furiously
95 | generally
96 | generously
97 | gently
98 | gladly
99 | gleefully
100 | gracefully
101 | gratefully
102 | greatly
103 | greedily
104 | happily
105 | hastily
106 | healthily
107 | heavily
108 | helpfully
109 | helplessly
110 | highly
111 | honestly
112 | hopelessly
113 | hourly
114 | hungrily
115 | immediately
116 | innocently
117 | 
inquisitively 118 | instantly 119 | intensely 120 | intently 121 | interestingly 122 | inwardly 123 | irritably 124 | jaggedly 125 | jealously 126 | joshingly 127 | joyfully 128 | joyously 129 | jovially 130 | jubilantly 131 | judgmentally 132 | justly 133 | keenly 134 | kiddingly 135 | kindheartedly 136 | kindly 137 | knavishly 138 | knottily 139 | knowingly 140 | knowledgeably 141 | kookily 142 | lazily 143 | less 144 | lightly 145 | likely 146 | limply 147 | lively 148 | loftily 149 | longingly 150 | loosely 151 | lovingly 152 | loudly 153 | loyally 154 | madly 155 | majestically 156 | meaningfully 157 | mechanically 158 | merrily 159 | miserably 160 | mockingly 161 | monthly 162 | more 163 | mortally 164 | mostly 165 | mysteriously 166 | naturally 167 | nearly 168 | neatly 169 | needily 170 | nervously 171 | never 172 | nicely 173 | noisily 174 | not 175 | obediently 176 | obnoxiously 177 | oddly 178 | offensively 179 | officially 180 | often 181 | only 182 | openly 183 | optimistically 184 | overconfidently 185 | owlishly 186 | painfully 187 | partially 188 | patiently 189 | perfectly 190 | physically 191 | playfully 192 | politely 193 | poorly 194 | positively 195 | potentially 196 | powerfully 197 | promptly 198 | properly 199 | punctually 200 | quaintly 201 | quarrelsomely 202 | queasily 203 | queerly 204 | questionably 205 | questioningly 206 | quicker 207 | quickly 208 | quietly 209 | quirkily 210 | quizzically 211 | rapidly 212 | rarely 213 | readily 214 | really 215 | reassuringly 216 | recklessly 217 | regularly 218 | reluctantly 219 | repeatedly 220 | reproachfully 221 | restfully 222 | righteously 223 | rightfully 224 | rigidly 225 | roughly 226 | rudely 227 | sadly 228 | safely 229 | scarcely 230 | scarily 231 | searchingly 232 | sedately 233 | seemingly 234 | seldom 235 | selfishly 236 | separately 237 | seriously 238 | shakily 239 | sharply 240 | sheepishly 241 | shrilly 242 | shyly 243 | silently 244 | sleepily 245 | slowly 246 | smoothly 247 | softly 248 | solemnly 249 | solidly 250 | sometimes 251 | soon 252 | speedily 253 | stealthily 254 | sternly 255 | strictly 256 | successfully 257 | suddenly 258 | surprisingly 259 | suspiciously 260 | sweetly 261 | swiftly 262 | sympathetically 263 | tenderly 264 | tensely 265 | terribly 266 | thankfully 267 | thoroughly 268 | thoughtfully 269 | tightly 270 | tomorrow 271 | too 272 | tremendously 273 | triumphantly 274 | truly 275 | truthfully 276 | ultimately 277 | unabashedly 278 | unaccountably 279 | unbearably 280 | unethically 281 | unexpectedly 282 | unfortunately 283 | unimpressively 284 | unnaturally 285 | unnecessarily 286 | utterly 287 | upbeat 288 | upliftingly 289 | upright 290 | upside-down 291 | upward 292 | upwardly 293 | urgently 294 | usefully 295 | uselessly 296 | usually 297 | utterly 298 | vacantly 299 | vaguely 300 | vainly 301 | valiantly 302 | vastly 303 | verbally 304 | very 305 | viciously 306 | victoriously 307 | violently 308 | vivaciously 309 | voluntarily 310 | warmly 311 | weakly 312 | wearily 313 | well 314 | wetly 315 | wholly 316 | wildly 317 | willfully 318 | wisely 319 | woefully 320 | wonderfully 321 | worriedly 322 | wrongly 323 | yawningly 324 | yearly 325 | yearningly 326 | yesterday 327 | yieldingly 328 | youthfully 329 | zealously 330 | zestfully 331 | zestily 332 | yet 333 | else 334 | just -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | from utils import 
compress_attention, create_mapping, BFS, build_graph, is_word
2 | from multiprocessing import Pool
3 | import spacy
4 | import en_core_web_md
5 | import torch
6 | from transformers import AutoTokenizer, BertModel, GPT2Model
7 | from constant import invalid_relations_set
8 | 
9 | 
10 | def process_matrix(attentions, layer_idx=-1, head_num=0, avg_head=False, trim=True, use_cuda=True):
11 |     if avg_head:
12 |         if use_cuda:
13 |             attn = torch.mean(attentions[layer_idx][0], 0).cpu()  # attentions: one (batch, heads, seq, seq) tensor per layer
14 |         else:
15 |             attn = torch.mean(attentions[layer_idx][0], 0)
16 |         attention_matrix = attn.detach().numpy()
17 |     else:
18 |         attn = attentions[layer_idx][0][head_num]
19 |         if use_cuda:
20 |             attn = attn.cpu()
21 |         attention_matrix = attn.detach().numpy()
22 | 
23 |     if trim:  # strip the [CLS]/[SEP] rows and columns (BERT-style inputs only)
24 |         attention_matrix = attention_matrix[1:-1, 1:-1]
25 |     return attention_matrix
26 | 
27 | def bfs(args):
28 |     s, end, graph, max_size, black_list_relation = args
29 |     return BFS(s, end, graph, max_size, black_list_relation)
30 | 
31 | 
32 | def check_relations_validity(relations):
33 |     for rel in relations:
34 |         if rel.lower() in invalid_relations_set or rel.isnumeric():
35 |             return False
36 |     return True
37 | 
38 | def global_initializer(nlp_object):
39 |     global spacy_nlp
40 |     spacy_nlp = nlp_object
41 | 
42 | def filter_relation_sets(params):
43 |     triplet, id2token = params
44 | 
45 |     triplet_idx = triplet[0]
46 |     confidence = triplet[1]
47 |     head, tail = triplet_idx[0], triplet_idx[-1]
48 |     if head in id2token and tail in id2token:
49 |         head = id2token[head]
50 |         tail = id2token[tail]
51 |         relations = [spacy_nlp(id2token[idx])[0].lemma_ for idx in triplet_idx[1:-1] if idx in id2token]
52 |         if len(relations) > 0 and check_relations_validity(relations) and head.lower() not in invalid_relations_set and tail.lower() not in invalid_relations_set:
53 |             return {'h': head, 't': tail, 'r': relations, 'c': confidence}
54 |     return {}
55 | 
56 | def parse_sentence(sentence, tokenizer, encoder, nlp, use_cuda=True):
57 |     '''Implement the Match part of MAMA.
58 | 
59 |     '''
60 |     tokenizer_name = type(tokenizer).__name__  # e.g. 'BertTokenizer' or 'GPT2Tokenizer'
61 | 
62 |     inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(sentence, return_pt=True, nlp=nlp, tokenizer=tokenizer)
63 | 
64 |     with torch.no_grad():
65 |         if use_cuda:
66 |             for key in inputs.keys():
67 |                 inputs[key] = inputs[key].cuda()
68 |         outputs = encoder(**inputs, output_attentions=True)
69 |     trim = True
70 |     if 'GPT2' in tokenizer_name:
71 |         trim = False
72 | 
73 |     '''
74 |     Use the average of the last layer's attention: page 6, section 3.1.2
75 |     '''
76 |     attention = process_matrix(outputs[2], avg_head=True, trim=trim, use_cuda=use_cuda)
77 | 
78 |     merged_attention = compress_attention(attention, tokenid2word_mapping)
79 |     attn_graph = build_graph(merged_attention)
80 | 
81 |     tail_head_pairs = []
82 |     for head in noun_chunks:
83 |         for tail in noun_chunks:
84 |             if head != tail:
85 |                 tail_head_pairs.append((token2id[head], token2id[tail]))
86 | 
87 |     black_list_relation = set([token2id[n] for n in noun_chunks])
88 | 
89 |     all_relation_pairs = []
90 |     id2token = {value: key for key, value in token2id.items()}
91 | 
92 |     with Pool(10) as pool:
93 |         params = [(pair[0], pair[1], attn_graph, max(tokenid2word_mapping), black_list_relation,) for pair in tail_head_pairs]
94 |         for output in pool.imap_unordered(bfs, params):
95 |             if len(output):
96 |                 all_relation_pairs += [(o, id2token) for o in output]
97 | 
98 |     triplet_text = []
99 |     with Pool(10, global_initializer, (nlp,)) as pool:
100 |         for triplet in pool.imap_unordered(filter_relation_sets, all_relation_pairs):
101 |             if len(triplet) > 0:
102 |                 triplet_text.append(triplet)
103 |     return triplet_text
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     import json
108 |     from tqdm import tqdm
109 | 
110 |     nlp = en_core_web_md.load()
111 |     selected_model = 'gpt2-medium'
112 | 
113 |     use_cuda = True
114 | 
115 | 
116 |     tokenizer = AutoTokenizer.from_pretrained(selected_model)
117 |     encoder = GPT2Model.from_pretrained(selected_model)
118 |     encoder.eval()
119 |     if use_cuda:
120 |         encoder = encoder.cuda()
121 | 
122 |     target_files = [
123 |         '../../Documents/KGERT-v2/datasets/squad_v1.1/train-v1.1.json',
124 |         # '../../Documents/KGERT-v2/datasets/squad_v1.1/wiki_dev_2020-18.json',
125 |         # '../../Documents/KGERT-v2/datasets/squad_v1/dev-v1.1.json',
126 |     ]
127 | 
128 |     output_filenames = [
129 |         'train_v1.1.jsonl',
130 |         # 'wiki_2020-18.jsonl',
131 |         # 'dev-v1.1.jsonl',
132 |     ]
133 | 
134 |     for target_file, output_filename in zip(target_files, output_filenames):
135 |         with open(target_file, 'r') as f:
136 |             dataset = json.load(f)
137 | 
138 |         output_filename = selected_model + '_' + output_filename
139 | 
140 |         print(target_file, output_filename)
141 | 
142 |         f = open(output_filename, 'w')
143 |         for data in tqdm(dataset['data'], dynamic_ncols=True):
144 |             for para in data['paragraphs']:
145 |                 context = para['context']
146 |                 for sent in nlp(context).sents:
147 |                     for output in parse_sentence(sent.text, tokenizer, encoder, nlp, use_cuda=use_cuda):
148 |                         f.write(json.dumps(output) + '\n')
149 |                 f.flush()
150 | 
151 |                 for question in para['qas']:
152 |                     question = question['question']
153 |                     for output in parse_sentence(question, tokenizer, encoder, nlp, use_cuda=use_cuda):
154 |                         f.write(json.dumps(output) + '\n')
155 |                     f.flush()
156 |         f.close()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import re
4 | 
5 | alphabet = re.compile(r'^[a-zA-Z]+$')
6 | 
7 | from copy import copy
8 | from collections import defaultdict
9 | 
10 | def build_graph(matrix):
11 |     graph = defaultdict(list)
12 | 
13 |     for idx in range(0, len(matrix)):
14 |         for col in range(idx + 1, len(matrix)):
15 |             graph[idx].append((col, matrix[idx][col]))
16 |     return graph
17 | 
18 | def BFS(s, end, graph, max_size=-1, black_list_relation=()):
19 |     visited = [False] * (max(graph.keys()) + 100)
20 | 
21 |     # Create a queue for BFS
22 |     queue = []
23 | 
24 |     # Mark the source node as
25 |     # visited and enqueue it
26 |     queue.append((s, [(s, 0)]))
27 | 
28 |     found_paths = []
29 | 
30 |     visited[s] = True
31 | 
32 |     while queue:
33 | 
34 |         s, path = queue.pop(0)
35 | 
36 |         # Get all adjacent vertices of the
37 |         # dequeued vertex s. If an adjacent vertex
38 |         # has not been visited, then mark it
39 |         # visited and enqueue it
40 |         for i, conf in graph[s]:
41 |             if i == end:
42 |                 found_paths.append(path + [(i, conf)])
43 |                 break
44 |             if not visited[i]:
45 |                 queue.append((i, copy(path) + [(i, conf)]))
46 |                 visited[i] = True
47 | 
48 |     candidate_facts = []
49 |     for path_pairs in found_paths:
50 |         if len(path_pairs) < 3:
51 |             continue
52 |         path = []
53 |         cum_conf = 0
54 |         for (node, conf) in path_pairs:
55 |             path.append(node)
56 |             cum_conf += conf
57 | 
58 |         if path[1] in black_list_relation:
59 |             continue
60 | 
61 |         candidate_facts.append((path, cum_conf))
62 | 
63 |     candidate_facts = sorted(candidate_facts, key=lambda x: x[1], reverse=True)
64 |     return candidate_facts
65 | 
66 | def is_word(token):
67 |     if len(token) == 1 and alphabet.match(token) is None:
68 |         return False
69 |     return True
70 | 
71 | def create_mapping(sentence, return_pt=False, nlp=None, tokenizer=None):
72 |     '''Create a mapping
73 |     nlp: spacy model
74 |     tokenizer: huggingface tokenizer
75 |     '''
76 |     doc = nlp(sentence)
77 | 
78 |     tokens = list(doc)
79 | 
80 |     chunk2id = {}
81 | 
82 |     start_chunk = []
83 |     end_chunk = []
84 |     noun_chunks = []
85 |     for chunk in doc.noun_chunks:
86 |         noun_chunks.append(chunk.text)
87 |         start_chunk.append(chunk.start)
88 |         end_chunk.append(chunk.end)
89 | 
90 |     sentence_mapping = []
91 |     token2id = {}
92 |     mode = 0  # 1: in chunk, 0: not in chunk
93 |     chunk_id = 0
94 |     for idx, token in enumerate(doc):
95 |         if idx in start_chunk:
96 |             mode = 1
97 |             sentence_mapping.append(noun_chunks[chunk_id])
98 |             token2id[sentence_mapping[-1]] = len(token2id)
99 |             chunk_id += 1
100 |         elif idx in end_chunk:
101 |             mode = 0
102 | 
103 |         if mode == 0:
104 |             sentence_mapping.append(token.text)
105 |             token2id[sentence_mapping[-1]] = len(token2id)
106 | 
107 | 
108 |     token_ids = []
109 |     tokenid2word_mapping = []
110 | 
111 |     for token in sentence_mapping:
112 |         subtoken_ids = tokenizer(str(token), add_special_tokens=False)['input_ids']
113 |         tokenid2word_mapping += [token2id[token]] * len(subtoken_ids)
114 |         token_ids += subtoken_ids
115 | 
116 |     tokenizer_name = type(tokenizer).__name__  # e.g. 'BertTokenizer' or 'GPT2Tokenizer'
117 |     if 'GPT2' in tokenizer_name:
118 |         outputs = {
119 |             'input_ids': token_ids,
120 |             'attention_mask': [1] * len(token_ids),
121 |         }
122 | 
123 |     else:
124 |         outputs = {
125 |             'input_ids': [tokenizer.cls_token_id] + token_ids + [tokenizer.sep_token_id],
126 |             'attention_mask': [1] * (len(token_ids) + 2),
127 |             'token_type_ids': [0] * (len(token_ids) + 2)
128 |         }
129 | 
130 |     if return_pt:
131 |         for key, value in outputs.items():
132 |             outputs[key] = torch.from_numpy(np.array(value)).long().unsqueeze(0)
133 | 
134 |     return outputs, tokenid2word_mapping, token2id, noun_chunks
135 | 
136 | def compress_attention(attention, tokenid2word_mapping, operator=np.mean):
137 | 
138 |     new_index = []
139 | 
140 |     prev = -1
141 |     for idx, row in enumerate(attention):
142 |         token_id = tokenid2word_mapping[idx]
143 |         if token_id != prev:
144 |             new_index.append([row])
145 |             prev = token_id
146 |         else:
147 |             new_index[-1].append(row)
148 | 
149 |     new_matrix = []
150 |     for row in new_index:
151 |         new_matrix.append(operator(np.array(row), 0))
152 | 
153 |     new_matrix = np.array(new_matrix)
154 | 
155 |     attention = np.array(new_matrix).T
156 | 
157 |     prev = -1
158 |     new_index = []
159 |     for idx, row in enumerate(attention):
160 |         token_id = tokenid2word_mapping[idx]
161 |         if token_id != prev:
162 |             new_index.append([row])
163 |             prev = token_id
164 |         else:
165 |             new_index[-1].append(row)
166 | 
167 | 
168 |     new_matrix = 
[] 169 | for row in new_index: 170 | new_matrix.append(operator(np.array(row), 0)) 171 | 172 | new_matrix = np.array(new_matrix) 173 | 174 | return new_matrix.T 175 | 176 | def index2word(tokenid2word_mapping, token2id): 177 | tokens = [] 178 | prev = -1 179 | for token_id in tokenid2word_mapping: 180 | if token_id == prev: 181 | continue 182 | 183 | tokens.append(token2id[token_id]) 184 | prev = token_id 185 | 186 | return tokens 187 | 188 | 189 | 190 | if __name__ == '__main__': 191 | import en_core_web_sm 192 | from transformers import AutoTokenizer, BertModel 193 | tokenizer = AutoTokenizer.from_pretrained('bert-base-cased') 194 | encoder = BertModel.from_pretrained('bert-base-cased') 195 | nlp = en_core_web_sm.load() 196 | 197 | sentence = 'Rolling Stone wrote: “No other pop song has so thoroughly challenged artistic conventions”' 198 | sentence = 'Dylan sing "Time They Are Changing"' 199 | inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(sentence, return_pt=True, nlp=nlp, tokenizer=tokenizer) 200 | 201 | outputs = encoder(**inputs, output_attentions=True) 202 | print(noun_chunks, tokenid2word_mapping, token2id) 203 | -------------------------------------------------------------------------------- /corpus/english-adjectives.txt: -------------------------------------------------------------------------------- 1 | abandoned 2 | able 3 | absolute 4 | adorable 5 | adventurous 6 | academic 7 | acceptable 8 | acclaimed 9 | accomplished 10 | accurate 11 | aching 12 | acidic 13 | acrobatic 14 | active 15 | actual 16 | adept 17 | admirable 18 | admired 19 | adolescent 20 | adorable 21 | adored 22 | advanced 23 | afraid 24 | affectionate 25 | aged 26 | aggravating 27 | aggressive 28 | agile 29 | agitated 30 | agonizing 31 | agreeable 32 | ajar 33 | alarmed 34 | alarming 35 | alert 36 | alienated 37 | alive 38 | all 39 | altruistic 40 | amazing 41 | ambitious 42 | ample 43 | amused 44 | amusing 45 | anchored 46 | ancient 47 | angelic 48 | angry 49 | anguished 50 | animated 51 | annual 52 | another 53 | antique 54 | anxious 55 | any 56 | apprehensive 57 | appropriate 58 | apt 59 | arctic 60 | arid 61 | aromatic 62 | artistic 63 | ashamed 64 | assured 65 | astonishing 66 | athletic 67 | attached 68 | attentive 69 | attractive 70 | austere 71 | authentic 72 | authorized 73 | automatic 74 | avaricious 75 | average 76 | aware 77 | awesome 78 | awful 79 | awkward 80 | babyish 81 | bad 82 | back 83 | baggy 84 | bare 85 | barren 86 | basic 87 | beautiful 88 | belated 89 | beloved 90 | beneficial 91 | better 92 | best 93 | bewitched 94 | big 95 | big-hearted 96 | biodegradable 97 | bite-sized 98 | bitter 99 | black 100 | black-and-white 101 | bland 102 | blank 103 | blaring 104 | bleak 105 | blind 106 | blissful 107 | blond 108 | blue 109 | blushing 110 | bogus 111 | boiling 112 | bold 113 | bony 114 | boring 115 | bossy 116 | both 117 | bouncy 118 | bountiful 119 | bowed 120 | brave 121 | breakable 122 | brief 123 | bright 124 | brilliant 125 | brisk 126 | broken 127 | bronze 128 | brown 129 | bruised 130 | bubbly 131 | bulky 132 | bumpy 133 | buoyant 134 | burdensome 135 | burly 136 | bustling 137 | busy 138 | buttery 139 | buzzing 140 | calculating 141 | calm 142 | candid 143 | canine 144 | capital 145 | carefree 146 | careful 147 | careless 148 | caring 149 | cautious 150 | cavernous 151 | celebrated 152 | charming 153 | cheap 154 | cheerful 155 | cheery 156 | chief 157 | chilly 158 | chubby 159 | circular 160 | classic 161 | clean 162 | clear 163 | clear-cut 164 | clever 165 | 
close 166 | closed 167 | cloudy 168 | clueless 169 | clumsy 170 | cluttered 171 | coarse 172 | cold 173 | colorful 174 | colorless 175 | colossal 176 | comfortable 177 | common 178 | compassionate 179 | competent 180 | complete 181 | complex 182 | complicated 183 | composed 184 | concerned 185 | concrete 186 | confused 187 | conscious 188 | considerate 189 | constant 190 | content 191 | conventional 192 | cooked 193 | cool 194 | cooperative 195 | coordinated 196 | corny 197 | corrupt 198 | costly 199 | courageous 200 | courteous 201 | crafty 202 | crazy 203 | creamy 204 | creative 205 | creepy 206 | criminal 207 | crisp 208 | critical 209 | crooked 210 | crowded 211 | cruel 212 | crushing 213 | cuddly 214 | cultivated 215 | cultured 216 | cumbersome 217 | curly 218 | curvy 219 | cute 220 | cylindrical 221 | damaged 222 | damp 223 | dangerous 224 | dapper 225 | daring 226 | darling 227 | dark 228 | dazzling 229 | dead 230 | deadly 231 | deafening 232 | dear 233 | dearest 234 | decent 235 | decimal 236 | decisive 237 | deep 238 | defenseless 239 | defensive 240 | defiant 241 | deficient 242 | definite 243 | definitive 244 | delayed 245 | delectable 246 | delicious 247 | delightful 248 | delirious 249 | demanding 250 | dense 251 | dental 252 | dependable 253 | dependent 254 | descriptive 255 | deserted 256 | detailed 257 | determined 258 | devoted 259 | different 260 | difficult 261 | digital 262 | diligent 263 | dim 264 | dimpled 265 | dimwitted 266 | direct 267 | disastrous 268 | discrete 269 | disfigured 270 | disgusting 271 | disloyal 272 | dismal 273 | distant 274 | downright 275 | dreary 276 | dirty 277 | disguised 278 | dishonest 279 | dismal 280 | distant 281 | distinct 282 | distorted 283 | dizzy 284 | dopey 285 | doting 286 | double 287 | downright 288 | drab 289 | drafty 290 | dramatic 291 | dreary 292 | droopy 293 | dry 294 | dual 295 | dull 296 | dutiful 297 | each 298 | eager 299 | earnest 300 | early 301 | easy 302 | easy-going 303 | ecstatic 304 | edible 305 | educated 306 | elaborate 307 | elastic 308 | elated 309 | elderly 310 | electric 311 | elegant 312 | elementary 313 | elliptical 314 | embarrassed 315 | embellished 316 | eminent 317 | emotional 318 | empty 319 | enchanted 320 | enchanting 321 | energetic 322 | enlightened 323 | enormous 324 | enraged 325 | entire 326 | envious 327 | equal 328 | equatorial 329 | essential 330 | esteemed 331 | ethical 332 | euphoric 333 | even 334 | evergreen 335 | everlasting 336 | every 337 | evil 338 | exalted 339 | excellent 340 | exemplary 341 | exhausted 342 | excitable 343 | excited 344 | exciting 345 | exotic 346 | expensive 347 | experienced 348 | expert 349 | extraneous 350 | extroverted 351 | extra-large 352 | extra-small 353 | fabulous 354 | failing 355 | faint 356 | fair 357 | faithful 358 | fake 359 | false 360 | familiar 361 | famous 362 | fancy 363 | fantastic 364 | far 365 | faraway 366 | far-flung 367 | far-off 368 | fast 369 | fat 370 | fatal 371 | fatherly 372 | favorable 373 | favorite 374 | fearful 375 | fearless 376 | feisty 377 | feline 378 | female 379 | feminine 380 | few 381 | fickle 382 | filthy 383 | fine 384 | finished 385 | firm 386 | first 387 | firsthand 388 | fitting 389 | fixed 390 | flaky 391 | flamboyant 392 | flashy 393 | flat 394 | flawed 395 | flawless 396 | flickering 397 | flimsy 398 | flippant 399 | flowery 400 | fluffy 401 | fluid 402 | flustered 403 | focused 404 | fond 405 | foolhardy 406 | foolish 407 | forceful 408 | forked 409 | formal 410 | forsaken 411 | forthright 412 | fortunate 413 | 
fragrant 414 | frail 415 | frank 416 | frayed 417 | free 418 | French 419 | fresh 420 | frequent 421 | friendly 422 | frightened 423 | frightening 424 | frigid 425 | frilly 426 | frizzy 427 | frivolous 428 | front 429 | frosty 430 | frozen 431 | frugal 432 | fruitful 433 | full 434 | fumbling 435 | functional 436 | funny 437 | fussy 438 | fuzzy 439 | gargantuan 440 | gaseous 441 | general 442 | generous 443 | gentle 444 | genuine 445 | giant 446 | giddy 447 | gigantic 448 | gifted 449 | giving 450 | glamorous 451 | glaring 452 | glass 453 | gleaming 454 | gleeful 455 | glistening 456 | glittering 457 | gloomy 458 | glorious 459 | glossy 460 | glum 461 | golden 462 | good 463 | good-natured 464 | gorgeous 465 | graceful 466 | gracious 467 | grand 468 | grandiose 469 | granular 470 | grateful 471 | grave 472 | gray 473 | great 474 | greedy 475 | green 476 | gregarious 477 | grim 478 | grimy 479 | gripping 480 | grizzled 481 | gross 482 | grotesque 483 | grouchy 484 | grounded 485 | growing 486 | growling 487 | grown 488 | grubby 489 | gruesome 490 | grumpy 491 | guilty 492 | gullible 493 | gummy 494 | hairy 495 | half 496 | handmade 497 | handsome 498 | handy 499 | happy 500 | happy-go-lucky 501 | hard 502 | hard-to-find 503 | harmful 504 | harmless 505 | harmonious 506 | harsh 507 | hasty 508 | hateful 509 | haunting 510 | healthy 511 | heartfelt 512 | hearty 513 | heavenly 514 | heavy 515 | hefty 516 | helpful 517 | helpless 518 | hidden 519 | hideous 520 | high 521 | high-level 522 | hilarious 523 | hoarse 524 | hollow 525 | homely 526 | honest 527 | honorable 528 | honored 529 | hopeful 530 | horrible 531 | hospitable 532 | hot 533 | huge 534 | humble 535 | humiliating 536 | humming 537 | humongous 538 | hungry 539 | hurtful 540 | husky 541 | icky 542 | icy 543 | ideal 544 | idealistic 545 | identical 546 | idle 547 | idiotic 548 | idolized 549 | ignorant 550 | ill 551 | illegal 552 | ill-fated 553 | ill-informed 554 | illiterate 555 | illustrious 556 | imaginary 557 | imaginative 558 | immaculate 559 | immaterial 560 | immediate 561 | immense 562 | impassioned 563 | impeccable 564 | impartial 565 | imperfect 566 | imperturbable 567 | impish 568 | impolite 569 | important 570 | impossible 571 | impractical 572 | impressionable 573 | impressive 574 | improbable 575 | impure 576 | inborn 577 | incomparable 578 | incompatible 579 | incomplete 580 | inconsequential 581 | incredible 582 | indelible 583 | inexperienced 584 | indolent 585 | infamous 586 | infantile 587 | infatuated 588 | inferior 589 | infinite 590 | informal 591 | innocent 592 | insecure 593 | insidious 594 | insignificant 595 | insistent 596 | instructive 597 | insubstantial 598 | intelligent 599 | intent 600 | intentional 601 | interesting 602 | internal 603 | international 604 | intrepid 605 | ironclad 606 | irresponsible 607 | irritating 608 | itchy 609 | jaded 610 | jagged 611 | jam-packed 612 | jaunty 613 | jealous 614 | jittery 615 | joint 616 | jolly 617 | jovial 618 | joyful 619 | joyous 620 | jubilant 621 | judicious 622 | juicy 623 | jumbo 624 | junior 625 | jumpy 626 | juvenile 627 | kaleidoscopic 628 | keen 629 | key 630 | kind 631 | kindhearted 632 | kindly 633 | klutzy 634 | knobby 635 | knotty 636 | knowledgeable 637 | knowing 638 | known 639 | kooky 640 | kosher 641 | lame 642 | lanky 643 | large 644 | last 645 | lasting 646 | late 647 | lavish 648 | lawful 649 | lazy 650 | leading 651 | lean 652 | leafy 653 | left 654 | legal 655 | legitimate 656 | light 657 | lighthearted 658 | likable 659 | likely 660 | 
limited 661 | limp 662 | limping 663 | linear 664 | lined 665 | liquid 666 | little 667 | live 668 | lively 669 | livid 670 | loathsome 671 | lone 672 | lonely 673 | long 674 | long-term 675 | loose 676 | lopsided 677 | lost 678 | loud 679 | lovable 680 | lovely 681 | loving 682 | low 683 | loyal 684 | lucky 685 | lumbering 686 | luminous 687 | lumpy 688 | lustrous 689 | luxurious 690 | mad 691 | made-up 692 | magnificent 693 | majestic 694 | major 695 | male 696 | mammoth 697 | married 698 | marvelous 699 | masculine 700 | massive 701 | mature 702 | meager 703 | mealy 704 | mean 705 | measly 706 | meaty 707 | medical 708 | mediocre 709 | medium 710 | meek 711 | mellow 712 | melodic 713 | memorable 714 | menacing 715 | merry 716 | messy 717 | metallic 718 | mild 719 | milky 720 | mindless 721 | miniature 722 | minor 723 | minty 724 | miserable 725 | miserly 726 | misguided 727 | misty 728 | mixed 729 | modern 730 | modest 731 | moist 732 | monstrous 733 | monthly 734 | monumental 735 | moral 736 | mortified 737 | motherly 738 | motionless 739 | mountainous 740 | muddy 741 | muffled 742 | multicolored 743 | mundane 744 | murky 745 | mushy 746 | musty 747 | muted 748 | mysterious 749 | naive 750 | narrow 751 | nasty 752 | natural 753 | naughty 754 | nautical 755 | near 756 | neat 757 | necessary 758 | needy 759 | negative 760 | neglected 761 | negligible 762 | neighboring 763 | nervous 764 | new 765 | next 766 | nice 767 | nifty 768 | nimble 769 | nippy 770 | nocturnal 771 | noisy 772 | nonstop 773 | normal 774 | notable 775 | noted 776 | noteworthy 777 | novel 778 | noxious 779 | numb 780 | nutritious 781 | nutty 782 | obedient 783 | obese 784 | oblong 785 | oily 786 | oblong 787 | obvious 788 | occasional 789 | odd 790 | oddball 791 | offbeat 792 | offensive 793 | official 794 | old 795 | old-fashioned 796 | only 797 | open 798 | optimal 799 | optimistic 800 | opulent 801 | orange 802 | orderly 803 | organic 804 | ornate 805 | ornery 806 | ordinary 807 | original 808 | other 809 | our 810 | outlying 811 | outgoing 812 | outlandish 813 | outrageous 814 | outstanding 815 | oval 816 | overcooked 817 | overdue 818 | overjoyed 819 | overlooked 820 | palatable 821 | pale 822 | paltry 823 | parallel 824 | parched 825 | partial 826 | passionate 827 | past 828 | pastel 829 | peaceful 830 | peppery 831 | perfect 832 | perfumed 833 | periodic 834 | perky 835 | personal 836 | pertinent 837 | pesky 838 | pessimistic 839 | petty 840 | phony 841 | physical 842 | piercing 843 | pink 844 | pitiful 845 | plain 846 | plaintive 847 | plastic 848 | playful 849 | pleasant 850 | pleased 851 | pleasing 852 | plump 853 | plush 854 | polished 855 | polite 856 | political 857 | pointed 858 | pointless 859 | poised 860 | poor 861 | popular 862 | portly 863 | posh 864 | positive 865 | possible 866 | potable 867 | powerful 868 | powerless 869 | practical 870 | precious 871 | present 872 | prestigious 873 | pretty 874 | precious 875 | previous 876 | pricey 877 | prickly 878 | primary 879 | prime 880 | pristine 881 | private 882 | prize 883 | probable 884 | productive 885 | profitable 886 | profuse 887 | proper 888 | proud 889 | prudent 890 | punctual 891 | pungent 892 | puny 893 | pure 894 | purple 895 | pushy 896 | putrid 897 | puzzled 898 | puzzling 899 | quaint 900 | qualified 901 | quarrelsome 902 | quarterly 903 | queasy 904 | querulous 905 | questionable 906 | quick 907 | quick-witted 908 | quiet 909 | quintessential 910 | quirky 911 | quixotic 912 | quizzical 913 | radiant 914 | ragged 915 | rapid 916 | rare 917 
| rash 918 | raw 919 | recent 920 | reckless 921 | rectangular 922 | ready 923 | real 924 | realistic 925 | reasonable 926 | red 927 | reflecting 928 | regal 929 | regular 930 | reliable 931 | relieved 932 | remarkable 933 | remorseful 934 | remote 935 | repentant 936 | required 937 | respectful 938 | responsible 939 | repulsive 940 | revolving 941 | rewarding 942 | rich 943 | rigid 944 | right 945 | ringed 946 | ripe 947 | roasted 948 | robust 949 | rosy 950 | rotating 951 | rotten 952 | rough 953 | round 954 | rowdy 955 | royal 956 | rubbery 957 | rundown 958 | ruddy 959 | rude 960 | runny 961 | rural 962 | rusty 963 | sad 964 | safe 965 | salty 966 | same 967 | sandy 968 | sane 969 | sarcastic 970 | sardonic 971 | satisfied 972 | scaly 973 | scarce 974 | scared 975 | scary 976 | scented 977 | scholarly 978 | scientific 979 | scornful 980 | scratchy 981 | scrawny 982 | second 983 | secondary 984 | second-hand 985 | secret 986 | self-assured 987 | self-reliant 988 | selfish 989 | sentimental 990 | separate 991 | serene 992 | serious 993 | serpentine 994 | several 995 | severe 996 | shabby 997 | shadowy 998 | shady 999 | shallow 1000 | shameful 1001 | shameless 1002 | sharp 1003 | shimmering 1004 | shiny 1005 | shocked 1006 | shocking 1007 | shoddy 1008 | short 1009 | short-term 1010 | showy 1011 | shrill 1012 | shy 1013 | sick 1014 | silent 1015 | silky 1016 | silly 1017 | silver 1018 | similar 1019 | simple 1020 | simplistic 1021 | sinful 1022 | single 1023 | sizzling 1024 | skeletal 1025 | skinny 1026 | sleepy 1027 | slight 1028 | slim 1029 | slimy 1030 | slippery 1031 | slow 1032 | slushy 1033 | small 1034 | smart 1035 | smoggy 1036 | smooth 1037 | smug 1038 | snappy 1039 | snarling 1040 | sneaky 1041 | sniveling 1042 | snoopy 1043 | sociable 1044 | soft 1045 | soggy 1046 | solid 1047 | somber 1048 | some 1049 | spherical 1050 | sophisticated 1051 | sore 1052 | sorrowful 1053 | soulful 1054 | soupy 1055 | sour 1056 | Spanish 1057 | sparkling 1058 | sparse 1059 | specific 1060 | spectacular 1061 | speedy 1062 | spicy 1063 | spiffy 1064 | spirited 1065 | spiteful 1066 | splendid 1067 | spotless 1068 | spotted 1069 | spry 1070 | square 1071 | squeaky 1072 | squiggly 1073 | stable 1074 | staid 1075 | stained 1076 | stale 1077 | standard 1078 | starchy 1079 | stark 1080 | starry 1081 | steep 1082 | sticky 1083 | stiff 1084 | stimulating 1085 | stingy 1086 | stormy 1087 | straight 1088 | strange 1089 | steel 1090 | strict 1091 | strident 1092 | striking 1093 | striped 1094 | strong 1095 | studious 1096 | stunning 1097 | stupendous 1098 | stupid 1099 | sturdy 1100 | stylish 1101 | subdued 1102 | submissive 1103 | substantial 1104 | subtle 1105 | suburban 1106 | sudden 1107 | sugary 1108 | sunny 1109 | super 1110 | superb 1111 | superficial 1112 | superior 1113 | supportive 1114 | sure-footed 1115 | surprised 1116 | suspicious 1117 | svelte 1118 | sweaty 1119 | sweet 1120 | sweltering 1121 | swift 1122 | sympathetic 1123 | tall 1124 | talkative 1125 | tame 1126 | tan 1127 | tangible 1128 | tart 1129 | tasty 1130 | tattered 1131 | taut 1132 | tedious 1133 | teeming 1134 | tempting 1135 | tender 1136 | tense 1137 | tepid 1138 | terrible 1139 | terrific 1140 | testy 1141 | thankful 1142 | that 1143 | these 1144 | thick 1145 | thin 1146 | third 1147 | thirsty 1148 | this 1149 | thorough 1150 | thorny 1151 | those 1152 | thoughtful 1153 | threadbare 1154 | thrifty 1155 | thunderous 1156 | tidy 1157 | tight 1158 | timely 1159 | tinted 1160 | tiny 1161 | tired 1162 | torn 1163 | total 1164 | tough 
1165 | traumatic 1166 | treasured 1167 | tremendous 1168 | tragic 1169 | trained 1170 | tremendous 1171 | triangular 1172 | tricky 1173 | trifling 1174 | trim 1175 | trivial 1176 | troubled 1177 | true 1178 | trusting 1179 | trustworthy 1180 | trusty 1181 | truthful 1182 | tubby 1183 | turbulent 1184 | twin 1185 | ugly 1186 | ultimate 1187 | unacceptable 1188 | unaware 1189 | uncomfortable 1190 | uncommon 1191 | unconscious 1192 | understated 1193 | unequaled 1194 | uneven 1195 | unfinished 1196 | unfit 1197 | unfolded 1198 | unfortunate 1199 | unhappy 1200 | unhealthy 1201 | uniform 1202 | unimportant 1203 | unique 1204 | united 1205 | unkempt 1206 | unknown 1207 | unlawful 1208 | unlined 1209 | unlucky 1210 | unnatural 1211 | unpleasant 1212 | unrealistic 1213 | unripe 1214 | unruly 1215 | unselfish 1216 | unsightly 1217 | unsteady 1218 | unsung 1219 | untidy 1220 | untimely 1221 | untried 1222 | untrue 1223 | unused 1224 | unusual 1225 | unwelcome 1226 | unwieldy 1227 | unwilling 1228 | unwitting 1229 | unwritten 1230 | upbeat 1231 | upright 1232 | upset 1233 | urban 1234 | usable 1235 | used 1236 | useful 1237 | useless 1238 | utilized 1239 | utter 1240 | vacant 1241 | vague 1242 | vain 1243 | valid 1244 | valuable 1245 | vapid 1246 | variable 1247 | vast 1248 | velvety 1249 | venerated 1250 | vengeful 1251 | verifiable 1252 | vibrant 1253 | vicious 1254 | victorious 1255 | vigilant 1256 | vigorous 1257 | villainous 1258 | violet 1259 | violent 1260 | virtual 1261 | virtuous 1262 | visible 1263 | vital 1264 | vivacious 1265 | vivid 1266 | voluminous 1267 | wan 1268 | warlike 1269 | warm 1270 | warmhearted 1271 | warped 1272 | wary 1273 | wasteful 1274 | watchful 1275 | waterlogged 1276 | watery 1277 | wavy 1278 | wealthy 1279 | weak 1280 | weary 1281 | webbed 1282 | wee 1283 | weekly 1284 | weepy 1285 | weighty 1286 | weird 1287 | welcome 1288 | well-documented 1289 | well-groomed 1290 | well-informed 1291 | well-lit 1292 | well-made 1293 | well-off 1294 | well-to-do 1295 | well-worn 1296 | wet 1297 | which 1298 | whimsical 1299 | whirlwind 1300 | whispered 1301 | white 1302 | whole 1303 | whopping 1304 | wicked 1305 | wide 1306 | wide-eyed 1307 | wiggly 1308 | wild 1309 | willing 1310 | wilted 1311 | winding 1312 | windy 1313 | winged 1314 | wiry 1315 | wise 1316 | witty 1317 | wobbly 1318 | woeful 1319 | wonderful 1320 | wooden 1321 | woozy 1322 | wordy 1323 | worldly 1324 | worn 1325 | worried 1326 | worrisome 1327 | worse 1328 | worst 1329 | worthless 1330 | worthwhile 1331 | worthy 1332 | wrathful 1333 | wretched 1334 | writhing 1335 | wrong 1336 | wry 1337 | yawning 1338 | yearly 1339 | yellow 1340 | yellowish 1341 | young 1342 | youthful 1343 | yummy 1344 | zany 1345 | zealous 1346 | zesty 1347 | zigzag 1348 | --------------------------------------------------------------------------------