├── README.md └── Subject_Verb_Object_Extractor.py /README.md: -------------------------------------------------------------------------------- 1 | # Extract-SVO 2 | A Python script to extract subject-predicate-object (SVO) triplets from English sentences using the Stanford Parser, following the paper: Rusu, Delia, et al. "Triplet extraction from sentences." Proceedings of the 10th International Multiconference "Information Society - IS 2007". 3 | You can download the paper from the following link: https://www.researchgate.net/profile/Bla_Fortuna/publication/228905420_Triplet_extraction_from_sentences/links/0912f50866261e6a54000000.pdf 4 | 5 | Stanford Parser is a natural language parser developed by Dan Klein and Christopher D. Manning from The Stanford Natural Language Processing Group. You can download the package from https://stanfordnlp.github.io/CoreNLP/download.html 6 | 7 | Stanford Parser generates a Treebank parse tree for the input sentence. As an example, consider the parse tree for the sentence “A rare black squirrel has become a regular visitor to a suburban garden”. The triplet extracted out of this sentence is squirrel – become – visitor. 8 | -------------------------------------------------------------------------------- /Subject_Verb_Object_Extractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | # Set the Stanford parser and models in your environment variables. 
# The NLTK Stanford wrapper looks up the parser and model jars through these
# environment variables; both are pointed at a local 'stanford-parser' folder.
os.environ['STANFORD_PARSER'] = 'stanford-parser'
os.environ['STANFORD_MODELS'] = 'stanford-parser'
from nltk.parse.stanford import StanfordParser
from nltk.tree import ParentedTree, Tree

parser = StanfordParser()


def find_subject(t):
    """Return the subject of parse tree *t* as ``(word, attributes)``.

    The subject is the first node whose label starts with 'NN' found inside
    an 'NP' subtree, in the order produced by ``t.subtrees``. Returns None
    when the tree contains no such noun.
    """
    for s in t.subtrees(lambda t: t.label() == 'NP'):
        for n in s.subtrees(lambda n: n.label().startswith('NN')):
            return (n[0], find_attrs(n))
    return None


def find_predicate(t):
    """Return the predicate of parse tree *t* as ``(word, attributes)``.

    Every node whose label starts with 'VB' inside every 'VP' subtree is
    visited and the last one visited wins. Returns None when no verb is
    found (previously this case raised TypeError on ``None[0]``).
    """
    v = None

    for s in t.subtrees(lambda t: t.label() == 'VP'):
        for n in s.subtrees(lambda n: n.label().startswith('VB')):
            v = n

    if v is None:
        return None
    return (v[0], find_attrs(v))


def find_object(t):
    """Return the object of parse tree *t* as ``(word, attributes)``.

    Inside each 'VP' subtree, the 'NP', 'PP' and 'ADJP' subtrees are scanned
    in order: for 'NP'/'PP' the first 'NN*' node is returned, for 'ADJP' the
    first 'JJ*' node. Returns None when nothing matches.
    """
    for s in t.subtrees(lambda t: t.label() == 'VP'):
        for n in s.subtrees(lambda n: n.label() in ['NP', 'PP', 'ADJP']):
            # Nouns are taken from noun/prepositional phrases, adjectives
            # from adjective phrases.
            prefix = 'NN' if n.label() in ['NP', 'PP'] else 'JJ'
            for c in n.subtrees(lambda c: c.label().startswith(prefix)):
                return (c[0], find_attrs(c))
    return None


def find_attrs(node):
    """Collect the attribute strings attached to *node*.

    *node* must provide ``parent()`` (e.g. a ParentedTree node). Siblings and
    "uncles" (siblings of the parent) are inspected depending on the label:

    * 'JJ*': sibling 'RB' words, plus uncle 'PP' phrases.
    * 'NN*': sibling 'DT', 'PRP$', 'POS', 'JJ', 'CD', 'ADJP', 'QP' and 'NP'
      phrases, plus uncle 'PP' phrases.
    * 'VB*': sibling 'ADVP' phrases, plus uncle 'VB*' words.

    Returns a list of strings; it is empty for any other label or when the
    node has no parent.
    """
    attrs = []
    label = node.label()
    p = node.parent()

    # A root node has neither siblings nor uncles to inspect.
    if p is None:
        return attrs

    # Search siblings
    if label.startswith('JJ'):
        for s in p:
            if s.label() == 'RB':
                attrs.append(s[0])

    elif label.startswith('NN'):
        for s in p:
            if s.label() in ['DT', 'PRP$', 'POS', 'JJ', 'CD', 'ADJP', 'QP', 'NP']:
                attrs.append(' '.join(s.leaves()))

    elif label.startswith('VB'):
        for s in p:
            if s.label() == 'ADVP':
                attrs.append(' '.join(s.leaves()))

    grandparent = p.parent()
    # Without a grandparent there are no uncles.
    if grandparent is None:
        return attrs

    # Search uncles. The parent is excluded by identity: trees compare
    # structurally, so `!=` would also skip an uncle that merely looks like
    # the parent.
    if label.startswith('JJ') or label.startswith('NN'):
        for s in grandparent:
            if s is not p and s.label() == 'PP':
                attrs.append(' '.join(s.leaves()))

    elif label.startswith('VB'):
        for s in grandparent:
            if s is not p and s.label().startswith('VB'):
                attrs.append(s[0])

    return attrs


def main(sentence):
    """Print the subject, predicate and object extracted from *sentence*.

    *sentence* may be a raw sentence string, which is parsed with the
    module-level Stanford ``parser``, or an already parsed NLTK tree. Plain
    ``Tree`` objects are converted to ``ParentedTree`` because ``find_attrs``
    needs ``parent()``.

    Raises ValueError when the parser yields no parse for the sentence.
    """
    if isinstance(sentence, ParentedTree):
        tree = sentence
    elif isinstance(sentence, Tree):
        tree = ParentedTree.convert(sentence)
    else:
        # raw_parse yields candidate parses; only the first one is used.
        parsed = next(iter(parser.raw_parse(sentence)), None)
        if parsed is None:
            raise ValueError('no parse produced for sentence: %r' % (sentence,))
        tree = ParentedTree.convert(parsed)

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(find_subject(tree))
    print(find_predicate(tree))
    print(find_object(tree))


if __name__ == "__main__":
    import sys
    # Parse the sentence given on the command line, or the example sentence
    # when no argument is supplied.
    sent = ' '.join(sys.argv[1:]) or 'A rare black squirrel has become a regular visitor to a suburban garden'
    main(sent)