├── .gitignore ├── .idea ├── other.xml └── testrunner.xml ├── .python-version ├── README.md ├── abstract_record_loader.py ├── sense_loader.py ├── synlink_loader.py ├── synset_loader.py ├── wn.py ├── word_loader.py └── wordnet_python.iml /.gitignore: -------------------------------------------------------------------------------- 1 | # Intellij 2 | .idea/ 3 | .idea/~ 4 | .idea/* 5 | *.iml 6 | *.iws 7 | .idea/workspace.xml 8 | .idea/other.xml 9 | .idea/testrunner.xml 10 | -------------------------------------------------------------------------------- /.idea/other.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/testrunner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.3.1 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | wordnet_python 2 | ============== 3 | 4 | [日本語WordNet](http://nlpwww.nict.go.jp/wn-ja/)をPythonで扱うためのラッパー。 5 | 6 | [yanbeさんのコード](https://gist.github.com/yanbe/79057)をPython3.3で動くようにして、ついでにオブジェクト指向的に読みやすくしたもの。 7 | 8 | [yanbeさんのブログ記事](http://subtech.g.hatena.ne.jp/y_yanbe/20090314/p2)に詳細が載っている。 9 | 10 | Python3.3.1で動作確認。Python2.7.4では動かない。 11 | 12 | ## 使い方 13 | 14 | SQLiteのファイル名をabstract_record_loader.pyに書き込んで(初期設定ではディレクトリ構造1つ上のwnjpn.db)、次のようにターミナル上で実行する。 15 | 16 | ``` 17 | $ python wn.py 夢 hype 18 | ``` 19 | 20 | こうすると、「夢」の上位語を以下のように出力してくれる。 21 | 22 | ``` 23 | 夢 dreaming 24 | イマジネイション imagery 25 | 夢 aspiration 26 | 渇き desire 27 | 心緒 feeling 28 | 動静 state 29 | 属性 attribute 30 | 抽象的実体 abstract_entity 31 | 夢 dream 32 | 夢 want 33 | 渇き desire 34 | 心緒 feeling 35 | 動静 state 36 | 属性 attribute 37 | 抽象的実体 abstract_entity 38 | 夢 pipe_dream 39 | ファンタジー fantasy 40 | イマジネイション vision 41 | 創造性 creativity 42 | 才幹 ability 43 | 了知 knowledge 44 | ``` 45 | 46 | 47 | ## DB接続設定 48 | 49 | SQLiteデータベースへの接続は、DBのファイル名 wnjpn.db を abstract_record_loader.pyに以下のように書き込む。 50 | 51 | 52 | ```python 53 | import sqlite3 54 | 55 | 56 | class AbstractRecordLoader(object): 57 | def __init__(self): 58 | self.conn = sqlite3.connect('wnjpn.db') 59 | ``` -------------------------------------------------------------------------------- /abstract_record_loader.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | 4 | class AbstractRecordLoader(object): 5 | def __init__(self): 6 | self.conn = sqlite3.connect('../wnjpn.db') 7 | 8 | def __enter__(self): 9 | return self 10 | 11 | def __del__(self): 12 | self.conn.close() 13 | 14 | def __exit__(self, exc_type, exc_val, exc_tb): 15 | self.conn.close() 16 | -------------------------------------------------------------------------------- /sense_loader.py: -------------------------------------------------------------------------------- 1 | from abstract_record_loader import AbstractRecordLoader 2 | from collections import namedtuple 3 | 4 | 5 | class SenseLoader(AbstractRecordLoader): 6 | def __init__(self): 7 | super().__init__() 8 | self.sense = namedtuple('Sense', 'synset wordid lang rank lexid freq src') 9 | 10 | def load_senses_with_synset(self, word): 11 | cur = self.conn.execute("select * from sense where wordid=?", (word.wordid,)) 12 | return [self.sense(*row) for row in cur] 13 | 14 | def load_sense_with_synset(self, synset, lang='jpn'): 15 | cur = self.conn.execute("select * from sense where synset=? and lang=?", 16 | (synset, lang)) 17 | row = cur.fetchone() 18 | return row and self.sense(*row) or None 19 | -------------------------------------------------------------------------------- /synlink_loader.py: -------------------------------------------------------------------------------- 1 | from abstract_record_loader import AbstractRecordLoader 2 | from collections import namedtuple 3 | 4 | 5 | class SynlinkLoader(AbstractRecordLoader): 6 | def __init__(self): 7 | super().__init__() 8 | self.synlink = namedtuple('SynLink', 'synset1 synset2 link src') 9 | 10 | def load_synlinks_with_sense_and_link(self, sense, link): 11 | cur = self.conn.execute("select * from synlink where synset1=? and link=?", 12 | (sense.synset, link)) 13 | return [self.synlink(*row) for row in cur] 14 | 15 | -------------------------------------------------------------------------------- /synset_loader.py: -------------------------------------------------------------------------------- 1 | from abstract_record_loader import AbstractRecordLoader 2 | from collections import namedtuple 3 | 4 | 5 | class SynsetLoader(AbstractRecordLoader): 6 | def __init__(self): 7 | super().__init__() 8 | self.synset = namedtuple('Synset', 'synset pos name src') 9 | 10 | def load_synset_with_synset(self, synset): 11 | cur = self.conn.execute("select * from synset where synset=?", (synset,)) 12 | return self.synset(*cur.fetchone()) 13 | -------------------------------------------------------------------------------- /wn.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # For Python3.3 or higher. 3 | # You cannot use it with Python 2.7 or lower because of super() function. 4 | import sys 5 | from sense_loader import SenseLoader 6 | from word_loader import WordLoader 7 | from synset_loader import SynsetLoader 8 | from synlink_loader import SynlinkLoader 9 | 10 | 11 | def print_synlinks_recursively(senses, link, lang='jpn', _depth=0): 12 | for sense in senses: 13 | with SynlinkLoader() as synlink_loader: 14 | synlinks = synlink_loader.load_synlinks_with_sense_and_link(sense, link) 15 | if synlinks: 16 | with WordLoader() as word_loader: 17 | with SynsetLoader() as synset_loader: 18 | print(''.join([ 19 | ' ' * 2 * _depth, 20 | word_loader.load_word_with_wordid(sense.wordid).lemma, 21 | ' ', 22 | synset_loader.load_synset_with_synset(sense.synset).name])) 23 | _senses = [] 24 | for synLink in synlinks: 25 | with SenseLoader() as sense_loader: 26 | sense = sense_loader.load_sense_with_synset(synLink.synset2, lang) 27 | if sense: 28 | _senses.append(sense) 29 | 30 | print_synlinks_recursively(_senses, link, lang, _depth + 1) 31 | 32 | 33 | if __name__ == '__main__': 34 | if len(sys.argv) >= 3: 35 | with WordLoader() as word_loader: 36 | words = word_loader.load_words_with_lemma(sys.argv[1]) 37 | if words: 38 | with SenseLoader() as sense_loader: 39 | senses = sense_loader.load_senses_with_synset(words[0]) 40 | 41 | if len(sys.argv) >= 3: 42 | link = sys.argv[2] 43 | else: 44 | link = 'hypo' 45 | 46 | if len(sys.argv) == 4: 47 | lang = sys.argv[3] 48 | else: 49 | lang = 'jpn' 50 | 51 | print_synlinks_recursively(senses, link, lang) 52 | sys.exit() 53 | 54 | print("(nothing found)", file=sys.stderr) 55 | sys.exit() 56 | 57 | # 引数の書き方を間違えたときなどにはhelpを表示 58 | print("""usage: wn.py word link [lang] 59 | word 60 | word to investigate 61 | 62 | link 63 | syns - Synonyms 64 | hype - Hypernyms 65 | inst - Instances 66 | hypo - Hyponym 67 | hasi - Has Instance 68 | mero - Meronyms 69 | mmem - Meronyms --- Member 70 | msub - Meronyms --- Substance 71 | mprt - Meronyms --- Part 72 | holo - Holonyms 73 | hmem - Holonyms --- Member 74 | hsub - Holonyms --- Substance 75 | hprt - Holonyms -- Part 76 | attr - Attributes 77 | sim - Similar to 78 | entag - Entails 79 | causg - Causes 80 | dmncg - Domain --- Category 81 | dmnug - Domain --- Usage 82 | dmnrg - Domain --- Region 83 | dmtcg - In Domain --- Category 84 | dmtug - In Domain --- Usage 85 | dmtrg - In Domain --- Region 86 | antsg - Antonyms 87 | 88 | lang (default: jpn) 89 | jpn - Japanese 90 | eng - English 91 | 92 | 例(example) 93 | python wn.py 夢 hype 94 | """) 95 | -------------------------------------------------------------------------------- /word_loader.py: -------------------------------------------------------------------------------- 1 | from abstract_record_loader import AbstractRecordLoader 2 | from collections import namedtuple 3 | 4 | 5 | class WordLoader(AbstractRecordLoader): 6 | def __init__(self): 7 | super().__init__() 8 | self.word = namedtuple('Word', 'wordid lang lemma pron pos') 9 | 10 | def load_words_with_lemma(self, lemma): 11 | cur = self.conn.execute("select * from word where lemma=?", (lemma,)) 12 | return [self.word(*row) for row in cur] 13 | 14 | def load_word_with_wordid(self, wordid): 15 | cur = self.conn.execute("select * from word where wordid=?", (wordid,)) 16 | return self.word(*cur.fetchone()) 17 | -------------------------------------------------------------------------------- /wordnet_python.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | --------------------------------------------------------------------------------