├── LICENSE
├── README.md
├── best_syn.py
├── best_syn.pyc
├── example.py
├── requirements.txt
├── text_rewrite.py
└── text_rewrite.pyc
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Thiago Cassimiro
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text-Rewrite-NLP
2 | This library uses two Natural Language Processing libraries (spaCy & NLTK) and an online word-finding query engine for developers called Datamuse as the basis for rewriting texts.
3 |
4 | ## First step, install Python dependencies
5 |
pip install -r requirements.txt
6 |
7 | ## Second step, install spaCy English support
8 | python -m spacy download en
9 |
10 | ## Third step, install NLTK corpora
11 | Run this code in any Python file or Python terminal:
12 | import nltk
nltk.download()
13 | After that, select `all-corpora` and download it.
14 |
15 |
16 | ## Last step, Enjoy :)
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/best_syn.py:
--------------------------------------------------------------------------------
1 |
2 | __author__ = 'woolz'
3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP'
4 |
5 | from nltk.corpus import wordnet
6 | import spacy
7 | import urllib
8 | import json
9 |
10 | nlp = spacy.load('en')
11 |
class BestSyn:
    """Pick the best-scoring Datamuse candidate for a single word.

    Candidates are fetched from the Datamuse API via its ``ml`` query
    parameter. Each candidate is scored by spaCy similarity against the
    original word; when both words have a WordNet synset, that score is
    averaged with their Wu-Palmer similarity.
    """

    def __init__(self, word):
        """Store *word* and reset the running best score and choice."""
        self.word = word
        self.best_score = 0.0
        self.best_choice = ""

    def get_datamuse_syn_list(self):
        """Return the candidate words Datamuse lists for ``self.word``.

        Returns:
            list: the ``word`` field of every entry in the JSON response,
            in response order.
        """
        # urllib was reorganised in Python 3, so resolve the helpers here
        # to keep the module usable on both major versions.
        try:
            from urllib import quote, urlopen  # Python 2
        except ImportError:
            from urllib.parse import quote
            from urllib.request import urlopen

        # Percent-encode the word so spaces, '&', '#', or non-ASCII
        # characters cannot corrupt the query string. Encoding to UTF-8
        # first is required by Python 2's quote() for non-ASCII text.
        query = quote(self.word.encode("utf-8"))
        response = urlopen("https://api.datamuse.com/words?ml=" + query)
        try:
            data = response.read().decode("utf-8")
        finally:
            response.close()
        return [entry['word'] for entry in json.loads(data)]

    @staticmethod
    def _first_synset(word):
        """Return the first WordNet synset of *word*, or None if there is none."""
        try:
            return wordnet.synsets(word)[0]
        except LookupError:
            # IndexError (no synsets) and NLTK's missing-corpus LookupError
            # both mean "no WordNet score"; any other error should surface.
            return None

    def pull(self):
        """Score every candidate and return the best one.

        Returns:
            list: ``[best_score, best_choice]``. When no candidate scores
            above the current ``best_score`` (initially 0.0),
            ``best_choice`` keeps its previous value (initially "").
        """
        try:
            text_type = unicode  # noqa: F821 - Python 2 needs unicode for spaCy
        except NameError:
            text_type = str

        candidates = self.get_datamuse_syn_list()

        # The source word is identical for every candidate: analyse it once.
        source_doc = nlp(text_type(self.word.lower()))
        source_synset = self._first_synset(self.word)

        for candidate in candidates:
            candidate_doc = nlp(text_type(candidate.lower()))
            score = source_doc.similarity(candidate_doc)

            candidate_synset = None
            if source_synset is not None:
                candidate_synset = self._first_synset(candidate)

            if candidate_synset is not None:
                wordnet_score = candidate_synset.wup_similarity(source_synset)
                # wup_similarity may return None; count that as 0.
                if wordnet_score is None:
                    wordnet_score = 0
                score = (wordnet_score + score) / 2

            if score > self.best_score:
                self.best_score = score
                self.best_choice = candidate
        return [self.best_score, self.best_choice]

    def __del__(self):
        """Clear the stored state on finalization (kept for compatibility)."""
        self.word = False
        self.best_score = False
        self.best_choice = False
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/best_syn.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/best_syn.pyc
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | from text_rewrite import TextRewrite
2 |
3 |
# Sample inputs: each one is rewritten and printed next to its original.
sentences = [
    'My machine is so bad and dramatic',
    'I have one dog and two cars',
    'This season is so weak.',
    'my home is so sucky',
]
for sentence in sentences:
    new_sentence = TextRewrite(sentence).work()
    # Same "original -> rewritten" line as plain concatenation would give.
    print(" -> ".join((sentence, new_sentence)))
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk==3.3
2 | spacy==2.0.12
3 |
--------------------------------------------------------------------------------
/text_rewrite.py:
--------------------------------------------------------------------------------
1 |
2 | __author__ = 'woolz'
3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP'
4 |
5 | from spacy.tokenizer import Tokenizer
6 | from spacy.lang.en.examples import sentences
7 | from best_syn import *
8 |
9 |
class TextRewrite:
    """Rewrite a sentence by swapping selected words for synonyms."""

    def __init__(self, sentence):
        """Remember the sentence that ``work`` will rewrite."""
        self.sentence = sentence

    def work(self):
        """Return the sentence with every rewritable token replaced.

        Tokens whose tag is listed in ``rewrite_types`` are replaced by
        the word ``BestSyn(...).pull()`` selects; every other token, and
        the original spacing, is kept as is.

        @var rewrite_types: Types of words that can be rewritten
        """
        rewrite_types = (u'NN', u'NNS', u'JJ', u'JJS')
        try:
            text_type = unicode  # noqa: F821 - Python 2 needs unicode for spaCy
        except NameError:
            text_type = str

        # Rebuild the sentence token by token instead of calling
        # str.replace on the whole string: replace() also rewrites
        # substrings of unrelated words and can touch text that an earlier
        # substitution inserted.
        synonyms = {}  # per-call cache so a repeated word is looked up once
        pieces = []
        for token in nlp(text_type(self.sentence)):
            replacement = token.text
            if token.tag_ in rewrite_types:
                if token.text not in synonyms:
                    # pull() yields "" when it finds no candidate; keep the
                    # original word rather than deleting it.
                    best = BestSyn(token.text).pull()[1]
                    synonyms[token.text] = best or token.text
                replacement = synonyms[token.text]
            # whitespace_ is the token's trailing space (or ""), which
            # preserves the original spacing.
            pieces.append(replacement + token.whitespace_)
        return u"".join(pieces)

    def __del__(self):
        """Clear the stored sentence on finalization (kept for compatibility)."""
        self.sentence = False
35 |
36 |
--------------------------------------------------------------------------------
/text_rewrite.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/text_rewrite.pyc
--------------------------------------------------------------------------------