├── LICENSE
├── README.md
├── best_syn.py
├── best_syn.pyc
├── example.py
├── requirements.txt
├── text_rewrite.py
└── text_rewrite.pyc


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Thiago Cassimiro
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Text-Rewrite-NLP
 2 | This library builds on two Natural Language Processing toolkits (<a href="https://spacy.io">spaCy</a> &amp; <a href="https://www.nltk.org">NLTK</a>) and an online word-finding query engine for developers called <a href="https://www.datamuse.com/api">Datamuse</a> to rewrite text.
 3 | 
 4 | ## First step, install python dependencies
 5 | <pre>pip install -r requirements.txt</pre>
 6 | 
 7 | ## Second step, install spacy en support
 8 | <pre>python -m spacy download en</pre>
 9 | 
10 | ## Third step, install NLTK corpora
11 | Run this code in any Python file or in an interactive Python session
12 | <pre>import nltk<br>nltk.download()</pre>
13 | After that, select `all-corpora` and download it.
14 | 
15 | 
16 | ## Last step, Enjoy :)
17 | <img alt="Example" src="https://i.imgur.com/kwxYtAN.png">
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/best_syn.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __author__ = 'woolz'
 3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP'
 4 | 
 5 | from nltk.corpus import wordnet
 6 | import spacy
 7 | import urllib
 8 | import json
 9 | 
10 | nlp = spacy.load('en')  # English spaCy pipeline, loaded once at import and shared module-wide
11 | 
12 | class BestSyn:
13 | 
14 |     def get_datamuse_syn_list(self):
15 |         url = "https://api.datamuse.com/words?ml=" + self.word
16 |         response = urllib.urlopen(url)
17 |         data = response.read().decode("utf-8")
18 |         json_data = json.loads(data)
19 |         word_list = []
20 |         for x in json_data:
21 |             word_list.append(x['word'])
22 |         return word_list
23 | 
24 |     def __init__(self, word):
25 |         self.word = word
26 |         self.best_score = 0.0
27 |         self.best_choice = ""
28 | 
29 | 
30 |     def pull(self):
31 |         words_list = self.get_datamuse_syn_list()
32 |         for syn_word in words_list:
33 |             use_nltk = True
34 |             try:
35 |                 nltk_raw_word = wordnet.synsets(self.word)[0]
36 |                 nltk_syn_word = wordnet.synsets(syn_word)[0]
37 |             except:
38 |                 use_nltk = False
39 |             
40 |             spacy_raw_word = nlp(unicode(self.word.lower()))
41 |             spacy_syn_word = nlp(unicode(syn_word.lower()))
42 | 
43 |             
44 |             spacy_score = spacy_raw_word.similarity(spacy_syn_word)
45 |             
46 |             if (use_nltk == True):
47 |                 nltk_score = nltk_syn_word.wup_similarity(nltk_raw_word)
48 |                 if (nltk_score == None):
49 |                     nltk_score = 0
50 |                 score = (nltk_score+spacy_score)/2
51 |             else:
52 |                 score = spacy_score
53 | 
54 |                 
55 |             if (score > self.best_score):
56 |                 self.best_score = score
57 |                 self.best_choice = syn_word
58 |         result = [self.best_score, self.best_choice]
59 |         return result
60 | 
61 |     def __del__(self):
62 |         self.word = False
63 |         self.best_score = False
64 |         self.best_choice = False
65 | 
66 | 
67 | 
68 |             
69 |         
70 | 
71 | 


--------------------------------------------------------------------------------
/best_syn.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/best_syn.pyc


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | from text_rewrite import TextRewrite
2 | 
3 | 
4 | sentences = ['My machine is so bad and dramatic', 'I have one dog and two cars', 'This season is so weak.', 'my home is so sucky']
5 | for sentence in sentences:
6 |     new_sentence = TextRewrite(sentence).work() 
7 |     print(sentence + " -> " + new_sentence)
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk==3.3
2 | spacy==2.0.12
3 | 


--------------------------------------------------------------------------------
/text_rewrite.py:
--------------------------------------------------------------------------------
 1 | 
 2 | __author__ = 'woolz'
 3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP'
 4 | 
 5 | from spacy.tokenizer import Tokenizer
 6 | from spacy.lang.en.examples import sentences
 7 | from best_syn import *
 8 | 
 9 | 
10 | class TextRewrite:
11 | 
12 | 
13 |     def __init__(self, sentence):
14 |         self.sentence = sentence
15 | 
16 |     def work(self):
17 |         """
18 |         @var rewrite_types: Type of words that can rewrited 
19 |         """
20 |         rewrite_types = [u'NN', u'NNS', u'JJ', u'JJS']
21 |         pos_tokenizer = nlp(unicode(self.sentence))
22 |         words = []
23 |         for token in pos_tokenizer:
24 |             #print(token.pos_, token.text, token.tag_)
25 |             if token.tag_ in rewrite_types:
26 |                 words.append(token.text)
27 |         rewrited_sentence = self.sentence
28 |         for word in words:
29 |             word_syn = BestSyn(word).pull()[1]
30 |             rewrited_sentence = rewrited_sentence.replace(word, word_syn)
31 |         return rewrited_sentence
32 | 
33 |     def __del__(self):
34 |         self.sentence = False
35 | 
36 | 


--------------------------------------------------------------------------------
/text_rewrite.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/text_rewrite.pyc


--------------------------------------------------------------------------------