├── LICENSE ├── README.md ├── best_syn.py ├── best_syn.pyc ├── example.py ├── requirements.txt ├── text_rewrite.py └── text_rewrite.pyc /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Thiago Cassimiro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Text-Rewrite-NLP 2 | This lib uses two Natural Language Processing (Spacy & NLTK) and a online word-finding query engine for developers called datamuse as base to rewrite texts. 3 | 4 | ## First step, install python dependencies 5 |
pip install -r requirements.txt
6 | 7 | ## Second step, install spacy en support 8 |
python -m spacy download en
9 | 10 | ## Third step, install NLTK corpora 11 | Run this code in any python file or python terminal 12 |
import nltk
nltk.download()
13 | After that select `all-corpora` and download it 14 | 15 | 16 | ## Last step, Enjoy :) 17 | Example 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /best_syn.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'woolz' 3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP' 4 | 5 | from nltk.corpus import wordnet 6 | import spacy 7 | import urllib 8 | import json 9 | 10 | nlp = spacy.load('en') 11 | 12 | class BestSyn: 13 | 14 | def get_datamuse_syn_list(self): 15 | url = "https://api.datamuse.com/words?ml=" + self.word 16 | response = urllib.urlopen(url) 17 | data = response.read().decode("utf-8") 18 | json_data = json.loads(data) 19 | word_list = [] 20 | for x in json_data: 21 | word_list.append(x['word']) 22 | return word_list 23 | 24 | def __init__(self, word): 25 | self.word = word 26 | self.best_score = 0.0 27 | self.best_choice = "" 28 | 29 | 30 | def pull(self): 31 | words_list = self.get_datamuse_syn_list() 32 | for syn_word in words_list: 33 | use_nltk = True 34 | try: 35 | nltk_raw_word = wordnet.synsets(self.word)[0] 36 | nltk_syn_word = wordnet.synsets(syn_word)[0] 37 | except: 38 | use_nltk = False 39 | 40 | spacy_raw_word = nlp(unicode(self.word.lower())) 41 | spacy_syn_word = nlp(unicode(syn_word.lower())) 42 | 43 | 44 | spacy_score = spacy_raw_word.similarity(spacy_syn_word) 45 | 46 | if (use_nltk == True): 47 | nltk_score = nltk_syn_word.wup_similarity(nltk_raw_word) 48 | if (nltk_score == None): 49 | nltk_score = 0 50 | score = (nltk_score+spacy_score)/2 51 | else: 52 | score = spacy_score 53 | 54 | 55 | if (score > self.best_score): 56 | self.best_score = score 57 | self.best_choice = syn_word 58 | result = [self.best_score, self.best_choice] 59 | return result 60 | 61 | def __del__(self): 62 | self.word = False 63 | self.best_score = False 64 | self.best_choice = False 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /best_syn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/best_syn.pyc -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | from text_rewrite import TextRewrite 2 | 3 | 4 | sentences = ['My machine is so bad and dramatic', 'I have one dog and two cars', 'This season is so weak.', 'my home is so sucky'] 5 | for sentence in sentences: 6 | new_sentence = TextRewrite(sentence).work() 7 | print(sentence + " -> " + new_sentence) 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nltk==3.3 2 | spacy==2.0.12 3 | -------------------------------------------------------------------------------- /text_rewrite.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'woolz' 3 | __git__ = 'https://github.com/woolz/Text-Rewrite-NLP' 4 | 5 | from spacy.tokenizer import Tokenizer 6 | from spacy.lang.en.examples import sentences 7 | from best_syn import * 8 | 9 | 10 | class TextRewrite: 11 | 12 | 13 | def __init__(self, sentence): 14 | self.sentence = sentence 15 | 16 | def work(self): 17 | """ 18 | @var rewrite_types: Type of words that can rewrited 19 | """ 20 | rewrite_types = [u'NN', u'NNS', u'JJ', u'JJS'] 21 | pos_tokenizer = nlp(unicode(self.sentence)) 22 | words = [] 23 | for token in pos_tokenizer: 24 | #print(token.pos_, token.text, token.tag_) 25 | if token.tag_ in rewrite_types: 26 | words.append(token.text) 27 | rewrited_sentence = self.sentence 28 | for word in words: 29 | word_syn = BestSyn(word).pull()[1] 30 | rewrited_sentence = rewrited_sentence.replace(word, word_syn) 31 | return rewrited_sentence 32 | 33 | def __del__(self): 34 | self.sentence = False 35 | 36 | -------------------------------------------------------------------------------- /text_rewrite.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woolz/Text-Rewrite-NLP/fdac757f93d3f11cabc78fdb57ea938a2949787a/text_rewrite.pyc --------------------------------------------------------------------------------