├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt └── script.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 
3 | Copyright (c) 2018 Ankit Solanki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyLex 2 | Python3 library for performing lexical analysis on words, one word at a time. 3 | 4 | ## Usage 5 | 6 | #### Help Usage 7 | 8 | ```console 9 | gavy42@jarvis:~/PyLex$ python3 script.py -h 10 | usage: script.py [-h] [-r] [-s] [-a] [-m] [-hg] [-sa] [-n NUMBER] [-f] 11 | word 12 | 13 | PyLex: Perform lexical analysis, one word at a time. 
14 | 15 | positional arguments: 16 | word an input of the word 17 | 18 | optional arguments: 19 | -h, --help show this help message and exit 20 | -r, --rhyme get rhyming words 21 | -s, --synonym get synonym 22 | -a, --antonym get antonyms 23 | -m, --meaning get meaning 24 | -hg, --homographs get homographs 25 | -sa, --sound_alike get words that sound alike 26 | -n NUMBER, --number NUMBER 27 | number of words should be returned 28 | -f, --full FULL lexical analysis 29 | ``` 30 | 31 | #### Interpreter Usage 32 | 33 | ```python3 34 | >>> from script import Lex 35 | >>> lex = Lex("alone") 36 | >>> lex 37 | 38 | >>> wordlist = lex.rhyming_words() 39 | [*] Getting rhyming words for the word: alone... 40 | >>> lex.display_wordlist(wordlist, 4) 41 | [*] Displaying list; Format: Descending 42 | cologne 43 | malone 44 | overblown 45 | blown 46 | ``` 47 | 48 | #### Functions Usage 49 | 50 | After creating an instance with `Lex(word)`, these methods are available: 51 | 52 | - `rhyming_words()` : Returns a list of words rhyming with the entered word. 53 | - `synonyms()` : Returns a list of synonyms 54 | - `antonyms()` : Returns a list of antonyms 55 | - `meaning()` : Returns a list of possible meanings 56 | - `homophones()` : Returns a list of homophones 57 | - `homographs()` : Returns a list of homographs (currently disabled in `script.py`) 58 | - `sound_alike()` : Returns a list of words that sound like the given word 59 | 60 | ## PyLex Full Analysis 61 | 62 | - Run `python3 script.py <word> -f` to get a full lexical analysis of any word. 63 | - Writes a JSON file containing the collected information to the current directory. 64 | 65 | ```console 66 | gavy42@jarvis:~/PyLex$ python3 script.py alone -f 67 | [!][!] Starting full analysis of: alone 68 | 69 | [*][*] JSON file saved in local directory named - alone_lex_analysis.json 70 | 71 | ``` 72 | 73 | ### Note 74 | - The program calls external websites to gather the information and scrapes their content where needed. 
#!/usr/bin/env python3
"""PyLex: perform lexical analysis, one word at a time.

The :class:`Lex` class looks a word up on several public web sources
(b-rhymes.com, thesaurus.com, dictionary.com and the Datamuse API) and
returns the results as plain Python lists.  Run as a script, the module
exposes the same lookups through command-line flags and can write all of
them to a JSON file.
"""

import argparse
import json
from urllib.parse import quote

# Seconds to wait for a remote server.  ``requests`` applies no timeout by
# default, so without this a single unresponsive host could hang a lookup.
REQUEST_TIMEOUT = 10


def _http_get(url, params=None):
    """Return the ``requests`` response for a GET request on *url*.

    *params* is an optional mapping that ``requests`` URL-encodes into the
    query string.
    """
    # Imported here rather than at module level so that importing this
    # module does not, by itself, require the third-party HTTP dependency.
    import requests

    return requests.get(url, params=params, timeout=REQUEST_TIMEOUT)


def _parse_html(markup):
    """Parse *markup* with BeautifulSoup using the ``lxml`` parser."""
    # Lazy import for the same reason as in ``_http_get``.
    from bs4 import BeautifulSoup

    return BeautifulSoup(markup, "lxml")


class Lex:
    """Lexical lookups for a single word.

    Every lookup method performs one HTTP request and returns a list of
    strings.  Lookups that scrape HTML return an empty list when the page
    does not contain the elements they look for.
    """

    def __init__(self, word):
        self.word = word

    def __repr__(self):
        return "{}({!r})".format(type(self).__name__, self.word)

    # NOTE: a ``homographs()`` lookup is advertised in the README but is not
    # available; the original draft of it was disabled.

    @staticmethod
    def _clean_entry(text):
        """Return *text* stripped, without any leading inline ``.css`` rule.

        When the stripped text starts with ``.css`` only the part after the
        last ``}`` is kept.
        """
        text = text.strip()
        if text.startswith(".css"):
            text = text.rsplit("}", 1)[-1].strip()
        return text

    @staticmethod
    def _headline(meanings):
        """Return the first meaning cut at its first ``:``, or ``None``.

        ``None`` is returned when *meanings* is empty.
        """
        if not meanings:
            return None
        return meanings[0].split(":")[0]

    def _thesaurus_words(self, section_index):
        """Return the words listed in one ``<section>`` of the thesaurus page.

        The words are read from the ``<li>`` children of the first ``<ul>``
        inside the section at *section_index*.  An empty list is returned
        when that section or list is absent.
        """
        url = "http://www.thesaurus.com/browse/" + quote(self.word, safe="")
        soup = _parse_html(_http_get(url).text)

        sections = soup.find_all("section")
        if section_index >= len(sections):
            return []
        lists = sections[section_index].find_all("ul")
        if not lists:
            return []

        words = []
        for item in lists[0].find_all("li"):
            for child in item:
                entry = self._clean_entry(child.text)
                if entry:  # skip whitespace-only children
                    words.append(entry)
        return words

    def _datamuse_words(self, parameter):
        """Query the Datamuse ``/words`` endpoint and return the ``word`` fields.

        *parameter* is the name of the query parameter that carries the word
        (for example ``"sl"`` or ``"rel_hom"``).
        """
        response = _http_get(
            "https://api.datamuse.com/words", params={parameter: self.word}
        )
        # The body is JSON; decode it as a whole rather than chunk by chunk.
        return [entry["word"] for entry in response.json()]

    def rhyming_words(self):
        """Return the rhyming words scraped from b-rhymes.com."""
        url = "http://www.b-rhymes.com/rhyme/word/" + quote(self.word, safe="")
        soup = _parse_html(_http_get(url).text)

        words = []
        for row in soup.find_all("tr"):
            cells = [cell.text.strip() for cell in row.find_all("td")]
            # The word is taken from the second cell; rows that do not have
            # one (e.g. rows without any <td>) are skipped.
            if len(cells) > 1:
                words.append(cells[1])
        return words

    def synonyms(self):
        """Return the words from the first section of the thesaurus.com page."""
        return self._thesaurus_words(0)

    def antonyms(self):
        """Return the words from the second section of the thesaurus.com page."""
        return self._thesaurus_words(1)

    def sound_alike(self):
        """Return words from Datamuse's ``sl`` query for the word."""
        return self._datamuse_words("sl")

    def homophones(self):
        """Return words from Datamuse's ``rel_hom`` query for the word."""
        return self._datamuse_words("rel_hom")

    def meaning(self):
        """Return the definitions scraped from dictionary.com.

        Spaces in the word are replaced by ``-`` to build the page URL.  The
        definitions are the ``div.def-content`` elements of the page's first
        ``<section>``; an empty list is returned when there is no section.
        """
        slug = quote(self.word.replace(" ", "-"), safe="")
        soup = _parse_html(_http_get("http://www.dictionary.com/browse/" + slug).text)

        sections = soup.find_all("section")
        if not sections:
            return []
        definitions = sections[0].find_all("div", {"class": "def-content"})
        return [definition.text.strip() for definition in definitions]

    def display_wordlist(self, wordlist, num):
        """Print a header followed by at most *num* entries of *wordlist*.

        A zero or negative *num* prints the header only.
        """
        print("[*] Displaying list; Format: Descending")

        # Clamp at zero: a negative slice bound would count from the end.
        for word in wordlist[:max(num, 0)]:
            print(word)


# (argparse attribute, Lex method name, lines printed before the word list).
# Each line is formatted with the word as its only positional argument.
_LIST_LOOKUPS = (
    ("rhyme", "rhyming_words",
     ("[*] Getting rhyming words for the word: {} ...",)),
    ("synonym", "synonyms",
     ("[*] Getting synonyms for the word: {} ...",)),
    ("antonym", "antonyms",
     ("[*] Getting antonyms for the word: {} ...",)),
    ("homophones", "homophones",
     ("[*] Getting homophones for the word: {} ...",
      "[!] Homophones are words that sound identical but are written differently [!]\n")),
    ("sound_alike", "sound_alike",
     ("[*] Getting words that sound alike with : {} ...\n",)),
)


def _build_parser():
    """Create the command-line parser for the script."""
    parser = argparse.ArgumentParser(description='PyLex: Perform lexical analysis, one word at a time.')
    parser.add_argument("word", help="an input of the word")

    parser.add_argument("-r", "--rhyme", help="get rhyming words", action="store_true")
    parser.add_argument("-s", "--synonym", help="get synonym", action="store_true")
    parser.add_argument("-a", "--antonym", help="get antonyms", action="store_true")
    parser.add_argument("-m", "--meaning", help="get meaning", action="store_true")

    parser.add_argument("-hp", "--homophones", help="get homophones", action="store_true")
    parser.add_argument("-sa", "--sound_alike", help="get words that sound alike", action="store_true")

    parser.add_argument("-n", "--number", type=int, help="number of words need to be returned", default=50)

    parser.add_argument("-f", "--full", help="FULL lexical analysis", action="store_true")
    return parser


def _full_analysis(lex):
    """Run every lookup of *lex* and return the results keyed by lookup name."""
    headline = Lex._headline(lex.meaning())
    return {
        # An empty list (not a string) marks "no meaning found".
        "meaning": [] if headline is None else headline,
        "synonyms": lex.synonyms(),
        "antonyms": lex.antonyms(),
        "homophones": lex.homophones(),
        "sound_alike": lex.sound_alike(),
        "rhyming_words": lex.rhyming_words(),
    }


def main(argv=None):
    """Run the command-line interface.

    *argv* is the list of arguments to parse; ``None`` makes ``argparse``
    read them from ``sys.argv``.
    """
    args = _build_parser().parse_args(argv)
    lex = Lex(args.word)

    for flag, method_name, banners in _LIST_LOOKUPS:
        if not getattr(args, flag):
            continue
        for banner in banners:
            print(banner.format(args.word))
        lex.display_wordlist(getattr(lex, method_name)(), args.number)

    if args.meaning:
        print("[*] Fetching meaning of the word...")
        headline = Lex._headline(lex.meaning())
        if headline is None:
            print("[!] No meaning found for the word: " + args.word)
        else:
            print(headline)

    if args.full:
        print('[!][!] Starting full analysis of:', args.word)
        out_name = args.word + "_lex_analysis.json"

        analysis = _full_analysis(lex)
        with open(out_name, "w", encoding="utf-8") as outfile:
            json.dump(analysis, outfile, indent=4)

        print('\n[*][*] JSON file saved in local directory named - ' + out_name)


if __name__ == "__main__":
    main()