├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
└── script.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Ankit Solanki
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyLex
 2 | Python3 library for performing lexical analysis on words, one word at a time.
 3 | 
 4 | ## Usage
 5 | 
 6 | #### Help Usage
 7 | 
 8 | ```console
 9 | gavy42@jarvis:~/PyLex$ python3 script.py -h
10 | usage: script.py [-h] [-r] [-s] [-a] [-m] [-hg] [-sa] [-n NUMBER] [-f]
11 |                  word
12 | 
13 | PyLex: Perform lexical analysis, one word at a time.
14 | 
15 | positional arguments:
16 |   word                  an input of the word
17 | 
18 | optional arguments:
19 |   -h, --help            show this help message and exit
20 |   -r, --rhyme           get rhyming words
21 |   -s, --synonym         get synonym
22 |   -a, --antonym         get antonyms
23 |   -m, --meaning         get meaning
24 |   -hg, --homographs     get homographs
25 |   -sa, --sound_alike    get words that sound alike
26 |   -n NUMBER, --number NUMBER
27 |                         number of words should be returned
28 |   -f, --full            FULL lexical analysis
29 | ```
30 | 
31 | #### Interpreter Usage
32 | 
33 | ```python3
34 | >>> from script import Lex
35 | >>> lex = Lex("alone")
36 | >>> lex
37 | <script.Lex object at 0x7f8c075c5d68>
38 | >>> wordlist = lex.rhyming_words()
39 | [*] Getting rhyming words for the word: alone...
40 | >>> lex.display_wordlist(wordlist, 4)
41 | [*] Displaying list; Format: Descending
42 | cologne
43 | malone
44 | overblown
45 | blown
46 | ```
47 | 
48 | #### Functions Usage
49 | 
50 | After creating an instance with `Lex(<string>)`, these methods are available:
51 | 
52 | - `rhyming_words()` : Returns a list of words rhyming with the entered word.
53 | - `synonyms()` : Returns a list of synonyms
54 | - `antonyms()` : Returns a list of antonyms
55 | - `meaning()` : Returns a list of possible meanings
56 | - `homophones()` : Returns a list of homophones
57 | - `homographs()` : Returns a list of homographs (currently commented out in `script.py`, so not yet available)
58 | - `sound_alike()` : Returns a list of words that sound similar to the given word
59 | 
60 | ## PyLex Full Analysis
61 | 
62 | - Run `python3 script.py <word> -f` to get full lexical analysis of any word.
63 | - Saves the results as a JSON file named `<word>_lex_analysis.json` in the current directory.
64 | 
65 | ```console
66 | gavy42@jarvis:~/PyLex$ python3 script.py alone -f
67 | [!][!] Starting full analysis of: alone
68 | 
69 | [*][*] JSON file saved in local directory named - alone_lex_analysis.json
70 | 
71 | ```
72 | 
73 | ### Note
74 | - The program calls external websites to gather the information and scrapes their content where needed.
75 | 
76 | ## Support
77 | If you have any trouble understanding some part of the code, feel free to raise an issue; to contribute, feel free to open a pull request.


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | argparse
3 | bs4
4 | click
5 | lxml
6 | 


--------------------------------------------------------------------------------
/script.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import ast
  4 | import click
  5 | import json
  6 | import requests
  7 | import argparse
  8 | from bs4 import BeautifulSoup
  9 | 
 10 | class Lex:
 11 | 	def __init__(self, word):
 12 | 		self.word = word
 13 | 
 14 | 	def rhyming_words(self):
 15 | 		url = "http://www.b-rhymes.com/rhyme/word/" + self.word
 16 | 		raw = requests.get(url)
 17 | 
 18 | 		soup = BeautifulSoup(raw.text, "lxml")
 19 | 		rows = soup.find_all('tr')
 20 | 
 21 | 		templist = []
 22 | 		wordlist = []
 23 | 
 24 | 		for row in rows:			
 25 | 			cols = row.find_all('td')
 26 | 			cols = [x.text.strip() for x in cols]
 27 | 
 28 | 			templist.append(cols)
 29 | 		
 30 | 		for t in templist:
 31 | 			if len(t)!=0: 
 32 | 				wordlist.append(t[1])
 33 | 
 34 | 		return wordlist
 35 | 
 36 | 
 37 | 	def synonyms(self):
 38 | 		url = "http://www.thesaurus.com/browse/" + self.word
 39 | 
 40 | 		raw = requests.get(url)
 41 | 		soup = BeautifulSoup(raw.text, "lxml")
 42 | 
 43 | 		section = soup.find_all('section')
 44 | 		ul = section[0].find_all('ul')
 45 | 		li = ul[0].find_all('li')
 46 | 
 47 | 		wordlist = []
 48 | 		
 49 | 		for element in li:
 50 | 			for x in element:
 51 | 				x = x.text.strip()
 52 | 				if not x.startswith('.css'):
 53 | 					wordlist.append(x)
 54 | 				else:
 55 | 					temp_list = x.split('}')
 56 | 					wordlist.append(temp_list[len(temp_list)-1])
 57 | 
 58 | 		return wordlist
 59 | 
 60 | 
 61 | 	def antonyms(self):
 62 | 		url = "http://www.thesaurus.com/browse/" + self.word
 63 | 
 64 | 		raw = requests.get(url)
 65 | 		soup = BeautifulSoup(raw.text, "lxml")
 66 | 
 67 | 		section = soup.find_all('section')
 68 | 		ul = section[1].find_all('ul')
 69 | 		li = ul[0].find_all('li')
 70 | 
 71 | 		wordlist = []
 72 | 
 73 | 		for element in li:
 74 | 			for x in element:
 75 | 				x = x.text.strip()
 76 | 				if not x.startswith('.css'):
 77 | 					wordlist.append(x)
 78 | 				else:
 79 | 					temp_list = x.split('}')
 80 | 					wordlist.append(temp_list[len(temp_list)-1])
 81 | 
 82 | 		return wordlist
 83 | 
 84 | 	def sound_alike(self):
 85 | 		templist = []
 86 | 		wordlist = []
 87 | 		url = "https://api.datamuse.com/words?sl=" + self.word
 88 | 
 89 | 		data = requests.get(url)
 90 | 
 91 | 		for dlist in data:
 92 | 			dlist = dlist.decode("utf-8")
 93 | 			templist.append(dlist)
 94 | 
 95 | 		string = "".join(templist)
 96 | 		strdict = ast.literal_eval(string)
 97 | 
 98 | 		for dlist in strdict:
 99 | 			wordlist.append(dlist['word'])
100 | 		
101 | 		return wordlist
102 | 
103 | 
104 | 	def homophones(self):
105 | 		wordlist = []
106 | 		url = "https://api.datamuse.com/words?rel_hom=" + self.word
107 | 
108 | 		data = requests.get(url)
109 | 
110 | 		for dlist in data:
111 | 			dlist = ast.literal_eval(dlist.decode("utf-8"))
112 | 
113 | 			for d in dlist:
114 | 				wordlist.append(d["word"])
115 | 
116 | 		return wordlist
117 | 
118 | 	# def homographs(self):
119 | 	# 	wordlist = []
120 | 	# 	url = "http://www.roget.org/BRIAN0.html"
121 | 
122 | 	# 	raw = requests.get(url)
123 | 	# 	soup = BeautifulSoup(raw.text, "lxml")
124 | 	# 	rows = soup.find_all('tr')
125 | 
126 | 	# 	for row in rows:			
127 | 	# 		cols = row.find_all('td')
128 | 	# 		cols = [x.text.strip() for x in cols]
129 | 
130 | 	# 		print(cols)
131 | 		
132 | 	def meaning(self):
133 | 		string = self.word.split(" ")
134 | 		string = "-".join(string)
135 | 
136 | 		url = "http://www.dictionary.com/browse/"+string
137 | 		session = requests.get(url)
138 | 		
139 | 		soup = BeautifulSoup(session.text, "lxml")
140 | 		
141 | 		sec = soup.find_all('section')
142 | 		content = sec[0].find_all("div", {"class": "def-content"})
143 | 
144 | 		meaninglist = []
145 | 
146 | 		for c in content:
147 | 			meaninglist.append(c.text.strip())
148 | 
149 | 		return meaninglist
150 | 
151 | 
152 | 	def display_wordlist(self, wordlist, num):
153 | 		print("[*] Displaying list; Format: Descending")
154 | 
155 | 		if num>=len(wordlist):
156 | 			for w in wordlist: 
157 | 				print(w)
158 | 		else:
159 | 			for i in range(0, num):
160 | 				print(wordlist[i])
161 | 
162 | 
def build_parser():
	"""Build the PyLex command-line parser.

	Returns:
		An ``argparse.ArgumentParser`` accepting the positional ``word``, one
		boolean flag per lookup, ``-n/--number`` (default 50) and ``-f/--full``.
	"""
	parser = argparse.ArgumentParser(description='PyLex: Perform lexical analysis, one word at a time.')
	parser.add_argument("word", help="an input of the word")

	parser.add_argument("-r", "--rhyme", help="get rhyming words", action="store_true")
	parser.add_argument("-s", "--synonym", help="get synonym", action="store_true")
	parser.add_argument("-a", "--antonym", help="get antonyms", action="store_true")
	parser.add_argument("-m", "--meaning", help="get meaning", action="store_true")

	parser.add_argument("-hp", "--homophones", help="get homophones", action="store_true")
	# parser.add_argument("-hg", "--homographs", help="get homographs", action="store_true")
	parser.add_argument("-sa", "--sound_alike", help="get words that sound alike", action="store_true")

	parser.add_argument("-n", "--number", type=int, help="number of words need to be returned", default=50)

	parser.add_argument("-f", "--full", help="FULL lexical analysis", action="store_true")

	return parser


def first_definition(meanings, default=None):
	"""Return the first meaning, cut at its first ``:``.

	Args:
		meanings: list of meaning strings, possibly empty.
		default: value returned when ``meanings`` is empty.
	"""
	if not meanings:
		return default
	return meanings[0].split(":")[0]


def main(argv=None):
	"""Run the PyLex command line.

	Args:
		argv: argument list to parse; ``None`` makes argparse read
			``sys.argv[1:]``.

	Each selected lookup is printed in turn. With ``--full`` every lookup is
	run and the results are written to ``<word>_lex_analysis.json`` in the
	current directory.
	"""
	args = build_parser().parse_args(argv)
	lex = Lex(args.word)

	if args.rhyme:
		print("[*] Getting rhyming words for the word:", args.word,"...")
		lex.display_wordlist(lex.rhyming_words(), args.number)

	if args.synonym:
		print("[*] Getting synonyms for the word:", args.word, "...")
		lex.display_wordlist(lex.synonyms(), args.number)

	if args.antonym:
		print("[*] Getting antonyms for the word:", args.word, "...")
		lex.display_wordlist(lex.antonyms(), args.number)

	if args.homophones:
		print("[*] Getting homophones for the word:", args.word, "...")
		print("[!] Homophones are words that sound identical but are written differently [!]\n")
		lex.display_wordlist(lex.homophones(), args.number)

	# if args.homographs:
	# 	print("[*] Getting homographs for the word:", args.word, "...")
	# 	print("[!] Homographs are words that spelled identical but have different meaning [!]\n")
	# 	wl = lex.homographs()
	# 	lex.display_wordlist(wl, args.number)

	if args.sound_alike:
		print("[*] Getting words that sound alike with :", args.word, "...\n")
		lex.display_wordlist(lex.sound_alike(), args.number)

	if args.meaning:
		print("[*] Fetching meaning of the word...")
		definition = first_definition(lex.meaning())
		# An unknown word yields no meanings; report it instead of crashing
		# on meanings[0], matching the guard used by the full analysis.
		if definition is None:
			print("[!] No meaning found for the word:", args.word)
		else:
			print(definition)

	if args.full:
		print('[!][!] Starting full analysis of:', args.word)

		# The dict literal evaluates top to bottom, so the lookups run in the
		# same order as the keys appear in the output file.
		analysis_dict = {
			'meaning': first_definition(lex.meaning(), default=[]),
			'synonyms': lex.synonyms(),
			'antonyms': lex.antonyms(),
			'homophones': lex.homophones(),
			# 'homographs': lex.homographs(),
			'sound_alike': lex.sound_alike(),
			'rhyming_words': lex.rhyming_words(),
		}

		filename = args.word + "_lex_analysis.json"
		with open(filename, 'w') as outfile:
			json.dump(analysis_dict, outfile, indent=4)

		print('\n[*][*] JSON file saved in local directory named - ' + filename)


if __name__=="__main__":
	main()


--------------------------------------------------------------------------------