├── README.md
├── project.py
└── requirements.txt
/README.md:
--------------------------------------------------------------------------------
1 | Hey! I'm still new to programming, so you might find a lot of errors (hopefully not, though). If you get error 403, it's no big deal; just rerun the program.
2 | You can run get_wordlist() once and then comment it out with a #. That way, if you get any errors, just go to the word list, delete the words that have already been translated,
3 | and rerun the program! (This worked for me.)
4 | Error 404 just means the word didn't have a definition in the dictionary, so there's no need to worry; the program will automatically ignore it.
5 | Any error 500 or higher means something went wrong on the server (I'm not sure exactly what). Don't worry about it too much; just restart the program.
6 | 200 is what you want to see.
7 | All these numbers only show up if you're using the dictionary, so if you're translating to a non-English language, there's no need to worry about this.
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/project.py:
--------------------------------------------------------------------------------
1 | import string
2 | from time import sleep
3 | import random
4 | from requests_html import HTMLSession #pip install requests-html
5 | from googletrans import Translator #pip install googletrans==3.1.0a0
6 | import googletrans
7 |
# Shared googletrans client; the translation helpers in this module call
# trans.translate() on it.
trans = Translator()
# Shared requests-html session used for the dictionary HTTP requests.
session = HTMLSession()
# Pool of browser User-Agent strings (desktop and mobile browsers).
# NOTE(review): presumably meant for choosing a random User-Agent per
# request -- confirm against the code that builds the request headers.
UAs = [
    "Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; TNJB; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; ARM; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; MDDCJS; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4",
    "Mozilla/5.0 (Linux; U; Android 4.4.3; en-us; KFASWI Build/KTU84M) AppleWebKit/537.36 (KHTML, like Gecko) Silk/3.68 like Chrome/39.0.2171.93 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) GSA/7.0.55539 Mobile/12H321 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; MATBJS; rv:11.0) like Gecko",
    "Mozilla/5.0 (Linux; U; Android 4.0.4; en-us; KFJWI Build/IMM76D) AppleWebKit/537.36 (KHTML, like Gecko) Silk/3.68 like Chrome/39.0.2171.93 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 7_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167 Safari/9537.53",
    "Mozilla/5.0 (X11; CrOS armv7l 7077.134.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.156 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.1.25 (KHTML, like Gecko)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.8.9 (KHTML, like Gecko) Version/7.1.8 Safari/537.85.17",
    "Mozilla/5.0 (iPad; CPU OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4",
    "Mozilla/5.0 (iPad; CPU OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F69 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0",
]
# Extend the stdlib punctuation set with typographic marks (curly quotes,
# dashes, ellipsis, bullets, guillemets, Arabic comma) so that code stripping
# string.punctuation removes them as well.
string.punctuation = "".join(
    (string.punctuation, "“", "–", "…", "”", "•••", "—", "«", "»", "،")
)
# Module-level mapping of word -> number of occurrences.
word_bank = {}
69 |
70 |
def get_wordlist(input_path, output_path, word_count):
    """Write the most frequent words of a text file to a word-list file.

    The text is stripped of every character in ``string.punctuation`` and of
    all ASCII digits, split on whitespace, and counted case-sensitively.
    The ``word_count`` most frequent words are appended to ``output_path``,
    one per line, most frequent first. Words with equal counts are ordered
    by the word itself, descending.

    Args:
        input_path: Path of the UTF-8 text file to analyse.
        output_path: Path of the word list. Opened in append mode, so any
            existing content is kept; the file is created even when no
            words are found.
        word_count: Maximum number of words to write.
    """
    with open(input_path, "r", encoding="UTF-8") as file:
        text = file.read()

    # Build the deletion table locally. Appending the digits to
    # string.punctuation would mutate the stdlib module on every call.
    strip_table = str.maketrans("", "", string.punctuation + string.digits)
    words = text.translate(strip_table).split()

    # Count per call, so words from an earlier call cannot leak into this one.
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1

    ranked = sorted(
        counts.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    top_words = [word for word, _ in ranked[:word_count]]

    # Open the output once instead of once per word.
    with open(output_path, "a", encoding="UTF-8") as file:
        file.writelines(f"{word}\n" for word in top_words)
88 |
89 |
def sort_dict(dictionary):
    """Return the mapping's entries as ``(value, key)`` tuples, largest first.

    The tuples are ordered by value and then by key, both descending.
    """
    pairs = [(count, key) for key, count in dictionary.items()]
    pairs.sort()
    pairs.reverse()
    return pairs
97 |
98 |
def get_definition(OG, ED, prompt):
    """Look up ``prompt`` in the en.bab.la ``OG``-``ED`` dictionary page.

    The HTTP status code of the response is always printed.

    Args:
        OG: Source language, inserted into the dictionary URL.
        ED: Target language, inserted into the dictionary URL.
        prompt: Word to look up.

    Returns:
        The first five ``.scroll-link`` texts joined by "; " when at least
        five are found. With fewer than five, the first text alone, unless
        it is "arrow_upward". ``None`` when nothing usable was found or the
        status code is 401-404.
    """
    r = session.get(
        f"https://en.bab.la/dictionary/{OG}-{ED}/{prompt}",
        # The header name is "User-Agent", and its value must be an actual
        # entry drawn from the pool, not the literal text of the call.
        headers={"User-Agent": random.choice(UAs)},
    )
    print(r.status_code)
    if r.status_code in (401, 402, 403, 404):
        print("Error 400-404")
        sleep(0.1)
        return None

    translations = [link.text for link in r.html.find(".scroll-link")[:5]]
    if len(translations) == 5:
        return "; ".join(translations)
    # Fewer than five hits: keep only the first one. Guard against an empty
    # result so a page without matches returns None instead of raising.
    # NOTE(review): "arrow_upward" is presumably the label of a page
    # navigation link rather than a translation -- confirm against the site.
    if translations and translations[0] != "arrow_upward":
        return translations[0]
    return None
118 |
119 |
def get_sentence(path, word):
    """Return a random sentence from the text file that contains ``word``.

    The text is split into sentences on ".". A sentence matches when
    ``word`` appears in it delimited by whitespace or by the sentence
    boundaries (the match is case-sensitive).

    Args:
        path: Path of the UTF-8 text file to search.
        word: Word to look for.

    Returns:
        One matching sentence chosen at random, with surrounding whitespace
        removed and internal whitespace collapsed to single spaces, or
        ``None`` when no sentence contains the word.
    """
    with open(path, "r", encoding="UTF-8") as file:
        text = file.read()

    # Turn tabs, newlines and runs of spaces into single spaces. Deleting
    # them outright would glue the last word of a line to the first word of
    # the next one.
    text = " ".join(text.split())

    needle = f" {word} "
    matches = []
    for sentence in text.split("."):
        sentence = sentence.strip()
        # Pad the sentence so a word at its very start or end also matches.
        if needle in f" {sentence} ":
            matches.append(sentence)

    if not matches:
        return None
    return random.choice(matches)
137 |
138 |
def make_deck(deck_name, og_lang, trans_lang, word, definition, sentence1):
    """Append one tab-separated flashcard line to the deck file.

    The line has four fields: the word, its definition, the example
    sentence, and the example sentence translated with googletrans.
    Nothing is written when either the definition or the sentence is
    ``None``; the deck file is still created in that case.

    Args:
        deck_name: Path of the deck file, opened in append mode.
        og_lang: Source language passed to the translator.
        trans_lang: Destination language passed to the translator.
        word: The word the card is about.
        definition: Definition text, or ``None`` to skip the card.
        sentence1: Example sentence, or ``None`` to skip the card.
    """
    with open(deck_name, "a", encoding="UTF-8") as file:
        if definition is None or sentence1 is None:
            return
        translated = trans.translate(
            sentence1, dest=str(trans_lang), src=str(og_lang)
        ).text
        file.write(f"{word}\t{definition}\t{sentence1}\t{translated}\n")
145 |
def get_multilingual_definition(original_language, Translated_language, word):
    """Translate a single word with the shared translator and return its text."""
    result = trans.translate(
        word,
        src=f'{original_language}',
        dest=f'{Translated_language}',
    )
    return result.text
148 |
149 |
150 |
def _language_code(language):
    """Return the googletrans code for a language given by code or name, else None."""
    for code, name in googletrans.LANGUAGES.items():
        if language in (code, name):
            return code
    return None


def main():
    """Interactively build a tab-separated flashcard deck from a text file.

    Prompts for the source and target languages, the input file, the names
    of the word list and deck files, and the number of words. It then
    writes the word list with get_wordlist() and, for every word in that
    list, fetches a definition and an example sentence and appends a card
    with make_deck().

    When the target language is "english" and the user chooses
    "dictionary", definitions come from get_definition(); otherwise they
    come from get_multilingual_definition().

    Raises:
        SystemExit: If the translator is needed and a language is not
            found in ``googletrans.LANGUAGES``.
        ValueError: If the word count entered is not an integer.
    """
    original_language = input("Original language:").lower().strip()
    Translated_language = input("Translated language:").lower().strip()
    option = ''
    if Translated_language == 'english':
        option = input('Choose "dictionary"(less accurate but 5 meaning per word), or "translator"(more accurate but 1 meaning per word)').lower().strip()
    # Drop double quotes, e.g. from a path that was pasted in quoted form.
    path = input("What is the path of your file? ").replace('"', '')
    word_list = input('What would you like the word list to be called? ') + '.txt'
    deck_name = input('What would you like the finished deck to be called? ') + '.txt'
    word_count = int(input('How many words do you want in your list? '))

    use_dictionary = Translated_language == 'english' and option == 'dictionary'

    source_code = target_code = None
    if not use_dictionary:
        # Resolve the codes up front and fail with a clear message, instead
        # of hitting an unbound variable once the first word is processed.
        source_code = _language_code(original_language)
        target_code = _language_code(Translated_language)
        for entered, code in (
            (original_language, source_code),
            (Translated_language, target_code),
        ):
            if code is None:
                raise SystemExit(f"Language not supported by googletrans: {entered}")

    get_wordlist(path, word_list, word_count)
    with open(word_list, "r", encoding="UTF-8") as file:
        words = file.read().split()

    for counter, word in enumerate(words, start=1):
        if use_dictionary:
            definition = get_definition(original_language, Translated_language, word)
        else:
            definition = get_multilingual_definition(source_code, target_code, word)
        sentence = get_sentence(path, word)
        make_deck(
            deck_name,
            original_language,
            Translated_language,
            word,
            definition,
            sentence,
        )
        print(f'word {counter} finished')
206 |
207 |
208 |
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
211 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | googletrans==3.1.0a0
2 | requests-html
--------------------------------------------------------------------------------