"""Build an Anki-importable vocabulary deck from a text file.

Repository layout::

    ├── README.md
    ├── project.py
    └── requirements.txt

Requirements (requirements.txt)::

    googletrans==3.1.0a0
    requests-html

Notes from the README
---------------------
The author is still new to programming, so you may run into errors.

* While the "dictionary" option is in use, the HTTP status code of every
  lookup is printed.  These numbers only appear for the dictionary; when
  translating to a non-English language they never show up.
* 200 is what you want to see.
* 403 is no big deal: just rerun the program.
* 404 means the word has no definition in the dictionary; the program
  skips that word automatically.
* Any 500+ code means the server had a problem; restart the program.
* You can run ``get_wordlist()`` once and then comment the call out.  If
  an error interrupts a run, open the word list, delete the words that
  have already been translated, and rerun the program.

The original README also showed three screenshots captioned "anki text",
"txt file" and "Capture".
"""

import functools
import random
import string
from time import sleep

# requests-html and googletrans are imported lazily (see _get_session and
# _get_translator) so importing this module neither needs the packages nor
# opens a network session.

UAs = [
    "Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; TNJB; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; ARM; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; MDDCJS; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4",
    "Mozilla/5.0 (Linux; U; Android 4.4.3; en-us; KFASWI Build/KTU84M) AppleWebKit/537.36 (KHTML, like Gecko) Silk/3.68 like Chrome/39.0.2171.93 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) GSA/7.0.55539 Mobile/12H321 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; MATBJS; rv:11.0) like Gecko",
    "Mozilla/5.0 (Linux; U; Android 4.0.4; en-us; KFJWI Build/IMM76D) AppleWebKit/537.36 (KHTML, like Gecko) Silk/3.68 like Chrome/39.0.2171.93 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 7_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D167 Safari/9537.53",
    "Mozilla/5.0 (X11; CrOS armv7l 7077.134.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.156 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.1.25 (KHTML, like Gecko)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.8.9 (KHTML, like Gecko) Version/7.1.8 Safari/537.85.17",
    "Mozilla/5.0 (iPad; CPU OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4",
    "Mozilla/5.0 (iPad; CPU OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F69 Safari/600.1.4",
    "Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0",
]

# Typographic punctuation that string.punctuation does not cover.
_EXTRA_PUNCTUATION = "“–…”•—«»،"

# Deletion table used by get_wordlist: ASCII punctuation, the extra
# typographic marks above, and digits.  Kept private instead of mutating
# the stdlib's string.punctuation, which would leak into every other user
# of that constant and grow on each call.
_STRIP_TABLE = str.maketrans(
    "", "", string.punctuation + _EXTRA_PUNCTUATION + string.digits
)

# Running word -> occurrence count, shared across get_wordlist calls.
word_bank = {}


@functools.lru_cache(maxsize=None)
def _get_session():
    """Return the shared requests-html session, creating it on first use."""
    from requests_html import HTMLSession  # pip install requests-html

    return HTMLSession()


@functools.lru_cache(maxsize=None)
def _get_translator():
    """Return the shared googletrans translator, creating it on first use."""
    from googletrans import Translator  # pip install googletrans==3.1.0a0

    return Translator()


def __getattr__(name):
    """Serve the historical module attributes ``trans`` and ``session`` lazily."""
    if name == "trans":
        return _get_translator()
    if name == "session":
        return _get_session()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


def get_wordlist(input_path, output_path, word_count):
    """Append the most frequent words of a text file to a word-list file.

    The text is stripped of punctuation and digits, split on whitespace and
    counted into the module-level ``word_bank``.  The ``word_count`` highest
    entries (as ordered by ``sort_dict``) are appended to ``output_path``,
    one word per line.

    Args:
        input_path: Path of the UTF-8 text file to analyse.
        output_path: Path of the word list; opened in append mode.
        word_count: Maximum number of words to write.
    """
    with open(input_path, "r", encoding="UTF-8") as file:
        text = file.read()

    for word in text.translate(_STRIP_TABLE).split():
        word_bank[word] = word_bank.get(word, 0) + 1

    top_words = sort_dict(word_bank)[:word_count]
    # Open once for the whole batch; this also creates the file when there
    # is nothing to write, so a later read of the word list cannot fail.
    with open(output_path, "a", encoding="UTF-8") as file:
        file.writelines(f"{word}\n" for _count, word in top_words)


def sort_dict(dictionary):
    """Return ``(value, key)`` pairs of ``dictionary`` in descending order.

    Pairs are compared as tuples, so equal values are ordered by key,
    also descending.
    """
    return sorted(((value, key) for key, value in dictionary.items()), reverse=True)


def _format_translations(texts):
    """Turn scraped link texts into one definition string, or None.

    With five or more texts the first five are joined with "; ".  With
    fewer, only the first is used, unless it is the "arrow_upward" icon
    label (or there is no text at all), in which case None is returned.
    """
    if len(texts) >= 5:
        return "; ".join(texts[:5])
    if texts and texts[0] != "arrow_upward":
        return texts[0]
    return None


def get_definition(OG, ED, prompt):
    """Look up ``prompt`` in the bab.la ``OG``-``ED`` dictionary.

    The HTTP status code is always printed.  Any status of 400 or above is
    treated as "no definition".

    Args:
        OG: Source language name as used in the bab.la URL (e.g. "arabic").
        ED: Target language name as used in the bab.la URL (e.g. "english").
        prompt: The word to look up.

    Returns:
        Up to five meanings joined by "; ", a single meaning, or None when
        nothing usable was found.
    """
    response = _get_session().get(
        f"https://en.bab.la/dictionary/{OG}-{ED}/{prompt}",
        # A real header name and a real random agent; the original sent the
        # literal text "random.choice(UAs)" under an invalid header name.
        headers={"User-Agent": random.choice(UAs)},
    )
    print(response.status_code)
    if response.status_code >= 400:
        print(f"Error {response.status_code}")
        sleep(0.1)
        return None

    links = response.html.find(".scroll-link")[:5]
    return _format_translations([link.text for link in links])


def get_sentence(path, word):
    """Return a random sentence from the file at ``path`` containing ``word``.

    Whitespace runs (including line breaks and tabs) are collapsed to single
    spaces so that hard-wrapped text does not glue neighbouring words
    together.  Sentences are delimited by "." and a sentence matches when it
    contains ``word`` surrounded by spaces.

    Returns:
        The matching sentence without surrounding whitespace, or None when
        no sentence matches.
    """
    with open(path, "r", encoding="UTF-8") as file:
        text = " ".join(file.read().split())

    needle = f" {word} "
    matches = [sentence for sentence in text.split(".") if needle in sentence]
    if not matches:
        return None
    return random.choice(matches).strip()


def make_deck(deck_name, og_lang, trans_lang, word, definition, sentence1):
    """Append one tab-separated card to the deck file.

    The card holds the word, its definition, the example sentence and the
    sentence's translation.  Nothing is written when either the definition
    or the sentence is missing.

    Args:
        deck_name: Path of the deck file; opened in append mode.
        og_lang: Source language passed to the translator.
        trans_lang: Destination language passed to the translator.
        word: The vocabulary word.
        definition: Definition text, or None.
        sentence1: Example sentence, or None.
    """
    if definition is None or sentence1 is None:
        return

    translated = _get_translator().translate(
        sentence1, dest=f"{trans_lang}", src=f"{og_lang}"
    ).text
    with open(deck_name, "a", encoding="UTF-8") as file:
        file.write(f"{word}\t{definition}\t{sentence1}\t{translated}\n")


def get_multilingual_definition(original_language, Translated_language, word):
    """Translate ``word`` with googletrans and return the translated text."""
    return _get_translator().translate(
        word, dest=f"{Translated_language}", src=f"{original_language}"
    ).text


def _resolve_language(language, languages):
    """Return ``(code, name)`` for a language given by code or by name.

    Args:
        language: User input, already lower-cased.
        languages: Mapping of language code to language name.

    Raises:
        ValueError: If ``language`` is neither a known code nor a known name.
    """
    for code, name in languages.items():
        if language in (code, name):
            return code, name
    raise ValueError(f"Unknown language: {language!r}")


def main():
    """Prompt for settings, build the word list, then build the deck."""
    import googletrans

    original_input = input("Original language:").lower().strip()
    translated_input = input("Translated language:").lower().strip()
    try:
        original_code, original_language = _resolve_language(
            original_input, googletrans.LANGUAGES
        )
        translated_code, Translated_language = _resolve_language(
            translated_input, googletrans.LANGUAGES
        )
    except ValueError as error:
        raise SystemExit(str(error)) from None

    option = ''
    if Translated_language == 'english':
        option = input('Choose "dictionary"(less accurate but 5 meaning per word), or "translator"(more accurate but 1 meaning per word)').lower().strip()
    # Paths pasted from a file manager often arrive wrapped in quotes.
    path = input("What is the path of your file? ").replace('"', '')
    word_list = input('What would you like the word list to be called? ') + '.txt'
    deck_name = input('What would you like the finished deck to be called? ') + '.txt'
    try:
        word_count = int(input('How many words do you want in your list? '))
    except ValueError:
        raise SystemExit("The number of words must be a whole number.") from None

    get_wordlist(path, word_list, word_count)
    with open(word_list, "r", encoding="UTF-8") as file:
        words = file.read().split()

    use_dictionary = Translated_language == 'english' and option == 'dictionary'
    for counter, word in enumerate(words, start=1):
        if use_dictionary:
            # bab.la URLs use full language names.
            definition = get_definition(original_language, Translated_language, word)
        else:
            definition = get_multilingual_definition(original_code, translated_code, word)
        sentence = get_sentence(path, word)
        make_deck(deck_name, original_code, translated_code, word, definition, sentence)
        print(f'word {counter} finished')


if __name__ == "__main__":
    main()