├── tests ├── __init__.py ├── test_api_helpers.py └── test_formatting.py ├── .gitignore ├── TESTING.md ├── assets ├── back.png ├── front.png └── import.png ├── templates ├── front.html ├── README.md └── back.html ├── formatting.py ├── api_helpers.py ├── README.md └── reverso_note_maker.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | queries.txt 3 | reverso.csv 4 | .DS_Store 5 | __pycache__ 6 | -------------------------------------------------------------------------------- /TESTING.md: -------------------------------------------------------------------------------- 1 | To run tests from the parent directory: 2 | ``` 3 | nosetests 4 | ``` 5 | -------------------------------------------------------------------------------- /assets/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/back.png -------------------------------------------------------------------------------- /assets/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/front.png -------------------------------------------------------------------------------- /assets/import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/import.png -------------------------------------------------------------------------------- /templates/front.html: -------------------------------------------------------------------------------- 1 | {{Image}} 2 |
3 | {{Frequencies}} 4 | 5 |
6 |
7 | {{cloze:Cloze Sentences}} 8 |
9 |
10 | {{Cloze Sentences Hints}} 11 |
12 | -------------------------------------------------------------------------------- /formatting.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | INVALID_PUNCTUATION = ''.join([c for c in string.punctuation if c != "'"]) 4 | 5 | def strip_punctuation(s: str) -> str: 6 | """ 7 | Strips punctuation since Reverso matches punctuation exactly. 8 | Does NOT remove apostrophes. 9 | """ 10 | return s.translate(str.maketrans('', '', INVALID_PUNCTUATION)) 11 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | These are suggested templates for use with the created cards, although you can 2 | definitely adjust or add your own cards as you wish. 3 | 4 | 1. Create a **cloze** note type with any of the fields that are used in the template. 5 | 2. Create a card type with the front and back contained in the given files. 6 | 3. Adjust any links in `back.html` that might be language specific. 7 | 8 | For how to create note or card types, you'll have to check the Anki 9 | documentation or poke around the UI yourself. 10 | -------------------------------------------------------------------------------- /tests/test_api_helpers.py: -------------------------------------------------------------------------------- 1 | from api_helpers import make_cloze 2 | import unittest 3 | 4 | class TestApiHelpers(unittest.TestCase): 5 | 6 | def test_make_cloze_one_term(self): 7 | before = "foo'bar" 8 | expected = "" 9 | self.assertEqual(make_cloze(before), expected) 10 | 11 | def test_make_cloze_two_term(self): 12 | before = "foo'bar" 13 | expected = "" 14 | self.assertEqual(make_cloze(before), expected) 15 | 16 | if __name__ == '__main__': 17 | unittest.main() 18 | 19 | -------------------------------------------------------------------------------- /templates/back.html: -------------------------------------------------------------------------------- 1 | {{Image}} 2 |
3 | {{Frequencies}} 4 | 5 |
6 |
7 | {{cloze:Cloze Sentences}} 8 |
9 | 10 |
11 | {{Cloze Sentences Hints}} 12 |
13 | 14 |
15 | {{Audio}} 16 | 17 |
18 | 19 | wiktionary (en)
20 | reverso
21 | yandex image 22 | -------------------------------------------------------------------------------- /tests/test_formatting.py: -------------------------------------------------------------------------------- 1 | from formatting import strip_punctuation 2 | import unittest 3 | 4 | class TestFormatting(unittest.TestCase): 5 | 6 | def test_preserves_apostrophe(self): 7 | before = "foo'bar" 8 | self.assertEqual(strip_punctuation(before), "foo'bar") 9 | 10 | def test_strips_punct(self): 11 | before = "!foo#ba,r" 12 | self.assertEqual(strip_punctuation(before), "foobar") 13 | 14 | def test_strips_punct_with_apostr(self): 15 | before = "!foo#'ba,r" 16 | self.assertEqual(strip_punctuation(before), "foo'bar") 17 | 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | 22 | -------------------------------------------------------------------------------- /api_helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions relating to parsing the API output. 3 | """ 4 | 5 | def get_highlighted(text, highlighted): 6 | """ 7 | Given `highlighted`, which is a list of pair of indexes, return the first 8 | highlighted string in the Reverso sentence 9 | """ 10 | start = highlighted[0][0] 11 | end = highlighted[0][1] 12 | return text[start:end] 13 | 14 | 15 | def make_cloze(text, highlighted): 16 | """ 17 | Takes a string and a pair of numbers and clozes the parts of the string 18 | between the given indices. 19 | """ 20 | start = highlighted[0][0] 21 | end = highlighted[0][1] 22 | prefix = text[:start] 23 | highlighted = text[start:end] 24 | suffix = text[end:] 25 | return "%s{{c1::%s}}%s" % (prefix, highlighted, suffix) 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Anki Reverso Cloze Card Creator 2 | This is a script for automatically creating Anki cloze cards using sentence 3 | examples from Reverso for language 4 | learning. It leverages the [unofficial Python Reverso 5 | API](https://github.com/demian-wolf/ReversoAPI). 6 | 7 | Given a list of strings, it will create a CSV file that can be used to create Anki 8 | notes with three columns: 9 | 10 | 1. Term 11 | 2. Example sentences from Reverso, with cloze 12 | 3. Reverso's translation of the definition in the example sentences 13 | 4. Top N frequencies of definitions (e.g., a word can have different meanings) 14 | 15 | It can easily be combined with 16 | [AwesomeTTS](https://github.com/AwesomeTTS/awesometts-anki-addon) or 17 | [FawsImageSearch](https://github.com/louisrli/anki-faws-image-search) to 18 | generate cards with audio and images. 19 | 20 | This README assumes basic background knowledge of Anki cards and does not 21 | explain any of the related terminology. 22 | 23 | ## Examples 24 | 25 | The script creates a file `reverso.csv` that can be imported into Anki. If you're interested in 26 | exactly what's generated, you can look in the spoiler below. Otherwise, continue 27 | reading. 28 | 29 |
30 | Here is an example of a line from the raw CSV created by the script, pulling 31 | three example sentences for the Italian phrase `a memoria` with the target term 32 | contained in a cloze. It also has relative ratios for the frequency of 33 | translations. For example, this term is most commonly translated to "by heart", 34 | but it can also be translated to "from memory" (with a relative 0.27 frequency 35 | to "by heart"). 36 | 37 | ``` 38 | a memoria,"Studialo bene ed imparalo {{c1::a memoria}}. 39 | 40 | È fuori questione imparare tutte queste frasi {{c1::a memoria}}. 41 | 42 | Cantatelo {{c1::a memoria}}, o leggendolo.",by heart | by heart | from memory,by heart
from memory (0.27); in memory (0.12) 43 | ``` 44 |
45 | 46 | After creating a note type and importing the CSV, you'll have cloze cards that have the 47 | following respective front and back sides: 48 | 49 | ### Front 50 | ![Front of card](assets/front.png) 51 | 52 | ### Back 53 | ![Back of card](assets/back.png) 54 | 55 | ## Usage 56 | Using this assumes basic knowledge of how to run programs on the command line. 57 | Coding is not required. 58 | 59 | Install the prerequisites: 60 | ``` 61 | pip3 install progress 62 | ``` 63 | 64 | ### Steps 65 | 1. **Gather terms:** Create a file called `queries.txt` with one Reverso search term per line. 66 | 2. **Run the script:** Run `python3 reverso_note_maker.py -s it` where `it` (Italian) can be 67 | replaced by the two letter language code. Wait for it to finish. 68 | 3. **Import the file to Anki:** Import the created file `reverso.csv` into the Anki desktop app, with the 69 | columns matching in the order shown below. You will have to create a new note 70 | type. 71 | 4. **Add a card template:** If you'd like cards that look like the examples, check the `templates/` 72 | folder for some example card templates that you can simply copy-paste into 73 | Anki's card template editor. 74 | 75 | ![Example of importing](assets/import.png) 76 | 77 | To see the full list of options: 78 | ``` 79 | python3 reverso_note_maker.py --help 80 | ``` 81 | 82 | ### Potential issues 83 | * **Sleeping**: If your computer goes to sleep, you'll need to restart the 84 | script, but it'll pick up from where you left off. 85 | * **Rate-limiting**: Reverso may block or throttle you. The script waits one 86 | second between requests and has a retry mechanism, but if it happens, run 87 | the script again. 88 | * **Bad translations**: Reverso isn't always perfect, so sometimes the 89 | translations will be weird. This is will happen frequently with less common 90 | terms. 91 | -------------------------------------------------------------------------------- /reverso_note_maker.py: -------------------------------------------------------------------------------- 1 | """ 2 | A script that pulls from the file `queries.txt` (by default) in the same 3 | directory, which should be a file with one query per line. It then generates a 4 | CSV file that can be imported into Anki. 5 | 6 | At the moment, it's not written as an anki add-on that can add to existing cards 7 | and should be used to generate notes from scratch. 8 | 9 | Example usage: 10 | 11 | python3 reverso_note_maker.py -s ru 12 | """ 13 | from typing import Generator 14 | from optparse import OptionParser 15 | from reverso_api import context 16 | from itertools import islice 17 | import formatting 18 | import os 19 | import time 20 | import progress.bar 21 | import requests.exceptions 22 | from collections import namedtuple 23 | import csv 24 | import logging 25 | from api_helpers import get_highlighted, make_cloze 26 | 27 | logger = logging.getLogger(__name__) 28 | logger.setLevel(logging.INFO) 29 | 30 | # Maximum number of examples to pull from Reverso. 31 | MAX_EXAMPLES = 3 32 | 33 | # Number of examples to ask for when using prefer_short option. As the API 34 | # provides a generator, we ask for a reasonable amount by eyeballing the UI. 35 | # Unfortunately, there's no easy way to ask for "just one page" using the API 36 | # library, but this should add a ceiling to the request time. 37 | PREFER_SHORT_MAX_EXAMPLES = 15 38 | 39 | # Maximum number of frequencies ("translations" in library) to fetch. 40 | MAX_FREQUENCIES = 5 41 | # Reverso can start giving really weird frequencies. This cuts out any 42 | # frequencies relative to n * the highest frequency word. For example, if it's 43 | # 0.1, you can think that this means that any translation appearing < 10% 44 | # relative to the most common translation can be ignored. 45 | FREQUENCY_THRESHOLD = 0.1 46 | 47 | MAX_RETRIES = 5 48 | 49 | # Wait this long between each request to prevent getting blocked by reverso. 50 | SLEEP_THROTTLE_SEC = 1 51 | RETRY_WAIT_SEC = 60 52 | 53 | parser = OptionParser() 54 | parser.add_option("-s", "--source_lang", dest="source_lang", 55 | help="Source language code of words to read.") 56 | parser.add_option("-t", "--target_lang", dest="target_lang", 57 | help="Target language code.", 58 | default="en") 59 | parser.add_option("-q", "--queries", dest="query_file", 60 | help="Path to queries file", default="queries.txt") 61 | parser.add_option("-o", "--output", dest="output_file", 62 | help="Path to output file", default="reverso.csv") 63 | parser.add_option( 64 | "--prefer-short", 65 | action="store_true", 66 | dest="prefer_short", 67 | help="Sort example sentences by length, preferring shorter sentences", 68 | default=False) 69 | 70 | parser.add_option( 71 | "--keep-punctuation", 72 | action="store_true", 73 | dest="keep_punctuation", 74 | help="By default, script removes non-apostrophe punctuation because it Reverso matches the punctuation and it usually leads to bad examples. This flag keeps all punctuation", 75 | default=False) 76 | 77 | 78 | # frequencies is a (definition, count) pair that shows up at the top of the 79 | # Reverso UI, indicating what the most frequent translation is 80 | AnkiReversoNote = namedtuple( 81 | "AnkiReversoNote", 82 | ("query", 83 | "hints", 84 | "cloze_texts", 85 | "frequencies")) 86 | 87 | 88 | def reverso_note_to_csv(note: AnkiReversoNote) -> list[str]: 89 | """ 90 | Processes an AnkiReversoNote into a single row of CSV output. 91 | 92 | See the README for an example of what this output would look like. 93 | """ 94 | # Format: foo (0.5) where the number is the relative frequency to the most 95 | # common word. However, don't put any number next to the first word. 96 | freq_strs = [] 97 | for i, f in enumerate(note.frequencies): 98 | if i == 0: 99 | freq_strs.append("%s
" % f[0]) 100 | else: 101 | highest_freq = note.frequencies[0][1] 102 | freq_strs.append("%s (%.2f)" % (f[0], f[1] / highest_freq)) 103 | 104 | return [note.query, '

'.join(note.cloze_texts), 105 | ' | '.join(note.hints), 106 | # First one gets its own line, so no semicolon after it. 107 | freq_strs[0] + '; '.join(freq_strs[1:]) if freq_strs else '' 108 | ] 109 | 110 | 111 | def make_notes(queries, existing_notes, options) -> Generator[AnkiReversoNote, 112 | None, None]: 113 | """ 114 | Main function for generating notes 115 | """ 116 | bar = progress.bar.Bar('Processing', max=len(queries)) 117 | for q in queries: 118 | bar.next() 119 | if q in existing_notes: 120 | continue 121 | # We need to normalize because of this article: 122 | # https://www.ojisanseiuchi.com/2021/05/08/encoding-of-the-cyrillic-letter-%D0%B9-a-utf-8-gotcha/ 123 | # It doesn't handle the character й well, treating it as и + diacritic in a 124 | # lot of cases. 125 | # TODO: Move this into some other function. 126 | normalized = q.strip().lower().replace(u"\u0438\u0306", u"\u0439") 127 | if not options.keep_punctuation: 128 | normalized = formatting.strip_punctuation(normalized) 129 | 130 | api = context.ReversoContextAPI( 131 | normalized, 132 | "", 133 | options.source_lang, 134 | options.target_lang) 135 | # Rate limit to prevent getting blocked by Reverso. 136 | time.sleep(SLEEP_THROTTLE_SEC) 137 | 138 | note = AnkiReversoNote( 139 | query=q, 140 | hints=[], 141 | cloze_texts=[], 142 | frequencies=[]) 143 | 144 | num_retries = 0 145 | while num_retries < MAX_RETRIES: 146 | try: 147 | if num_retries == MAX_RETRIES: 148 | raise Exception("Hit max number of retries.") 149 | translations = islice( 150 | api.get_translations(), 0, MAX_FREQUENCIES) 151 | if options.prefer_short: 152 | # Sort by the length of the sort text. 153 | examples = list(islice(api.get_examples(), 154 | 0, 155 | PREFER_SHORT_MAX_EXAMPLES)) 156 | examples.sort(key=lambda s: len(s[0].text)) 157 | examples = examples[:MAX_EXAMPLES] 158 | else: 159 | examples = islice(api.get_examples(), 0, MAX_EXAMPLES) 160 | num_retries += 1 161 | break 162 | except requests.exceptions.ConnectionError: 163 | logger.warning("Encountered a connection error. Retrying...") 164 | time.sleep(RETRY_WAIT_SEC) 165 | 166 | # Handle frequencies. 167 | highest_freq = None 168 | for i, translation in enumerate(translations): 169 | if i == 0: 170 | highest_freq = translation.frequency 171 | if translation.frequency > highest_freq * FREQUENCY_THRESHOLD: 172 | note.frequencies.append( 173 | (translation.translation, translation.frequency)) 174 | 175 | # Handle examples. 176 | for source, target in examples: 177 | # Create the cloze part 178 | cloze = make_cloze(source.text, source.highlighted) 179 | note.cloze_texts.append(cloze) 180 | 181 | # Use the english translation in a list of hints. The hint won't be 182 | # colocated with the sentence but doesn't really matter. 183 | try: 184 | note.hints.append( 185 | get_highlighted( 186 | target.text, 187 | target.highlighted)) 188 | except BaseException: 189 | logger.warning('Hint failed on ' + q) 190 | 191 | # Columns: Term, cloze, hint 192 | if len(note.cloze_texts) != 0: 193 | yield note 194 | else: 195 | # Simply skip the word for now, oh well. 196 | logger.warning("Nothing found on Reverso: " + q) 197 | 198 | bar.finish() 199 | 200 | 201 | (options, args) = parser.parse_args() 202 | if not options.source_lang: 203 | parser.error('No source language given.') 204 | 205 | # Mark the existing notes so that we can continue writing in case of large 206 | # jobs. 207 | existing_notes = set() 208 | 209 | if os.path.isfile(options.output_file): 210 | with open(options.output_file, 'r') as f: 211 | reader = csv.reader(f) 212 | for row in reader: 213 | existing_notes.add(row[0]) 214 | 215 | with open(options.query_file, 'r') as f: 216 | queries = f.read().strip().split('\n') 217 | 218 | with open(options.output_file, 'a', newline='') as csvfile: 219 | reversowriter = csv.writer(csvfile) 220 | # Write from the generator as we receive results so that progress can be 221 | # saved. 222 | for note in make_notes(queries, existing_notes, options): 223 | row = reverso_note_to_csv(note) 224 | reversowriter.writerow(row) 225 | --------------------------------------------------------------------------------