├── tests
├── __init__.py
├── test_api_helpers.py
└── test_formatting.py
├── .gitignore
├── TESTING.md
├── assets
├── back.png
├── front.png
└── import.png
├── templates
├── front.html
├── README.md
└── back.html
├── formatting.py
├── api_helpers.py
├── README.md
└── reverso_note_maker.py
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | queries.txt
3 | reverso.csv
4 | .DS_Store
5 | __pycache__
6 |
--------------------------------------------------------------------------------
/TESTING.md:
--------------------------------------------------------------------------------
1 | To run tests from the parent directory:
2 | ```
3 | nosetests
4 | ```
5 |
--------------------------------------------------------------------------------
/assets/back.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/back.png
--------------------------------------------------------------------------------
/assets/front.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/front.png
--------------------------------------------------------------------------------
/assets/import.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/louisrli/anki-reverso-cloze/HEAD/assets/import.png
--------------------------------------------------------------------------------
/templates/front.html:
--------------------------------------------------------------------------------
1 | {{Image}}
2 |
3 | {{Frequencies}}
4 |
5 |
6 |
7 | {{cloze:Cloze Sentences}}
8 |
9 |
10 | {{Cloze Sentences Hints}}
11 |
12 |
--------------------------------------------------------------------------------
/formatting.py:
--------------------------------------------------------------------------------
1 | import string
2 |
# Every ASCII punctuation character except the apostrophe, which
# strip_punctuation() must leave in place.
INVALID_PUNCTUATION = string.punctuation.replace("'", "")


def strip_punctuation(s: str) -> str:
    """
    Return `s` with punctuation removed, since Reverso matches punctuation
    exactly.

    Apostrophes are kept; every character in INVALID_PUNCTUATION is deleted.
    """
    # Mapping a character to None in a translation table deletes it.
    deletion_table = str.maketrans(dict.fromkeys(INVALID_PUNCTUATION))
    return s.translate(deletion_table)
11 |
--------------------------------------------------------------------------------
/templates/README.md:
--------------------------------------------------------------------------------
1 | These are suggested templates for use with the created cards, although you can
2 | definitely adjust or add your own cards as you wish.
3 |
4 | 1. Create a **cloze** note type with any of the fields that are used in the template.
5 | 2. Create a card type with the front and back contained in the given files.
6 | 3. Adjust any links in `back.html` that might be language specific.
7 |
8 | For how to create note or card types, you'll have to check the Anki
9 | documentation or poke around the UI yourself.
10 |
--------------------------------------------------------------------------------
/tests/test_api_helpers.py:
--------------------------------------------------------------------------------
1 | from api_helpers import make_cloze
2 | import unittest
3 |
class TestApiHelpers(unittest.TestCase):
    """Tests for make_cloze(text, highlighted)."""

    def test_make_cloze_one_term(self):
        # A single-word term at the start of the sentence. `highlighted` is a
        # list of (start, end) index pairs; make_cloze uses the first pair.
        before = "foo bar"
        expected = "{{c1::foo}} bar"
        self.assertEqual(make_cloze(before, [(0, 3)]), expected)

    def test_make_cloze_two_term(self):
        # A two-word term in the middle of the sentence is clozed as one unit.
        before = "say foo bar now"
        expected = "say {{c1::foo bar}} now"
        self.assertEqual(make_cloze(before, [(4, 11)]), expected)


if __name__ == '__main__':
    unittest.main()
18 |
19 |
--------------------------------------------------------------------------------
/templates/back.html:
--------------------------------------------------------------------------------
1 | {{Image}}
2 |
3 | {{Frequencies}}
4 |
5 |
6 |
7 | {{cloze:Cloze Sentences}}
8 |
9 |
10 |
11 | {{Cloze Sentences Hints}}
12 |
13 |
14 |
15 | {{Audio}}
16 |
17 |
18 |
19 | wiktionary (en)
20 | reverso
21 | yandex image
22 |
--------------------------------------------------------------------------------
/tests/test_formatting.py:
--------------------------------------------------------------------------------
1 | from formatting import strip_punctuation
2 | import unittest
3 |
class TestFormatting(unittest.TestCase):
    """Checks that strip_punctuation drops punctuation but keeps apostrophes."""

    def test_preserves_apostrophe(self):
        # The apostrophe is the only punctuation, so nothing is removed.
        result = strip_punctuation("foo'bar")
        self.assertEqual(result, "foo'bar")

    def test_strips_punct(self):
        # Leading, inner and mid-word punctuation are all removed.
        result = strip_punctuation("!foo#ba,r")
        self.assertEqual(result, "foobar")

    def test_strips_punct_with_apostr(self):
        # Other punctuation is removed while the apostrophe survives.
        result = strip_punctuation("!foo#'ba,r")
        self.assertEqual(result, "foo'bar")


if __name__ == '__main__':
    unittest.main()
21 |
22 |
--------------------------------------------------------------------------------
/api_helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper functions relating to parsing the API output.
3 | """
4 |
def get_highlighted(text, highlighted):
    """
    Return the first highlighted substring of a Reverso sentence.

    `highlighted` is a list of index pairs; only the first pair is used, as
    the start and end of a slice of `text`.
    """
    first_pair = highlighted[0]
    return text[first_pair[0]:first_pair[1]]
13 |
14 |
def make_cloze(text, highlighted):
    """
    Wrap one span of `text` in an Anki `{{c1::...}}` cloze deletion.

    `highlighted` is a list of index pairs; the first pair gives the start
    and end of the span to cloze. Text before and after the span is kept
    unchanged.
    """
    first_pair = highlighted[0]
    cloze_start, cloze_end = first_pair[0], first_pair[1]
    return "%s{{c1::%s}}%s" % (
        text[:cloze_start],
        text[cloze_start:cloze_end],
        text[cloze_end:],
    )
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Anki Reverso Cloze Card Creator
2 | This is a script for automatically creating Anki cloze cards using sentence
3 | examples from Reverso for language
4 | learning. It leverages the [unofficial Python Reverso
5 | API](https://github.com/demian-wolf/ReversoAPI).
6 |
7 | Given a list of strings, it will create a CSV file that can be used to create Anki
8 | notes with four columns:
9 |
10 | 1. Term
11 | 2. Example sentences from Reverso, with cloze
12 | 3. Reverso's translation of the definition in the example sentences
13 | 4. Top N frequencies of definitions (e.g., a word can have different meanings)
14 |
15 | It can easily be combined with
16 | [AwesomeTTS](https://github.com/AwesomeTTS/awesometts-anki-addon) or
17 | [FawsImageSearch](https://github.com/louisrli/anki-faws-image-search) to
18 | generate cards with audio and images.
19 |
20 | This README assumes basic background knowledge of Anki cards and does not
21 | explain any of the related terminology.
22 |
23 | ## Examples
24 |
25 | The script creates a file `reverso.csv` that can be imported into Anki. If you're interested in
26 | exactly what's generated, you can look in the spoiler below. Otherwise, continue
27 | reading.
28 |
29 |
30 | Here is an example of a line from the raw CSV created by the script, pulling
31 | three example sentences for the Italian phrase `a memoria` with the target term
32 | contained in a cloze. It also has relative ratios for the frequency of
33 | translations. For example, this term is most commonly translated to "by heart",
34 | but it can also be translated to "from memory" (with a relative 0.27 frequency
35 | to "by heart").
36 |
37 | ```
38 | a memoria,"Studialo bene ed imparalo {{c1::a memoria}}.
39 |
40 | È fuori questione imparare tutte queste frasi {{c1::a memoria}}.
41 |
42 | Cantatelo {{c1::a memoria}}, o leggendolo.",by heart | by heart | from memory,by heartfrom memory (0.27); in memory (0.12)
43 | ```
44 |
45 |
46 | After creating a note type and importing the CSV, you'll have cloze cards that have the
47 | following respective front and back sides:
48 |
49 | ### Front
50 | 
51 |
52 | ### Back
53 | 
54 |
55 | ## Usage
56 | Using this assumes basic knowledge of how to run programs on the command line.
57 | Coding is not required.
58 |
59 | Install the prerequisites:
60 | ```
61 | pip3 install progress
62 | ```
63 |
64 | ### Steps
65 | 1. **Gather terms:** Create a file called `queries.txt` with one Reverso search term per line.
66 | 2. **Run the script:** Run `python3 reverso_note_maker.py -s it` where `it` (Italian) can be
67 | replaced by the two letter language code. Wait for it to finish.
68 | 3. **Import the file to Anki:** Import the created file `reverso.csv` into the Anki desktop app, with the
69 | columns matching in the order shown below. You will have to create a new note
70 | type.
71 | 4. **Add a card template:** If you'd like cards that look like the examples, check the `templates/`
72 | folder for some example card templates that you can simply copy-paste into
73 | Anki's card template editor.
74 |
75 | 
76 |
77 | To see the full list of options:
78 | ```
79 | python3 reverso_note_maker.py --help
80 | ```
81 |
82 | ### Potential issues
83 | * **Sleeping**: If your computer goes to sleep, you'll need to restart the
84 | script, but it'll pick up from where you left off.
85 | * **Rate-limiting**: Reverso may block or throttle you. The script waits one
86 | second between requests and has a retry mechanism, but if it happens, run
87 | the script again.
88 | * **Bad translations**: Reverso isn't always perfect, so sometimes the
89 | translations will be weird. This will happen frequently with less common
90 | terms.
91 |
--------------------------------------------------------------------------------
/reverso_note_maker.py:
--------------------------------------------------------------------------------
1 | """
2 | A script that pulls from the file `queries.txt` (by default) in the same
3 | directory, which should be a file with one query per line. It then generates a
4 | CSV file that can be imported into Anki.
5 |
6 | At the moment, it's not written as an anki add-on that can add to existing cards
7 | and should be used to generate notes from scratch.
8 |
9 | Example usage:
10 |
11 | python3 reverso_note_maker.py -s ru
12 | """
13 | from typing import Generator
14 | from optparse import OptionParser
15 | from reverso_api import context
16 | from itertools import islice
17 | import formatting
18 | import os
19 | import time
20 | import progress.bar
21 | import requests.exceptions
22 | from collections import namedtuple
23 | import csv
24 | import logging
25 | from api_helpers import get_highlighted, make_cloze
26 |
27 | logger = logging.getLogger(__name__)
28 | logger.setLevel(logging.INFO)
29 |
30 | # Maximum number of examples to pull from Reverso.
31 | MAX_EXAMPLES = 3
32 |
33 | # Number of examples to ask for when using prefer_short option. As the API
34 | # provides a generator, we ask for a reasonable amount by eyeballing the UI.
35 | # Unfortunately, there's no easy way to ask for "just one page" using the API
36 | # library, but this should add a ceiling to the request time.
37 | PREFER_SHORT_MAX_EXAMPLES = 15
38 |
39 | # Maximum number of frequencies ("translations" in library) to fetch.
40 | MAX_FREQUENCIES = 5
41 | # Reverso can start giving really weird, rare translations. Any translation
42 | # whose frequency is not above FREQUENCY_THRESHOLD * (frequency of the most
43 | # common translation) is dropped. For example, with 0.1, any translation
44 | # appearing < 10% as often as the most common translation is ignored.
45 | FREQUENCY_THRESHOLD = 0.1
46 |
47 | MAX_RETRIES = 5
48 |
49 | # Wait this long between each request to prevent getting blocked by reverso.
50 | SLEEP_THROTTLE_SEC = 1
51 | RETRY_WAIT_SEC = 60
52 |
# Command-line options. Only the source language is required; every other
# option has a default.
parser = OptionParser()
parser.add_option("-s", "--source_lang", dest="source_lang",
                  help="Source language code of words to read.")
parser.add_option("-t", "--target_lang", dest="target_lang",
                  help="Target language code.",
                  default="en")
parser.add_option("-q", "--queries", dest="query_file",
                  help="Path to queries file", default="queries.txt")
parser.add_option("-o", "--output", dest="output_file",
                  help="Path to output file", default="reverso.csv")
parser.add_option(
    "--prefer-short",
    action="store_true",
    dest="prefer_short",
    help="Sort example sentences by length, preferring shorter sentences",
    default=False)

parser.add_option(
    "--keep-punctuation",
    action="store_true",
    dest="keep_punctuation",
    help="By default, the script removes non-apostrophe punctuation because "
         "Reverso matches punctuation exactly, which usually leads to bad "
         "examples. This flag keeps all punctuation.",
    default=False)
76 |
77 |
# One note, i.e. one row of CSV output.
# frequencies is a list of (definition, count) pairs that show up at the top
# of the Reverso UI, indicating what the most frequent translation is. The
# first pair is treated as the most common translation.
AnkiReversoNote = namedtuple(
    "AnkiReversoNote",
    ("query",
     "hints",
     "cloze_texts",
     "frequencies"))


def reverso_note_to_csv(note: AnkiReversoNote) -> list[str]:
    """
    Processes an AnkiReversoNote into a single row of CSV output.

    The returned row has four columns:

    1. the query,
    2. the cloze sentences, one per line,
    3. the hints, separated by " | ",
    4. the frequencies: the most common definition on its own line, followed
       by the remaining definitions as "definition (ratio)" separated by
       "; ", where ratio is the count relative to the most common definition.
       Empty when the note has no frequencies.

    See the README for an example of what this output would look like.
    """
    frequencies = note.frequencies
    if frequencies:
        # The first definition is the reference point, so it gets no ratio.
        top_definition, top_count = frequencies[0][0], frequencies[0][1]
        freq_column = "%s" % top_definition
        others = ["%s (%.2f)" % (f[0], f[1] / top_count)
                  for f in frequencies[1:]]
        if others:
            # First one gets its own line, so no semicolon after it.
            freq_column += '\n' + '; '.join(others)
    else:
        freq_column = ''

    return [note.query,
            '\n'.join(note.cloze_texts),
            ' | '.join(note.hints),
            freq_column]
109 |
110 |
def _normalize_query(query, keep_punctuation):
    """Return the form of `query` that is sent to Reverso."""
    # We need to normalize because of this article:
    # https://www.ojisanseiuchi.com/2021/05/08/encoding-of-the-cyrillic-letter-%D0%B9-a-utf-8-gotcha/
    # It doesn't handle the character й well, treating it as и + diacritic in
    # a lot of cases, so the two-code-point form is folded into the single
    # letter.
    normalized = query.strip().lower().replace(u"\u0438\u0306", u"\u0439")
    if not keep_punctuation:
        normalized = formatting.strip_punctuation(normalized)
    return normalized


def _fetch_translations_and_examples(api, prefer_short):
    """
    Fetch (translations, examples) from `api` as lists, retrying on
    connection errors.

    Makes at most MAX_RETRIES attempts, waiting RETRY_WAIT_SEC seconds before
    each retry. Raises RuntimeError once every attempt has failed.
    """
    last_error = None
    for attempt in range(MAX_RETRIES):
        if attempt:
            logger.warning("Encountered a connection error. Retrying...")
            time.sleep(RETRY_WAIT_SEC)
        try:
            # The API provides generators, so the results are materialized
            # here: otherwise they would only be consumed after leaving the
            # try block, where a connection error could not be retried.
            translations = list(
                islice(api.get_translations(), 0, MAX_FREQUENCIES))
            if prefer_short:
                # Sort by the length of the source text.
                examples = list(islice(api.get_examples(),
                                       0,
                                       PREFER_SHORT_MAX_EXAMPLES))
                examples.sort(key=lambda pair: len(pair[0].text))
                examples = examples[:MAX_EXAMPLES]
            else:
                examples = list(islice(api.get_examples(), 0, MAX_EXAMPLES))
            return translations, examples
        except requests.exceptions.ConnectionError as err:
            last_error = err
    raise RuntimeError("Hit max number of retries.") from last_error


def make_notes(queries, existing_notes, options) -> Generator[AnkiReversoNote,
                                                              None, None]:
    """
    Main function for generating notes.

    Yields one AnkiReversoNote per query in `queries` for which Reverso
    returned at least one example sentence. Queries found in `existing_notes`
    are skipped. `options` supplies source_lang, target_lang, prefer_short
    and keep_punctuation.

    Raises RuntimeError when Reverso cannot be reached after MAX_RETRIES
    attempts for a query.
    """
    bar = progress.bar.Bar('Processing', max=len(queries))
    try:
        for q in queries:
            bar.next()
            if q in existing_notes:
                continue

            api = context.ReversoContextAPI(
                _normalize_query(q, options.keep_punctuation),
                "",
                options.source_lang,
                options.target_lang)
            # Rate limit to prevent getting blocked by Reverso.
            time.sleep(SLEEP_THROTTLE_SEC)

            translations, examples = _fetch_translations_and_examples(
                api, options.prefer_short)

            note = AnkiReversoNote(
                query=q,
                hints=[],
                cloze_texts=[],
                frequencies=[])

            # Handle frequencies: keep only translations that are frequent
            # enough relative to the first (most common) one.
            if translations:
                highest_freq = translations[0].frequency
                for translation in translations:
                    if translation.frequency > highest_freq * FREQUENCY_THRESHOLD:
                        note.frequencies.append(
                            (translation.translation, translation.frequency))

            # Handle examples.
            for source, target in examples:
                # Create the cloze part
                note.cloze_texts.append(
                    make_cloze(source.text, source.highlighted))

                # Use the english translation in a list of hints. The hint
                # won't be colocated with the sentence but doesn't really
                # matter.
                try:
                    note.hints.append(
                        get_highlighted(target.text, target.highlighted))
                except Exception:
                    # Best effort: a missing hint should not lose the note.
                    # Exception (not BaseException) so Ctrl-C still works.
                    logger.warning('Hint failed on %s', q)

            # Columns: Term, cloze, hint
            if note.cloze_texts:
                yield note
            else:
                # Simply skip the word for now, oh well.
                logger.warning("Nothing found on Reverso: %s", q)
    finally:
        # Also runs when an error escapes or the consumer stops early.
        bar.finish()
199 |
200 |
(options, args) = parser.parse_args()
if not options.source_lang:
    parser.error('No source language given.')

# Mark the existing notes so that we can continue writing in case of large
# jobs. The first column of each row already in the output file is the query.
existing_notes = set()

if os.path.isfile(options.output_file):
    # newline='' is what the csv module asks for so that line breaks embedded
    # in quoted fields (the cloze sentences) are interpreted correctly.
    with open(options.output_file, 'r', newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            # A blank line parses as an empty row; there is no query in it.
            if row:
                existing_notes.add(row[0])

with open(options.query_file, 'r') as f:
    # One query per line; whitespace-only lines are not queries.
    queries = [line for line in f.read().strip().split('\n') if line.strip()]

with open(options.output_file, 'a', newline='') as csvfile:
    reversowriter = csv.writer(csvfile)
    # Write from the generator as we receive results so that progress can be
    # saved.
    for note in make_notes(queries, existing_notes, options):
        row = reverso_note_to_csv(note)
        reversowriter.writerow(row)
225 |
--------------------------------------------------------------------------------