├── icons ├── add.png ├── keep.png ├── play.png ├── delete.png ├── blacklist.png ├── camb_icon.png ├── cambridge_icon.png ├── download_note_audio.png ├── download_side_audio.png └── download_audio_manual.png ├── __init__.py ├── _names.py ├── README.md ├── field_data.py ├── main.py ├── .gitattributes ├── styles.py ├── .github └── workflows │ └── main.yml ├── mediafile_utils.py ├── download_entry.py ├── .gitignore ├── get_fields.py ├── download.py ├── downloader.py ├── utils.py ├── Cambridge.py └── gui.py /icons/add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/add.png -------------------------------------------------------------------------------- /icons/keep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/keep.png -------------------------------------------------------------------------------- /icons/play.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/play.png -------------------------------------------------------------------------------- /icons/delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/delete.png -------------------------------------------------------------------------------- /icons/blacklist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/blacklist.png -------------------------------------------------------------------------------- /icons/camb_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/camb_icon.png -------------------------------------------------------------------------------- /icons/cambridge_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/cambridge_icon.png -------------------------------------------------------------------------------- /icons/download_note_audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_note_audio.png -------------------------------------------------------------------------------- /icons/download_side_audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_side_audio.png -------------------------------------------------------------------------------- /icons/download_audio_manual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_audio_manual.png -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # License: GNU AGPL, version 3 or later; 4 | # http://www.gnu.org/copyleft/agpl.html 5 | 6 | """ 7 | Anki-2 add-on to create notes from Cambridge Dictionary 8 | 9 | 10 | """ 11 | 12 | import sys 13 | from os.path import dirname, join 14 | 15 | sys.path.append(join(dirname(__file__), 'lib')) 16 | 17 | __version__ = "0.0.2" 18 | 19 | from . import main 20 | 21 | 22 | -------------------------------------------------------------------------------- /_names.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines constants and names 3 | 4 | """ 5 | 6 | 7 | 8 | LINK_DLG_NAME = 'link_dlg_name' 9 | WORD_DEFS_NAME = 'word_defs_name' 10 | CAMBRIDGE_MODEL = 'Cambridge Dictionary' 11 | ADDON_NAME = 'anki_cambridge' 12 | CREATE_NEW_NOTES_SHORTCUT = "Ctrl+l" 13 | 14 | # Request constants 15 | WORDLIST_URL = 'https://dictionary.cambridge.org/plus/wordlist' 16 | AUTH_URL = 'https://dictionary.cambridge.org/auth/signin?rid=1' 17 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36' 18 | CAMBRIDGE_API_BASE = 'https://dictionary.cambridge.org/plus/api/' 19 | URL_CAMBRIDGE_PLUS_BASE = 'https://dictionary.cambridge.org/plus/' 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Publish on ankiweb.net](https://github.com/am-silex/anki_cambridge/actions/workflows/main.yml/badge.svg) 2 | 3 | Anki Add-on for integration with Cambridge Dictionary website - https://dictionary.cambridge.org/ 4 | 5 | IMPORTANT: This add-on doesn't use official API - only web-scraping. 6 | 7 | ### What it's new: 8 | - Upgraded to Qt6 9 | - Creating notes from link to a word (word title, definition, grammar, IPA, sound, meanings, examples) 10 | - Fetching words from your word lists (with auto-deletion) 11 | - Config settings management - save cookie, word list IDs 12 | - Native authentication - through Cambridge account 13 | 14 | ### Auth with native authentication 15 | 16 | - Click on Sign in 17 | - Write your Cambridge email and password 18 | - Click on Log in 19 | - Once you are authenticated, you can close the web window 20 | - The cookie will be stored automatically in the configuration 21 | 22 | ### Development 23 | 24 | Official Anki Add-on writing [reference](https://addon-docs.ankiweb.net/intro.html) 25 | -------------------------------------------------------------------------------- /field_data.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # Copyright © 2015 Roland Sieker 4 | # 5 | # License: GNU AGPL, version 3 or later; 6 | # http://www.gnu.org/copyleft/agpl.html 7 | 8 | """ 9 | Two classes to store information for the downloader 10 | """ 11 | # 12 | from anki.sound import stripSounds 13 | from anki.template import furigana 14 | from anki.utils import stripHTML 15 | 16 | # Apparently some people use a 「・」 between the kana for different 17 | # kanji. Make it easier to switch removing them for the downloads on 18 | # or off 19 | strip_interpunct = False 20 | # Do or do not remove katakana interpuncts 「・」 before sending requests. 21 | 22 | 23 | class FieldData(object): 24 | def __init__(self, w_field, a_field, word): 25 | self.word_field_name = w_field 26 | self.audio_field_name = a_field 27 | # This is taken from aqt/browser.py. 28 | self.word = word.replace('
', ' ') 29 | self.word = self.word.replace('
', ' ') 30 | if strip_interpunct: 31 | self.word = self.word.replace('・', '') 32 | # self.word = stripHTML(self.word) 33 | # self.word = stripSounds(self.word) 34 | # Reformat so we have exactly one space between words. 35 | self.word = ' '.join(self.word.split()) 36 | 37 | @property 38 | def empty(self): 39 | return not self.word 40 | 41 | @property 42 | def split(self): 43 | return False 44 | 45 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # from PyQt6.QtGui import QIcon, QFont 2 | # from PyQt6.QtWidgets import * 3 | 4 | # from aqt import mw 5 | # from aqt.utils import tooltip 6 | # from anki.hooks import addHook 7 | 8 | from aqt import mw 9 | from aqt.qt import QAction, QMenu, QDialog 10 | from aqt.utils import showInfo 11 | 12 | from .gui import * 13 | from ._names import * 14 | 15 | from .Cambridge import CDDownloader 16 | 17 | 18 | def ask_user_for_link(): 19 | window = LinkDialogue() 20 | setattr(mw, LINK_DLG_NAME, window) 21 | r = window.exec() 22 | downloader = mw.cddownloader 23 | if r == QDialog.DialogCode.Accepted and downloader.word_data: 24 | sd = WordDefDialogue(downloader.word_data, downloader.word) 25 | sd.exec() 26 | sd = None 27 | 28 | 29 | def open_main_windows_addon(): 30 | window = AddonConfigWindow() 31 | window.exec() 32 | 33 | 34 | def parse_saved_wl(): 35 | mw.wl_pareser = WParseSavedWL() 36 | mw.wl_pareser.parse() 37 | 38 | 39 | mw.edit_cambridge_submenu = QMenu(u"&Cambridge Dictionary", mw) 40 | mw.form.menuEdit.addSeparator() 41 | mw.form.menuEdit.addMenu(mw.edit_cambridge_submenu) 42 | # Single word 43 | mw.create_notes_from_link_action = QAction(mw) 44 | mw.create_notes_from_link_action.setText("Create new note(s) from link") 45 | mw.create_notes_from_link_action.setToolTip("Fetch word definitions from provided link.") 46 | mw.create_notes_from_link_action.setShortcut(CREATE_NEW_NOTES_SHORTCUT) 47 | 48 | mw.create_notes_from_link_action.triggered.connect(ask_user_for_link) 49 | mw.edit_cambridge_submenu.addAction(mw.create_notes_from_link_action) 50 | 51 | # Word list - saved 52 | mw.parse_saved_wl_action = QAction(mw) 53 | mw.parse_saved_wl_action.setText("Fetch new words from user wordlists") 54 | mw.parse_saved_wl_action.setToolTip("Fetch new words from user wordlists") 55 | mw.parse_saved_wl_action.triggered.connect(parse_saved_wl) 56 | mw.edit_cambridge_submenu.addAction(mw.parse_saved_wl_action) 57 | 58 | # Addon settings 59 | mw.edit_cambridge_submenu.addSeparator() 60 | mw.open_main_windows_action = QAction(mw) 61 | mw.open_main_windows_action.setText("Cambridge Addon") 62 | mw.open_main_windows_action.setToolTip("Open Cambridge Addon main window.") 63 | 64 | mw.open_main_windows_action.triggered.connect(open_main_windows_addon) 65 | mw.edit_cambridge_submenu.addAction(mw.open_main_windows_action) 66 | 67 | mw.cddownloader = CDDownloader() 68 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto eol=lf 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | *.png binary 47 | 48 | ############################################################################### 49 | # diff behavior for common document formats 50 | # 51 | # Convert binary document formats to text before diffing them. This feature 52 | # is only available from the command line. Turn it on by uncommenting the 53 | # entries below. 54 | ############################################################################### 55 | #*.doc diff=astextplain 56 | #*.DOC diff=astextplain 57 | #*.docx diff=astextplain 58 | #*.DOCX diff=astextplain 59 | #*.dot diff=astextplain 60 | #*.DOT diff=astextplain 61 | #*.pdf diff=astextplain 62 | #*.PDF diff=astextplain 63 | #*.rtf diff=astextplain 64 | #*.RTF diff=astextplain 65 | -------------------------------------------------------------------------------- /styles.py: -------------------------------------------------------------------------------- 1 | model_css = ''' 2 | .card { 3 | font-family: arial; 4 | font-size: 20px; 5 | text-align: center; 6 | color: black; 7 | background-color: 8 | white; 9 | } 10 | .from { 11 | font-style: italic; 12 | }''' 13 | 14 | std_word = """ 15 |

{{Word}}

16 |
17 |
{{Grammar}} {{Pronunciation}}
18 |
19 |
{{Meaning}}
20 |
21 |
{{Examples}}
22 | """ 23 | 24 | std_word_def_sound = """ 25 |

{{Word}}

26 |
27 |
{{Grammar}} {{Pronunciation}}
28 |
29 |
{{Meaning}}
30 |
31 |
{{Examples}}
32 |
33 |
{{Picture}}
34 |
{{Definition}}
35 |
{{Audio}}
36 | {{SynonymAntonym}} 37 | """ 38 | 39 | std_def = """ 40 |
{{Picture}}
41 |
{{Definition}}
42 | """ 43 | 44 | std_def_word_sound = """ 45 |
{{Picture}}
46 |
{{Definition}}
47 |
48 |

{{Word}}

49 |
50 |
{{Grammar}} {{Pronunciation}}
51 |
52 |
{{Meaning}}
53 |
54 |
{{Examples}}
55 |
56 |
{{Audio}}
57 | {{SynonymAntonym}} 58 | """ 59 | 60 | std_sound = """ 61 |
Listen.{{Audio}}
62 | """ 63 | 64 | std_sound_word_def = """ 65 |
Listen.{{Audio}}
66 |
67 |

{{Word}}

68 |
69 |
{{Grammar}} {{Pronunciation}}
70 |
71 |
{{Picture}}
72 |
{{Meaning}}
73 |
74 |
{{Examples}}
75 | {{SynonymAntonym}} 76 | """ 77 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Publish on ankiweb.net 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the master branch 8 | push: 9 | branches: [ master ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 15 | jobs: 16 | # This workflow contains a single job called "build" 17 | build: 18 | # The type of runner that the job will run on 19 | runs-on: ubuntu-latest 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v3 25 | - uses: actions/setup-python@v4 26 | 27 | - name: Archive all repo for sending 28 | run: | 29 | zip -r $GITHUB_WORKSPACE/$GITHUB_SHA.zip . -x '*.git*' '.github', 30 | ls -d -1 "$PWD/"**/ 31 | ls -l 32 | 33 | # - name: Get csrf token from Anki 34 | # id: authRequest 35 | # uses: fjogeleit/http-request-action@master 36 | # with: 37 | # url: 'https://ankiweb.net/account/login' 38 | # method: 'GET' 39 | 40 | # - name: Retrieve CSRF token 41 | # run: echo ${{ steps.authRequest.outputs.response }} | grep -oP 'csrf_token" value="\K([^"]+)' > token.txt 42 | 43 | - name: Authenticate with Anki 44 | env: 45 | ANKI_LOGIN: ${{ secrets.ANKILOGI_N }} 46 | ANKI_PASSWORD: ${{ secrets.ANKIPASSWORD }} 47 | 48 | run: | 49 | curl -d $'\n\u0010'$ANKI_LOGIN$'\u0012\u000'$ANKI_PASSWORD https://ankiweb.net/svc/account/login -c $GITHUB_WORKSPACE/cookie -b 'ankiweb=login' > null 50 | echo "ANKI_COOKIE="$(cat $GITHUB_WORKSPACE/cookie | grep -oP '^#HttpOnly_ankiweb.net.+ankiweb\s+\K.+$') >> $GITHUB_ENV 51 | 52 | 53 | - name: Upload to anki 54 | run: > 55 | curl 56 | -F 'title=Cambridge Dictionary' 57 | -F 'tags=cambridge, addon, word lists' 58 | -F 'supportURL=https://github.com/am-silex/anki_cambridge' 59 | -F 'minVer0=0' 60 | -F 'maxVer0=0' 61 | -F "desc=Anki Add-on for downloading words definitions and associating resources (pronunciation, images, etc) from Cambridge Dictionary website. What it's done (so far): Creating notes from link to a word (word title, definition, grammar, IPA, sound, meanings, examples) Fetching words from your word lists (with deleting words after successful fetching). Anyone willing to improve my idea and contribute is welcome on addon's GitHub: https://github.com/am-silex/anki_cambridge" 62 | -F 'id=1875288089' 63 | -F 'submit=Update' 64 | -F 'v21file0=@${{ github.workspace }}/${{ github.sha }}.zip' 65 | -b 'ankiweb=${{ env.ANKI_COOKIE }}' 66 | -v 67 | https://ankiweb.net/shared/upload 68 | 69 | -------------------------------------------------------------------------------- /mediafile_utils.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # Copyright © 2012–14 Roland Sieker 4 | # 5 | # Based on deurl-files.py by Damien Elmes 6 | # 7 | # License: GNU AGPL, version 3 or later; 8 | # http://www.gnu.org/copyleft/agpl.html 9 | # 10 | 11 | """ 12 | Helper function to deal with file names. 13 | """ 14 | 15 | import os 16 | import re 17 | import shutil 18 | import unicodedata 19 | 20 | from aqt import mw 21 | from anki.utils import isMac, stripHTML 22 | 23 | 24 | class dl_entry(): 25 | def __init__(self,abspath,base_name): 26 | self.base_name = base_name 27 | self.file_extension = os.path.splitext(abspath)[1].strip() 28 | self.file_path = os.path.abspath(abspath) 29 | 30 | def free_media_name(base, end): 31 | """Return a useful media name 32 | 33 | Return a pair of a file name that can be used for the media file, 34 | and the whole file path. The name is based on the base name and 35 | end, but doesn’t exist, nor does it clash with another file 36 | different only in upper/lower case. 37 | If no name can be found, a ValueError is raised. 38 | """ 39 | base = stripHTML(base) 40 | # Strip the ‘invalidFilenameChars’ by hand. 41 | base = re.sub(r'[\\/:\*?\'"<>\|]', '', base) 42 | base = unicodedata.normalize('NFC', base) 43 | # Looks like the normalization issue has finally been 44 | # solved. Always use NFC versions of file names now. 45 | mdir = mw.col.media.dir() 46 | if not exists_lc(mdir, base + end): 47 | return os.path.join(mdir, base + end), base + end 48 | for i in range(1, 10000): 49 | # Don't be silly. Give up after 9999 tries (by falling out of 50 | # this loop). 51 | long_name = '{0}_{1}{2}'.format(base, i, end) 52 | if not exists_lc(mdir, long_name): 53 | return os.path.join(mdir, long_name), long_name 54 | # The only way we can have arrived here is by unsuccessfully 55 | # trying the 10000 names. 56 | raise ValueError('Could not find free name.') 57 | 58 | def exists_lc(path, name): 59 | """Test if file name clashes with name of extant file. 60 | 61 | On Mac OS X we, simply check if the file exists. 62 | On other systems, we check if the name would clashes with an 63 | existing file’s name. That is, we check for files that have the same 64 | name when both are pulled to lower case and Unicode normalized. 65 | """ 66 | # The point is that like this syncing from Linux to Macs/Windows 67 | # and from Linux/Windows to Macs should work savely. Not much to 68 | # do on Macs, then. 69 | if isMac: 70 | return os.path.exists(os.path.join(path, name)) 71 | ln_name = unicodedata.normalize('NFC', name.lower()) 72 | for fname in os.listdir(path): 73 | if unicodedata.normalize('NFC', fname.lower()) == ln_name: 74 | return True 75 | # After the loop, none found. 76 | return False 77 | 78 | def unmunge_to_mediafile(dl_entry): 79 | """ 80 | Move the data to the media folder. 81 | 82 | Determine a free media name and move the data there from the 83 | tempfile. 84 | """ 85 | try: 86 | media_path, media_file_name = free_media_name( 87 | dl_entry.base_name, dl_entry.file_extension) 88 | shutil.copy(dl_entry.file_path, media_path) 89 | except : 90 | media_path ='' 91 | media_file_name ='' 92 | pass 93 | 94 | return media_file_name 95 | 96 | def get_file_entry(file,base_name): 97 | f_entry = dl_entry(file,base_name) 98 | return f_entry 99 | 100 | -------------------------------------------------------------------------------- /download_entry.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # Copyright © 2015 Paul Hartmann 4 | # Copyright © 2012–15 Roland Sieker 5 | # 6 | # License: GNU AGPL, version 3 or later; 7 | # http://www.gnu.org/copyleft/agpl.html 8 | 9 | import os 10 | 11 | from .blacklist import add_black_hash 12 | from .processors import processor 13 | from .mediafile_utils import unmunge_to_mediafile 14 | 15 | if processor: 16 | import pydub 17 | # See processors/__init__.py. We try the import there. If we have 18 | # a processor, this import should work. 19 | 20 | class DownloadEntry(object): 21 | """Data about a single file downloaded by a downloader""" 22 | def __init__(self, field_data, file_path, extras, icon): 23 | self.file_path = file_path 24 | # Absolute file path of the downloaded audio file 25 | self.word = field_data.word 26 | self.word_field_name = field_data.word_field_name 27 | self.audio_field_name = field_data.audio_field_name 28 | self.file_extension = '.mp3' 29 | # The file extension (with dot) 30 | self.extras = extras 31 | # A dict with strings of interesting informations, like 32 | # meaning numbers or name of speaker. Usually contains the 33 | # source. 34 | self.icon = icon 35 | # The downloader’s favicon 36 | self.action = Action.Add 37 | 38 | @property 39 | def display_word(self): 40 | return self.word 41 | 42 | @property 43 | def base_name(self): 44 | return self.word 45 | 46 | @property 47 | def entry_hash(self): 48 | return None 49 | # We are back to the way where the downloader checks 50 | # whether the file has a bad hash. This now doubles as the 51 | # old show_skull_and_bones. We show that button when this 52 | # has an interesting value. And that is set in JpodDownleadEntry 53 | 54 | def process(self): 55 | """Normalize &c. the audio file, if possible 56 | 57 | When we have an audio processor, process the file 58 | (i.e. normalize, remove silence, convert to preferred format) 59 | and update self. 60 | """ 61 | if processor: 62 | try: 63 | new_fp, new_sffx = processor.process(self) 64 | except pydub.exceptions.CouldntDecodeError: 65 | self.action = Action.Delete 66 | else: 67 | self.file_path = new_fp 68 | self.file_extension = new_sffx 69 | 70 | def dispatch(self, note): 71 | """Do what should be done with the downloaded file 72 | 73 | Depending on self.action, do that action. 74 | 75 | * That is, move the file do the media folder if we want it 76 | on the note or just want to keep it. 77 | * Add it to the note if that’s what we want. 78 | * Delete it if we want just delete or blacklist it. 79 | * Blacklist the hash if that’s what we want.""" 80 | if self.action == Action.Add or self.action == Action.Keep: 81 | media_fn = unmunge_to_mediafile(self) 82 | if self.action == Action.Add: 83 | note[self.audio_field_name] += '[sound:' + media_fn + ']' 84 | if self.action == Action.Delete or self.action == Action.Blacklist: 85 | os.remove(self.file_path) 86 | if self.action == Action.Blacklist: 87 | add_black_hash(self.entry_hash) 88 | 89 | 90 | class JpodDownloadEntry(DownloadEntry): 91 | """Data about a single file downloaded by a downloader""" 92 | def __init__( 93 | self, japanese_field_data, file_path, extras, icon, file_hash): 94 | DownloadEntry.__init__( 95 | self, japanese_field_data, file_path, extras, icon) 96 | self.kanji = japanese_field_data.kanji 97 | self.kana = japanese_field_data.kana 98 | self.hash_ = file_hash 99 | 100 | @property 101 | def base_name(self): 102 | if self.kana and self.kana != self.kanji: 103 | return u"{kanji}_{kana}".format(kanji=self.kanji, kana=self.kana) 104 | return u"{kanji}".format(kanji=self.kanji) 105 | 106 | @property 107 | def display_word(self): 108 | if self.kana and self.kana != self.kanji: 109 | return u"{kanji}({kana})".format(kanji=self.kanji, kana=self.kana) 110 | # N.B.: those are “full width” (CJK/quad) parentheses 111 | return u"{kanji}".format(kanji=self.kanji) 112 | 113 | @property 114 | def entry_hash(self): 115 | return self.hash_ 116 | 117 | 118 | class Action(object): 119 | Add, Keep, Delete, Blacklist = range(0, 4) 120 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | [Aa][Rr][Mm]/ 24 | [Aa][Rr][Mm]64/ 25 | bld/ 26 | [Bb]in/ 27 | [Oo]bj/ 28 | [Ll]og/ 29 | 30 | # Visual Studio 2015/2017 cache/options directory 31 | #.vs/ 32 | # Uncomment if you have tasks that create the project's static files in wwwroot 33 | #wwwroot/ 34 | 35 | # Visual Studio 2017 auto generated files 36 | Generated\ Files/ 37 | 38 | # MSTest test Results 39 | [Tt]est[Rr]esult*/ 40 | [Bb]uild[Ll]og.* 41 | 42 | # NUNIT 43 | *.VisualState.xml 44 | TestResult.xml 45 | 46 | # Build Results of an ATL Project 47 | [Dd]ebugPS/ 48 | [Rr]eleasePS/ 49 | dlldata.c 50 | 51 | 52 | # Benchmark Results 53 | BenchmarkDotNet.Artifacts/ 54 | 55 | # .NET Core 56 | project.lock.json 57 | project.fragment.lock.json 58 | artifacts/ 59 | 60 | # StyleCop 61 | StyleCopReport.xml 62 | 63 | # Files built by Visual Studio 64 | *_i.c 65 | *_p.c 66 | *_h.h 67 | *.ilk 68 | *.meta 69 | *.obj 70 | *.iobj 71 | *.pch 72 | *.pdb 73 | *.ipdb 74 | *.pgc 75 | *.pgd 76 | *.rsp 77 | *.sbr 78 | *.tlb 79 | *.tli 80 | *.tlh 81 | *.tmp 82 | *.tmp_proj 83 | *_wpftmp.csproj 84 | *.log 85 | *.vspscc 86 | *.vssscc 87 | .builds 88 | *.pidb 89 | *.svclog 90 | *.scc 91 | *.pyproj 92 | 93 | # Chutzpah Test files 94 | _Chutzpah* 95 | 96 | # Visual C++ cache files 97 | ipch/ 98 | *.aps 99 | *.ncb 100 | *.opendb 101 | *.opensdf 102 | *.sdf 103 | *.cachefile 104 | *.VC.db 105 | *.VC.VC.opendb 106 | 107 | # Visual Studio profiler 108 | *.psess 109 | *.vsp 110 | *.vspx 111 | *.sap 112 | 113 | # Visual Studio Trace Files 114 | *.e2e 115 | 116 | # TFS 2012 Local Workspace 117 | $tf/ 118 | 119 | # Guidance Automation Toolkit 120 | *.gpState 121 | 122 | # ReSharper is a .NET coding add-in 123 | _ReSharper*/ 124 | *.[Rr]e[Ss]harper 125 | *.DotSettings.user 126 | 127 | # JustCode is a .NET coding add-in 128 | .JustCode 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # The packages folder can be ignored because of Package Restore 188 | **/[Pp]ackages/* 189 | # except build/, which is used as an MSBuild target. 190 | !**/[Pp]ackages/build/ 191 | # Uncomment if necessary however generally it will be regenerated when needed 192 | #!**/[Pp]ackages/repositories.config 193 | # NuGet v3's project.json files produces more ignorable files 194 | *.nuget.props 195 | *.nuget.targets 196 | 197 | # Microsoft Azure Build Output 198 | csx/ 199 | *.build.csdef 200 | 201 | # Microsoft Azure Emulator 202 | ecf/ 203 | rcf/ 204 | 205 | # Windows Store app package directories and files 206 | AppPackages/ 207 | BundleArtifacts/ 208 | Package.StoreAssociation.xml 209 | _pkginfo.txt 210 | *.appx 211 | 212 | # Visual Studio cache files 213 | # files ending in .cache can be ignored 214 | *.[Cc]ache 215 | # but keep track of directories ending in .cache 216 | !?*.[Cc]ache/ 217 | 218 | # Others 219 | ClientBin/ 220 | ~$* 221 | *~ 222 | *.dbmdl 223 | *.dbproj.schemaview 224 | *.jfm 225 | *.pfx 226 | *.publishsettings 227 | orleans.codegen.cs 228 | 229 | # Including strong name files can present a security risk 230 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 231 | #*.snk 232 | 233 | # Since there are multiple workflows, uncomment next line to ignore bower_components 234 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 235 | #bower_components/ 236 | 237 | # RIA/Silverlight projects 238 | Generated_Code/ 239 | 240 | # Backup & report files from converting an old project file 241 | # to a newer Visual Studio version. Backup files are not needed, 242 | # because we have git ;-) 243 | _UpgradeReport_Files/ 244 | Backup*/ 245 | UpgradeLog*.XML 246 | UpgradeLog*.htm 247 | ServiceFabricBackup/ 248 | *.rptproj.bak 249 | 250 | # SQL Server files 251 | *.mdf 252 | *.ldf 253 | *.ndf 254 | 255 | # Business Intelligence projects 256 | *.rdl.data 257 | *.bim.layout 258 | *.bim_*.settings 259 | *.rptproj.rsuser 260 | *- Backup*.rdl 261 | 262 | # Microsoft Fakes 263 | FakesAssemblies/ 264 | 265 | # GhostDoc plugin setting file 266 | *.GhostDoc.xml 267 | 268 | # Node.js Tools for Visual Studio 269 | .ntvs_analysis.dat 270 | node_modules/ 271 | 272 | # Visual Studio 6 build log 273 | *.plg 274 | 275 | # Visual Studio 6 workspace options file 276 | *.opt 277 | 278 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 279 | *.vbw 280 | 281 | # Visual Studio LightSwitch build output 282 | **/*.HTMLClient/GeneratedArtifacts 283 | **/*.DesktopClient/GeneratedArtifacts 284 | **/*.DesktopClient/ModelManifest.xml 285 | **/*.Server/GeneratedArtifacts 286 | **/*.Server/ModelManifest.xml 287 | _Pvt_Extensions 288 | 289 | # Paket dependency manager 290 | .paket/paket.exe 291 | paket-files/ 292 | 293 | # FAKE - F# Make 294 | .fake/ 295 | 296 | # JetBrains Rider 297 | .idea/ 298 | *.sln.iml 299 | 300 | # CodeRush personal settings 301 | .cr/personal 302 | 303 | # Python Tools for Visual Studio (PTVS) 304 | __pycache__/ 305 | *.pyc 306 | 307 | # Cake - Uncomment if you are using it 308 | # tools/** 309 | # !tools/packages.config 310 | 311 | # Tabs Studio 312 | *.tss 313 | 314 | # Telerik's JustMock configuration file 315 | *.jmconfig 316 | 317 | # BizTalk build output 318 | *.btp.cs 319 | *.btm.cs 320 | *.odx.cs 321 | *.xsd.cs 322 | 323 | # OpenCover UI analysis results 324 | OpenCover/ 325 | 326 | # Azure Stream Analytics local run output 327 | ASALocalRun/ 328 | 329 | # MSBuild Binary and Structured Log 330 | *.binlog 331 | 332 | # NVidia Nsight GPU debugger configuration file 333 | *.nvuser 334 | 335 | # MFractors (Xamarin productivity tool) working folder 336 | .mfractor/ 337 | 338 | # Local History for Visual Studio 339 | .localhistory/ 340 | 341 | # BeatPulse healthcheck temp database 342 | healthchecksdb 343 | 344 | #User-defined files 345 | snippets.txt 346 | _old/ 347 | .vs 348 | env/ 349 | *.sln 350 | config.json 351 | meta.json 352 | word_model.txt 353 | .gitattributes 354 | .gitignore 355 | snippets.txt -------------------------------------------------------------------------------- /get_fields.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # Copyright © 2012–15 Roland Sieker 4 | # 5 | # License: GNU AGPL, version 3 or later; 6 | # http://www.gnu.org/copyleft/agpl.html 7 | 8 | """ 9 | Extract field data to download. 10 | """ 11 | 12 | from collections import namedtuple 13 | import re 14 | 15 | from aqt import mw 16 | 17 | from .field_data import FieldData, JapaneseFieldData 18 | 19 | 20 | # # Change these to mach the field names of your decks. Make sure to 21 | # # not use capital letters. We compare these to lower-case only 22 | # # versions of the field names. When these lists contain upper-case 23 | # # letters, no field will ever be matched and nothing will be 24 | # # downloaded. 25 | expression_fields = ['expression', 'word'] 26 | # Fields we get the ‘normal’ download text from. 27 | # 28 | # Text from these fields is used by most downloaders. When no field is 29 | # found here, we use the first field. 30 | 31 | 32 | reading_keys = ['reading', 'kana', 'かな', '仮名'] 33 | # Fields we get our Japanese text from. 34 | # 35 | # For Japanesepod we use these fields as source. A ‘Reading’ field is 36 | # typically filled automatically by the Japanese Support add-on in a 37 | # useful way (that is, with the reading in square brackets). 38 | 39 | 40 | audio_field_keys = ['audio', 'sound'] 41 | # Fields we put our downloaded sounds in. Don’t try crazy stuff here. 42 | 43 | split_kanji_kana = False 44 | # Replace ‘False’ with ‘True’ when you have no kanji in your reading field 45 | 46 | # Change this at your own risk. 47 | field_name_re = r'{{(?:[/^#]|[^:}]+:|)([^:}{]*%s[^:}{]*)}}' 48 | 49 | 50 | def uniqify_list(seq): 51 | """Return a copy of the list with every element appearing only once.""" 52 | # From http://www.peterbe.com/plog/uniqifiers-benchmark 53 | no_dupes = [] 54 | [no_dupes.append(i) for i in seq if not no_dupes.count(i)] 55 | return no_dupes 56 | 57 | 58 | def field_data(note, audio_field, reading=False): 59 | """Return FieldData when we have a source field 60 | 61 | Return FieldData when we have a matching source field for our 62 | audio field. """ 63 | def return_data(idx): 64 | source_name = field_names[idx] 65 | if reading: 66 | return JapaneseFieldData( 67 | source_name, audio_field, note[source_name]) 68 | else: 69 | return FieldData( 70 | source_name, audio_field, note[source_name]) 71 | 72 | a_name = audio_field.lower() 73 | field_names = [item[0] for item in note.items()] 74 | f_names = [fn.lower() for fn in field_names] 75 | # First, look for just audio fields 76 | for afk in audio_field_keys: 77 | if a_name == afk: 78 | if reading: 79 | sources_list = reading_keys 80 | else: 81 | sources_list = expression_fields 82 | for cnd in sources_list: 83 | for idx, lname in enumerate(f_names): 84 | if cnd == lname: 85 | return return_data(idx) 86 | # At this point: The target name is good, but we found no 87 | # source name. 88 | if not reading: 89 | # Don't give for most languages. Simply use the first 90 | # field. That should work for a lot of people 91 | return return_data(0) 92 | else: 93 | # But that doesn't really work for Japanese. 94 | raise KeyError('No source name found (case 1)') 95 | # This point: target name is not exactly the field name 96 | if afk not in a_name: 97 | # And not a substring either 98 | continue 99 | # Here: the field name contains an audio or sound. 100 | # Mangle the name as described. For the reading case we get a 101 | # list. So do a list for the other case as well. 102 | if reading: 103 | sources_list = [a_name.replace(afk, rk) for rk in reading_keys] 104 | else: 105 | # Here the tricky bit is to remove the right number of '_' 106 | # or ' ' characters, 0 or 1, but not 2. What we want is: 107 | # ExampleAudio -> Example 108 | # Example_Audio -> Example 109 | # Audio_Example -> Example 110 | # but 111 | # Another_Audio_Example -> Another_Example, not Another_Example 112 | # While a bit tricky, this is not THAT hard to do. (Not 113 | # lookbehind needed.) 114 | sources_list = [ 115 | re.sub(r'[\s_]{0}|{0}[\s_]?'.format(re.escape(afk)), 116 | '', a_name, count=1, flags=re.UNICODE)] 117 | for cnd in sources_list: 118 | for idx, lname in enumerate(f_names): 119 | if cnd == lname: 120 | return return_data(idx) 121 | # We do have audio or sound as sub-string but did not find a 122 | # maching field. 123 | raise KeyError('No source field found. (case 2)') 124 | # No audio field at all. 125 | raise KeyError('No source field found. (case 3)') 126 | 127 | 128 | def field_data_from_kanji_kana(note, fn): 129 | # Do the search twice 130 | base_fd = field_data(note, fn) 131 | # base_fd contains the kanji 132 | read_fd = field_data(note, fn, True) 133 | # read_fd is the right type but needs to be updated. 134 | read_fd.kanji = base_fd.word 135 | read_fd.word = base_fd.word # Not used, Set anyway. 136 | read_fd.word_field_name = base_fd.word_field_name 137 | return read_fd 138 | 139 | 140 | def get_side_fields(card, note): 141 | """Return a list of FieldDatas for the currently visible side 142 | 143 | Go through the fields of the currently visible side and return 144 | relevant data, as FieldData objects, for audio fields where we 145 | have matching text fields.""" 146 | if 'question' == mw.reviewer.state: 147 | template = card.template()['qfmt'] 148 | else: 149 | template = card.template()['afmt'] 150 | audio_field_names = [] 151 | all_field_names = [item[0] for item in note.items()] 152 | for afk in audio_field_keys: 153 | # Append all fields in the current template/side that contain 154 | # 'audio' or 'sound' 155 | audio_field_names += re.findall( 156 | field_name_re % afk, template, flags=re.IGNORECASE) 157 | # We use the (old style) % operator rather than 158 | # unicode.format() because we look for {}s in the re, which 159 | # would get more complicated with format(). 160 | audio_field_names = uniqify_list(audio_field_names) 161 | # Filter out non-existing fields. 162 | audio_field_names = [ 163 | fn for fn in audio_field_names if fn in all_field_names] 164 | field_data_list = [] 165 | for audio_field in audio_field_names: 166 | try: 167 | field_data_list.append(field_data(note, audio_field)) 168 | except (KeyError, ValueError): 169 | # No or empty reading field 170 | pass 171 | if not split_kanji_kana: 172 | try: 173 | field_data_list.append( 174 | field_data(note, audio_field, reading=True)) 175 | except (KeyError, ValueError): 176 | pass 177 | else: 178 | try: 179 | field_data_list.append( 180 | field_data_from_kanji_kana(note, audio_field)) 181 | except (KeyError, ValueError): 182 | pass 183 | return field_data_list 184 | 185 | 186 | def get_note_fields(note): 187 | """Return a list of FieldDatas for the note 188 | 189 | Go through the note’s fields and return relevant data, as 190 | FieldData objects, for audio fields where we have matching text 191 | fields.""" 192 | field_names = [item[0] for item in note.items()] 193 | field_data_list = [] 194 | for afk in audio_field_keys: 195 | for fn in field_names: 196 | if afk not in fn.lower(): 197 | continue 198 | if not split_kanji_kana: 199 | try: 200 | field_data_list.append(field_data(note, fn, reading=True)) 201 | except (KeyError, ValueError): 202 | # No or empty source field. 203 | pass 204 | else: 205 | try: 206 | field_data_list.append( 207 | field_data_from_kanji_kana(note, fn)) 208 | except (KeyError, ValueError): 209 | # No or empty source field. 210 | pass 211 | try: 212 | field_data_list.append(field_data(note, fn)) 213 | except (KeyError, ValueError): 214 | # No or empty source field. 215 | pass 216 | return field_data_list 217 | -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # License: GNU AGPL, version 3 or later; 4 | # http://www.gnu.org/copyleft/agpl.html 5 | 6 | """ 7 | Anki2 add-on to download card's fields with audio from Cambridge Dictionary 8 | 9 | 10 | """ 11 | 12 | import os 13 | from PyQt6.QtGui import QIcon 14 | from PyQt6.QtWidgets import QAction, QMenu 15 | 16 | 17 | from aqt import mw 18 | from aqt.utils import tooltip 19 | from anki.hooks import addHook 20 | 21 | from .downloaders import downloaders 22 | from .download_entry import DownloadEntry, Action 23 | from .get_fields import get_note_fields, get_side_fields 24 | from .language import language_code_from_card, language_code_from_editor 25 | from .processors import processor 26 | from .review_gui import review_entries 27 | from .update_gui import update_data 28 | 29 | CREATE_NEW_NOTE_SHORTCUT = "t" 30 | # DOWNLOAD_SIDE_SHORTCUT = "t" 31 | # DOWNLOAD_MANUAL_SHORTCUT = "Ctrl+t" 32 | 33 | icons_dir = os.path.join(mw.pm.addonFolder(), 'downloadaudio', 'icons') 34 | # Place were we keep our megaphone icon. 35 | 36 | 37 | def do_download(note, field_data_list, language, hide_text=False): 38 | """ 39 | Download audio data. 40 | 41 | Go through the list of words and list of sites and download each 42 | word from each site. Then call a function that asks the user what 43 | to do. 44 | """ 45 | retrieved_entries = [] 46 | for field_data in field_data_list: 47 | if field_data.empty: 48 | continue 49 | for dloader in downloaders: 50 | # Use a public variable to set the language. 51 | dloader.language = language 52 | try: 53 | # Make it easer inside the downloader. If anything 54 | # goes wrong, don't catch, or raise whatever you want. 55 | dloader.download_files(field_data) 56 | except: 57 | # # Uncomment this raise while testing a new 58 | # # downloaders. Also use the “For testing” 59 | # # downloaders list with your downloader in 60 | # # downloaders.__init__ 61 | # raise 62 | continue 63 | retrieved_entries += dloader.downloads_list 64 | # Significantly changed the logic. Put all entries in one 65 | # list, do stuff with that list of DownloadEntries. 66 | for entry in retrieved_entries: 67 | # Do the processing before the reviewing now. 68 | entry.process() 69 | try: 70 | retrieved_entries = review_entries(note, retrieved_entries, hide_text) 71 | # Now just the dialog, which sets the fields in the entries 72 | except ValueError as ve: 73 | tooltip(str(ve)) 74 | except RuntimeError as rte: 75 | if 'cancel' in str(rte): 76 | for entry in retrieved_entries: 77 | entry.action = Action.Delete 78 | else: 79 | raise 80 | for entry in retrieved_entries: 81 | entry.dispatch(note) 82 | if any(entry.action == Action.Add for entry in retrieved_entries): 83 | note.flush() 84 | # We have to do different things here, for download during 85 | # review, we should reload the card and replay. When we are in 86 | # the add dialog, we do a field update there. 87 | rnote = None 88 | try: 89 | rnote = mw.reviewer.card.note() 90 | except AttributeError: 91 | # Could not get the note of the reviewer's card. Probably 92 | # not reviewing at all. 93 | return 94 | if note == rnote: 95 | # The note we have is the one we were reviewing, so, 96 | # reload and replay 97 | mw.reviewer.card.load() 98 | mw.reviewer.replayAudio() 99 | 100 | 101 | def download_for_side(): 102 | """ 103 | Download audio for one side. 104 | 105 | Download audio for all audio fields on the currently visible card 106 | side. 107 | """ 108 | card = mw.reviewer.card 109 | if not card: 110 | return 111 | note = card.note() 112 | field_data = get_side_fields(card, note) 113 | do_download( 114 | note, field_data, language_code_from_card(card), hide_text=True) 115 | 116 | 117 | def download_for_note(ask_user=False, note=None, editor=None): 118 | """ 119 | Download audio for all fields. 120 | 121 | Download audio for all fields of the note passed in or the current 122 | note. When ask_user is true, show a dialog that lets the user 123 | modify these texts. 124 | """ 125 | if not note: 126 | try: 127 | card = mw.reviewer.card 128 | note = card.note() 129 | except AttributeError: 130 | return 131 | language_code = language_code_from_card(card) 132 | else: 133 | language_code = language_code_from_editor(note, editor) 134 | field_data = get_note_fields(note) 135 | if not field_data: 136 | # Complain before we show the empty dialog. 137 | tooltip('Nothing to download.') 138 | return 139 | 140 | if ask_user: 141 | try: 142 | field_data, language_code = update_data(field_data, language_code) 143 | except RuntimeError as rte: 144 | if 'cancel' in str(rte): 145 | # User canceled. No need for the "Nothing downloaded" 146 | # message. 147 | return 148 | else: 149 | # Don't know how to handle this after all 150 | raise 151 | do_download(note, field_data, language_code) 152 | 153 | 154 | def download_manual(): 155 | """Do the download with the dialog before we go.""" 156 | download_for_note(ask_user=True) 157 | 158 | 159 | def download_off(): 160 | """Deactivate the download menus.""" 161 | mw.note_download_action.setEnabled(False) 162 | mw.side_download_action.setEnabled(False) 163 | mw.manual_download_action.setEnabled(False) 164 | 165 | 166 | def download_on(): 167 | """Activate the download menus.""" 168 | mw.note_download_action.setEnabled(True) 169 | mw.side_download_action.setEnabled(True) 170 | mw.manual_download_action.setEnabled(True) 171 | 172 | 173 | def editor_download_editing(self): 174 | """Do the download when we are in the note editor.""" 175 | self.saveNow() 176 | download_for_note(ask_user=True, note=self.note, editor=self) 177 | # Fix for issue #10. 178 | self.stealFocus = True 179 | self.loadNote() 180 | self.stealFocus = False 181 | 182 | 183 | def editor_add_download_editing_button(self): 184 | """Add the download button to the editor""" 185 | dl_button = self._addButton( 186 | "download_audio", 187 | lambda self=self: editor_download_editing(self), 188 | tip=u"Download audio…") 189 | dl_button.setIcon( 190 | QIcon(os.path.join(icons_dir, 'download_note_audio.png'))) 191 | 192 | 193 | # Either reuse an edit-media sub-menu created by another add-on 194 | # (probably the mhwave (ex sweep) add-on by Y.T.) or create that 195 | # menu. When we already have that menu, add a separator, otherwise 196 | # create that menu. 197 | 198 | # try: 199 | # mw.edit_media_submenu.addSeparator() 200 | # except AttributeError: 201 | mw.edit_cambridge_submenu = QMenu(u"&Cambridge Dictionary", mw) 202 | mw.form.menuEdit.addSeparator() 203 | mw.form.menuEdit.addMenu(mw.edit_cambridge_submenu) 204 | 205 | 206 | mw.note_crattion_action = QAction(mw) 207 | mw.note_crattion_action.setText(u"Create new note fromm link") 208 | # mw.note_download_action.setIcon(QIcon(os.path.join(icons_dir, 209 | # 'download_note_audio.png'))) 210 | mw.note_crattion_action.setToolTip( 211 | "Create a new card, providing ling first.") 212 | mw.note_crattion_action.setShortcut(CREATE_NEW_NOTE_SHORTCUT) 213 | mw.note_crattion_action.triggered.connect(download_for_note) 214 | 215 | # mw.side_download_action = QAction(mw) 216 | # mw.side_download_action.setText(u"Side audio") 217 | # mw.side_download_action.setIcon( 218 | # QIcon(os.path.join(icons_dir, 'download_side_audio.png'))) 219 | # mw.side_download_action.setToolTip( 220 | # "Download audio for audio fields currently visible.") 221 | # mw.side_download_action.setShortcut(DOWNLOAD_SIDE_SHORTCUT) 222 | # mw.side_download_action.triggered.connect(download_for_side) 223 | # 224 | # mw.manual_download_action = QAction(mw) 225 | # mw.manual_download_action.setText(u"Manual audio") 226 | # mw.manual_download_action.setIcon( 227 | # QIcon(os.path.join(icons_dir, 'download_audio_manual.png'))) 228 | # mw.manual_download_action.setToolTip( 229 | # "Download audio, editing the information first.") 230 | # mw.manual_download_action.setShortcut(DOWNLOAD_MANUAL_SHORTCUT) 231 | # mw.manual_download_action.triggered.connect(download_manual) 232 | 233 | 234 | mw.edit_media_submenu.addAction(mw.note_crattion_action) 235 | 236 | # Todo: switch off at start and on when we get to reviewing. 237 | # # And start with the acitons off. 238 | # download_off() 239 | 240 | 241 | # addHook("setupEditorButtons", editor_add_download_editing_button) 242 | -------------------------------------------------------------------------------- /downloader.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python; coding: utf-8 -*- 2 | # 3 | # Copyright © 2012–15 Roland Sieker 4 | # Copyright © 2015 Paul Hartmann 5 | # 6 | # License: GNU AGPL, version 3 or later; 7 | # http://www.gnu.org/copyleft/agpl.html 8 | 9 | 10 | ''' 11 | Class to download a files from a speaking dictionary or TTS service. 12 | ''' 13 | 14 | 15 | import tempfile 16 | import urllib.request, urllib.error, urllib.parse 17 | import urllib.parse 18 | from bs4 import BeautifulSoup as soup 19 | from field_data import FieldData 20 | 21 | # Make this work without PyQt 22 | with_pyqt = True 23 | try: 24 | from PyQt6.QtGui import QImage 25 | from PyQt6.QtCore import QSize, Qt 26 | except ImportError: 27 | with_pyqt = False 28 | 29 | 30 | def uniqify_list(seq): 31 | """Return a copy of the list with every element appearing only once.""" 32 | # From http://www.peterbe.com/plog/uniqifiers-benchmark 33 | no_dupes = [] 34 | [no_dupes.append(i) for i in seq if not no_dupes.count(i)] 35 | return no_dupes 36 | 37 | 38 | class AudioDownloader(object): 39 | """ 40 | Class to download a files from a dictionary or TTS service. 41 | 42 | This is the base class for the downloaders of spoken 43 | pronunciations. 44 | 45 | The derived classes must implement self.download_files() 46 | """ 47 | def __init__(self): 48 | self.language = '' 49 | # The language used. 50 | # This is used as a public variable and set for every download. 51 | self.downloads_list = [] 52 | # Store for downloaded data. 53 | # This is where self.download_files should store the results 54 | # (type DownloadEntry). 55 | self.url = '' 56 | # The base URL used for (the first step of) the download. 57 | self.icon_url = '' 58 | # URL to get the address of the site icon from. 59 | self.max_icon_size = 20 60 | # Max size we scale the site icon down to, if larger. 61 | self.user_agent = 'Mozilla/5.0' 62 | # User agent string that can be used for requests. 63 | # At least Google TTS won’t give out their translations unless 64 | # we pretend to be some typical browser. 65 | self.use_temp_files = False 66 | # Whether to use files created by tempfiles or not. 67 | # Where to write the downloaded files, in /tmp/ or into the Anki 68 | # media directory directly. 69 | # This is set to True by the “real” audio processor that does 70 | # normalization but doesn’t work for standard installs. On 71 | # typical installs this is kept False.) 72 | self.download_directory = None 73 | # Where to write the downloaded files. 74 | # If this is None or empty (i.e. “if not 75 | # self.download_directory”) (and self.use_temp_files == 76 | # False) we use the current directory. 77 | self.site_icon = None 78 | # The sites’s favicon. 79 | self.file_extension = '.mp3' 80 | # Most sites have mp3 files. 81 | self.word_data = [] 82 | # List contains dictionary(s) of word's definition(s) 83 | # with fields: word_title, word_gram, word_pro_uk, word_pro_us, meanings 84 | self.base_url = '' 85 | # Base url to Cambridge Dictionary - 86 | 87 | def get_word_data(self, field_data): 88 | # Get data from Cambd and fill in word_data dict 89 | raise NotImplementedError("Use a class derived from this.") 90 | 91 | def download_files(self, field_data): 92 | """Downloader functon 93 | 94 | This is the main worker function. It has to be reimplemented 95 | by the derived classes. 96 | 97 | The input is the text to use for the download, the whole text 98 | (for most languages) and the text split into base (kanji) and 99 | ruby (reading, kana). split is set to true when we got the 100 | text from a reading field and should use base and ruby rather 101 | than word. 102 | 103 | This function should clear the self.downloads_list and try to 104 | get pronunciation files from its source, put those into tempfiles, 105 | and add a DownloadEntry object to self_downloads_lists for each of 106 | the zero or more downloaded files. (Zero when the 107 | self.language is wrong, there is no file &c.) 108 | 109 | """ 110 | raise NotImplementedError("Use a class derived from this.") 111 | 112 | def maybe_get_icon(self): 113 | """ 114 | Get icon for the site as a QImage if we haven’t already. 115 | 116 | Get the site icon, either the 'rel="icon"' or the favicon, for 117 | the web page at url or passed in as page_html and store it as 118 | a QImage. This function can be called repeatedly and loads the 119 | icon only once. 120 | """ 121 | if self.site_icon: 122 | return 123 | if not with_pyqt: 124 | self.site_icon = None 125 | return 126 | page_request = urllib.request.Request(self.icon_url) 127 | if self.user_agent: 128 | page_request.add_header('User-agent', self.user_agent) 129 | page_response = urllib.request.urlopen(page_request) 130 | if 200 != page_response.code: 131 | self.get_favicon() 132 | return 133 | page_soup = soup(page_response, 'html.parser') 134 | try: 135 | icon_url = page_soup.find( 136 | name='link', attrs={'rel': 'icon'})['href'] 137 | except (TypeError, KeyError): 138 | self.get_favicon() 139 | return 140 | # The url may be absolute or relative. 141 | if not urllib.parse.urlsplit(icon_url).netloc: 142 | icon_url = urllib.parse.urljoin( 143 | self.url, urllib.parse.quote(icon_url.encode('utf-8'))) 144 | icon_request = urllib.request.Request(icon_url) 145 | if self.user_agent: 146 | icon_request.add_header('User-agent', self.user_agent) 147 | icon_response = urllib.request.urlopen(icon_request) 148 | if 200 != icon_response.code: 149 | self.site_icon = None 150 | return 151 | self.site_icon = QImage.fromData(icon_response.read()) 152 | max_size = QSize(self.max_icon_size, self.max_icon_size) 153 | icon_size = self.site_icon.size() 154 | if icon_size.width() > max_size.width() \ 155 | or icon_size.height() > max_size.height(): 156 | self.site_icon = self.site_icon.scaled( 157 | max_size, Qt.KeepAspectRatio, Qt.SmoothTransformation) 158 | 159 | def get_favicon(self): 160 | """ 161 | Get favicon for the site. 162 | 163 | This is called when the site_url can’t be loaded or when that 164 | page doesn’t contain a link tag with rel set to icon (the new 165 | way of doing site icons.) 166 | """ 167 | if self.site_icon: 168 | return 169 | if not with_pyqt: 170 | self.site_icon = None 171 | return 172 | ico_url = urllib.parse.urljoin(self.icon_url, "/favicon.ico") 173 | ico_request = urllib.request.Request(ico_url) 174 | if self.user_agent: 175 | ico_request.add_header('User-agent', self.user_agent) 176 | ico_response = urllib.request.urlopen(ico_request) 177 | if 200 != ico_response.code: 178 | self.site_icon = None 179 | return 180 | self.site_icon = QImage.fromData(ico_response.read()) 181 | max_size = QSize(self.max_icon_size, self.max_icon_size) 182 | ico_size = self.site_icon.size() 183 | if ico_size.width() > max_size.width() \ 184 | or ico_size.height() > max_size.height(): 185 | self.site_icon = self.site_icon.scaled( 186 | max_size, Qt.KeepAspectRatio, Qt.SmoothTransformation) 187 | 188 | def get_data_from_url(self, url_in): 189 | """ 190 | Return raw data loaded from an URL. 191 | 192 | Helper function. Put in an URL and it sets the agent, sends 193 | the requests, checks that we got error code 200 and returns 194 | the raw data only when everything is OK. 195 | """ 196 | request = urllib.request.Request(url_in) 197 | request.add_header('User-agent', self.user_agent) 198 | response = urllib.request.urlopen(request) 199 | if 200 != response.code: 200 | raise ValueError(str(response.code) + ': ' + response.msg) 201 | return response.read() 202 | 203 | def get_tempfile_from_url(self, url_in): 204 | """ 205 | Download raw data from url and put into a tempfile 206 | 207 | Wrapper helper function aronud self.get_data_from_url(). 208 | """ 209 | data = self.get_data_from_url(url_in) 210 | # We put the data into RAM first so that we don’t have to 211 | # clean up the temp file when the get does not work. (Bad 212 | # get_data raises all kinds of exceptions that fly through 213 | # here.) 214 | tfile = tempfile.NamedTemporaryFile( 215 | delete=False, prefix='anki_audio_', suffix=self.file_extension) 216 | tfile.write(data) 217 | tfile.close() 218 | return tfile.name 219 | 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from random import randint 3 | import json 4 | import socket 5 | import ssl 6 | 7 | from aqt import mw 8 | from anki import notes 9 | 10 | from . import styles 11 | from ._names import * 12 | from .mediafile_utils import * 13 | 14 | from PyQt6.QtWidgets import QMessageBox 15 | 16 | 17 | fields = ['Word','Examples','Definition','Audio','Picture','Pronunciation','Grammar','Meaning','SynonymAntonym'] 18 | 19 | def fill_note(word_entry, note): 20 | 21 | config = get_config() 22 | pronunciation_uk = config['pronunciation_uk'] if 'pronunciation_uk' in config else True 23 | pronunciation_us = config['pronunciation_us'] if 'pronunciation_us' in config else True 24 | 25 | note['Word'] = word_entry.word_title 26 | note['Examples'] = "
".join(word_entry.word_examples) 27 | note['Definition'] = word_entry.word_specific 28 | note['Pronunciation'] = "" 29 | if pronunciation_uk: 30 | note['Pronunciation'] += word_entry.word_pro_uk + ' ' 31 | if pronunciation_us: 32 | note['Pronunciation'] += word_entry.word_pro_us 33 | note['Grammar'] = word_entry.word_part_of_speech 34 | note['Meaning'] = word_entry.word_general 35 | audio_field = '' 36 | if word_entry.word_uk_media and pronunciation_uk: 37 | try: 38 | f_entry = get_file_entry(word_entry.word_uk_media,note['Word']) 39 | audio_field = audio_field + '[sound:' + unmunge_to_mediafile(f_entry)+'] ' 40 | except : 41 | pass 42 | try: 43 | if word_entry.word_us_media and pronunciation_us: 44 | f_entry = get_file_entry(word_entry.word_us_media,note['Word']) 45 | audio_field = audio_field + '[sound:' + unmunge_to_mediafile(f_entry)+'] ' 46 | except : 47 | pass 48 | 49 | note['Audio'] = audio_field 50 | #Fetching picture 51 | #picture_name = word.get('Picture').split('/')[-1] if word.get('Picture') else '' 52 | picture_field = '' 53 | f_picture = word_entry.word_image 54 | if f_picture: 55 | f_entry = get_file_entry(f_picture,note['Word']) 56 | picture_field = '' 57 | note['Picture'] = picture_field 58 | return note 59 | 60 | def add_word(word, model): 61 | # TODO: Use picture_name and sound_name to check 62 | # if update is needed and don't download media if not 63 | collection = mw.col 64 | note = notes.Note(collection, model) 65 | note = fill_note(word, note) 66 | 67 | # TODO: Rewrite to use is_duplicate() 68 | #word_value = word.get('wordValue') if word.get('wordValue') else 'NO_WORD_VALUE' 69 | #dupes = collection.findDupes("en", word_value) 70 | ## a hack to support words with apostrophes 71 | #note_dupes1 = collection.findNotes("en:'%s'" % word_value) 72 | #note_dupes2 = collection.findNotes('en:"%s"' % word_value) 73 | #note_dupes = note_dupes1 + note_dupes2 74 | collection.addNote(note) 75 | 76 | def add_word_to_collection(word_entry, collection): 77 | 78 | model = prepare_model(mw.col, fields, styles.model_css) 79 | note = notes.Note(collection, model) 80 | note = fill_note(word_entry, note) 81 | 82 | collection.addNote(note) 83 | 84 | 85 | #if not dupes and not note_dupes: 86 | # collection.addNote(note) 87 | ## TODO: Update notes if translation or tags (user wordsets) changed 88 | #elif (note['picture_name'] or note['sound_name']) and note_dupes: 89 | # # update existing notes with new pictures and sounds in case 90 | # # they have been changed in LinguaLeo's UI 91 | # for nid in note_dupes: 92 | # note_in_db = notes.Note(collection, id=nid) 93 | # # a dirty hack below until a new field in the model is introduced 94 | # # put a space before or after a *sound* field of an existing note if you want it to be updated 95 | # # if a note has no picture or sound, it will be updated anyway 96 | # # TODO: Check if hack is still needed, remove if not 97 | # sound_name = note_in_db['sound_name'] 98 | # sound_name = sound_name.replace(" ", " ") 99 | # note_needs_update = sound_name != sound_name.strip() 100 | # if note['picture_name'] and (note_needs_update or not note_in_db['picture_name'].strip()): 101 | # note_in_db['picture_name'] = note['picture_name'] 102 | # if note['sound_name'] and (note_needs_update or not note_in_db['sound_name'].strip()): 103 | # note_in_db['sound_name'] = note['sound_name'] 104 | # note_in_db.flush() 105 | ## TODO: Check if it is possible to update Anki's media collection to remove old (unused) media 106 | 107 | def create_templates(collection): 108 | template_Recognition = collection.models.newTemplate('Recognition') 109 | template_Recognition['qfmt'] = styles.std_word 110 | template_Recognition['afmt'] = styles.std_word_def_sound 111 | template_Recall = collection.models.newTemplate('Recall') 112 | template_Recall['qfmt'] = styles.std_def 113 | template_Recall['afmt'] = styles.std_def_word_sound 114 | template_Sound = collection.models.newTemplate('Sound') 115 | template_Sound['qfmt'] = styles.std_sound 116 | template_Sound['afmt'] = styles.std_sound_word_def 117 | return (template_Recognition, template_Recall, template_Sound) 118 | 119 | def create_new_model(collection, fields, model_css): 120 | model = collection.models.new(CAMBRIDGE_MODEL) 121 | #Seems like tag key is depricated 122 | #model['tags'].append("Cambridge") 123 | #model['css'] = model_css 124 | for field in fields: 125 | collection.models.addField(model, collection.models.newField(field)) 126 | template_Recognition, template_Recall, template_Sound = create_templates(collection) 127 | collection.models.addTemplate(model, template_Recognition) 128 | collection.models.addTemplate(model, template_Recall) 129 | collection.models.addTemplate(model, template_Sound) 130 | model['id'] = randint(100000, 1000000) # Essential for upgrade detection 131 | collection.models.update(model) 132 | return model 133 | 134 | def is_model_exist(collection, fields): 135 | name_exist = CAMBRIDGE_MODEL in collection.models.allNames() 136 | if name_exist: 137 | fields_ok = collection.models.fieldNames(collection.models.byName( 138 | CAMBRIDGE_MODEL)) == fields 139 | else: 140 | fields_ok = False 141 | return name_exist and fields_ok 142 | 143 | def prepare_model(collection, fields, model_css): 144 | """ 145 | Returns a model for our future notes. 146 | Creates a deck to keep them. 147 | """ 148 | if is_model_exist(collection, fields): 149 | model = collection.models.byName(CAMBRIDGE_MODEL) 150 | else: 151 | model = create_new_model(collection, fields, model_css) 152 | # TODO: Move Deck name to config? 153 | # Create a deck "Cambridge" and write id to deck_id 154 | model['did'] = collection.decks.id('Cambridge') 155 | collection.models.setCurrent(model) 156 | collection.models.save(model) 157 | return model 158 | 159 | def get_cambridge_model(collection): 160 | """ 161 | Get model named defined in STANDARD_MODEL_WITH_AUDIO literal 162 | """ 163 | return collection.models.byName(CAMBRIDGE_MODEL) 164 | 165 | def is_valid_ascii(url): 166 | """ 167 | Check an url if it is a valid ascii string 168 | After the LinguaLeo update some images 169 | have broken links with cyrillic characters 170 | """ 171 | if url == '': 172 | return True 173 | try: 174 | url.encode('ascii') 175 | except: 176 | return False 177 | return True 178 | 179 | def get_module_name(): 180 | 181 | return __name__.split(".")[0] 182 | 183 | def get_addon_dir(): 184 | root = mw.pm.addonFolder() 185 | addon_dir = os.path.join(root, get_module_name()) 186 | return addon_dir 187 | 188 | def get_cookies_path(): 189 | """ 190 | Returns a full path to cookies.txt in the user_files folder 191 | :return: 192 | """ 193 | # user_files folder in the current addon's dir 194 | uf_dir = os.path.join(get_addon_dir(), 'user_files') 195 | # Create a folder if doesn't exist 196 | if not os.path.exists(uf_dir): 197 | try: 198 | os.makedirs(uf_dir) 199 | except: 200 | # TODO: Improve error handling 201 | return None 202 | return os.path.join(uf_dir, 'cookies.txt') 203 | 204 | def get_config(): 205 | # Load config from config.json file 206 | #if getattr(getattr(mw, "addonManager", None), "getConfig", None): 207 | # config = mw.addonManager.getConfig(get_module_name()) 208 | #else: 209 | try: 210 | config_file = os.path.join(get_addon_dir(), 'config.json') 211 | with open(config_file, 'r') as f: 212 | config = json.loads(f.read()) 213 | except IOError: 214 | try: 215 | config = {'cookie':'', 'pronunciation_uk': True, 'pronunciation_us': True} 216 | config_file = os.path.join(get_addon_dir(), 'config.json') 217 | with open(config_file, 'w') as f: 218 | json.dump(config, f, sort_keys=True, indent=2) 219 | f.close() 220 | with open(config_file, 'r') as f: 221 | config = json.loads(f.read()) 222 | except: 223 | config = None 224 | return config 225 | 226 | def update_config(config): 227 | #if getattr(getattr(mw, "addonManager", None), "writeConfig", None): 228 | # mw.addonManager.writeConfig(get_module_name(), config) 229 | #else: 230 | try: 231 | config_file = os.path.join(get_addon_dir(), 'config.json') 232 | with open(config_file, 'w') as f: 233 | json.dump(config, f, sort_keys=True, indent=2) 234 | except: 235 | # TODO: Improve error handling 236 | #pass 237 | raise SystemError 238 | 239 | def get_config_dict(): 240 | config = {} 241 | config['cookie'] = '' 242 | config['pronunciation_uk'] = True 243 | config['pronunciation_us'] = True 244 | return config 245 | 246 | def find_note_with_url_pictures(AddonDialog): 247 | query = "Picture:*https*" 248 | foundCardsIDs = mw.col.findNotes(query) 249 | AddonDialog.progress.setMaximum(len(foundCardsIDs)) 250 | n = 0 251 | for f_noteID in foundCardsIDs: 252 | f_note = mw.col.getNote(f_noteID) 253 | f_picture = mw.cddownloader.get_tempfile_from_url(f_note['Picture']) 254 | picture_name = f_note['Word'].split('/')[-1] if f_note['Word'] else '' 255 | f_entry = get_file_entry(f_picture,picture_name) 256 | picture_field = '' 257 | f_note['Picture'] = picture_field 258 | f_note.flush() 259 | n =+ 1 260 | AddonDialog.progress.setValue(n) 261 | 262 | 263 | 264 | 265 | -------------------------------------------------------------------------------- /Cambridge.py: -------------------------------------------------------------------------------- 1 | # system libs 2 | from urllib.error import * 3 | from urllib.parse import quote, urljoin 4 | from urllib.request import Request, urlopen 5 | from copy import copy 6 | from bs4 import BeautifulSoup 7 | from PyQt6.QtCore import QObject 8 | from PyQt6.QtWidgets import QMessageBox 9 | import tempfile 10 | import queue 11 | import copy 12 | 13 | # project libs 14 | from .utils import * 15 | 16 | 17 | class CDDownloader(QObject): 18 | """ 19 | Class to download word definitions and media files - audio and picture (if exist) 20 | from Cambridge Dictionary. 21 | Element fields: word_title, word_gram, word_pro_uk, word_pro_us, word_uk_media, word_us_media, word_image, meanings 22 | """ 23 | 24 | def __init__(self): 25 | super(CDDownloader, self).__init__() 26 | self.icon_url = 'https://dictionary.cambridge.org/' 27 | self.url = \ 28 | 'https://dictionary.cambridge.org/dictionary/english/' 29 | # self.url_sound = self.icon_url 30 | self.extras = dict(Source=u"Cambridge Dictionary") 31 | self.base_url = 'https://dictionary.cambridge.org/' 32 | self.user_url = '' 33 | self.word = '' 34 | self.language = 'en' 35 | self.word_data = [] 36 | self.word_media = {} 37 | self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36' 38 | self.wordlist_id = '' 39 | self.word_id = '' 40 | self.wordlist = [] 41 | self.wordlist_entry = None 42 | self.words_queue = queue.Queue() 43 | # Definitions 44 | self.cambridge_plus_url = 'https://dictionary.cambridge.org/plus/' 45 | self.cambridge_plus_api_url = 'https://dictionary.cambridge.org/plus/api/' 46 | self.config = get_config() 47 | self.req = None 48 | 49 | if self.config is None: 50 | raise Exception("Config file error") 51 | if self.config['cookie'] is None: 52 | raise Exception("Config doesn't have cookie") 53 | 54 | def get_word_defs(self): 55 | 56 | if not self.language.lower().startswith('en'): 57 | return 58 | word = self.word.replace("'", "-") 59 | if not word and not self.user_url: 60 | return 61 | 62 | self.word_data.clear() 63 | # self.maybe_get_icon() 64 | # Do our parsing with BeautifulSoup 65 | 66 | if self.user_url: 67 | self.req = Request(self.user_url) 68 | else: 69 | self.req = Request(self.url + quote(word.encode('utf-8'))) 70 | 71 | self._fill_request_headers() 72 | 73 | try: 74 | response = urlopen(self.req) 75 | except HTTPError as e: 76 | QMessageBox.warning(mw, 'URL error', e.reason.strip()) 77 | return 78 | except URLError as e: 79 | QMessageBox.warning(mw, 'URL error', e.reason.strip()) 80 | return 81 | html_doc = response.read() 82 | 83 | word_soup = BeautifulSoup(html_doc, "html.parser") 84 | 85 | stop_further_parsing = False 86 | 87 | for tag_cald4 in word_soup.find_all(name='div', attrs={'class': 'pr dictionary', 88 | 'data-id': ['cald4', 'cbed', 'cacd', 'cald4-us']}): 89 | for tag_entry in tag_cald4.find_all(name='div', attrs={ 90 | 'class': ['pr entry-body__el', 'pr idiom-block', 'entry-body__el clrd js-share-holder']}): 91 | word_to_add = word_entry() 92 | tag_dict = tag_cald4.find(name='div', attrs={'class': 'cid'}) 93 | word_to_add.word_dictionary_id = tag_dict['id'] 94 | word_to_add.word_dictionary = self.get_dict_name(word_to_add.word_dictionary_id) 95 | # Different types of entries 96 | # pr entry-body__el - ordinary entry 97 | # pr idiom-block - idiomatic expressions 98 | # entry-body__el clrd js-share-holder phrasal verbs 99 | l1_word = {} 100 | # Word title 101 | cur_tag = tag_entry.find(name='div', attrs={'class': 'di-title'}) 102 | if cur_tag: 103 | word_to_add.word_title = self._prettify_string(cur_tag.text) 104 | else: 105 | return 106 | # Word's grammatical part 107 | cur_tag = tag_entry.find(name='div', attrs={'class': 'posgram dpos-g hdib lmr-5'}) 108 | if cur_tag: 109 | word_to_add.word_part_of_speech = self._prettify_string(cur_tag.text) 110 | else: 111 | l1_word["word_gram"] = '' 112 | # UK IPA 113 | cur_tag = tag_entry.find("span", class_=re.compile("uk\sdpron-i\s")) 114 | if not cur_tag: 115 | cur_tag = tag_entry.find("span", attrs={'class': 'uk dpron-i'}) 116 | if cur_tag: 117 | ipa = cur_tag.find(name='span', attrs={'class': 'ipa dipa lpr-2 lpl-1'}) 118 | if ipa: 119 | ipa_text = self._prettify_string(ipa.text) 120 | else: 121 | ipa_text = '' 122 | word_to_add.word_pro_uk = 'UK ' + ipa_text 123 | media_file_tag = cur_tag.find("source", attrs={'type': 'audio/mpeg'}) 124 | if media_file_tag: 125 | if media_file_tag['src'] in self.word_media: 126 | word_to_add.word_uk_media = self.word_media[media_file_tag['src']] 127 | else: 128 | word_to_add.word_uk_media = self.get_tempfile_from_url( 129 | self.base_url.rstrip('/') + media_file_tag['src']) 130 | self.word_media[media_file_tag['src']] = word_to_add.word_uk_media 131 | # US IPA 132 | cur_tag = tag_entry.find("span", class_=re.compile("us\sdpron-i\s")) 133 | if not cur_tag: 134 | cur_tag = tag_entry.find("span", attrs={'class': 'us dpron-i'}) 135 | if cur_tag: 136 | ipa = cur_tag.find(name='span', attrs={'class': 'ipa dipa lpr-2 lpl-1'}) 137 | if ipa: 138 | ipa_text = self._prettify_string(ipa.text) 139 | else: 140 | ipa_text = '' 141 | l1_word["word_pro_us"] = 'US ' + ipa_text 142 | word_to_add.word_pro_us = 'US ' + ipa_text 143 | media_file_tag = cur_tag.find("source", attrs={'type': 'audio/mpeg'}) 144 | if media_file_tag: 145 | if media_file_tag['src'] in self.word_media: 146 | word_to_add.word_us_media = self.word_media[media_file_tag['src']] 147 | else: 148 | word_to_add.word_us_media = self.get_tempfile_from_url( 149 | self.base_url.rstrip('/') + media_file_tag['src']) 150 | self.word_media[media_file_tag['src']] = word_to_add.word_uk_media 151 | 152 | l2_word = {} 153 | suffix = 1 154 | word_to_copy = copy.deepcopy(word_to_add) 155 | # Looping through word general definition - like 'draw verb 156 | # (PICTURE)' 157 | for html_l2_tag in tag_entry.find_all(name=['div', 'span'], attrs={ 158 | 'class': ['pos-body', 'idiom-body didiom-body', 'pv-body dpv-body']}): 159 | # Looping through words specific definitions - l2_meaning 160 | # (def & examples) 161 | for html_pos_body in html_l2_tag.find_all( 162 | attrs={'class': ['pr dsense', 'pr dsense ', 'sense-body dsense_b']}): 163 | tag_l2_word_key = html_pos_body.find(attrs={'class': 'dsense_h'}) 164 | if not tag_l2_word_key: 165 | continue 166 | general_m = self._prettify_string(tag_l2_word_key.get_text()) 167 | word_to_add.word_general = general_m 168 | l2_word[general_m] = None 169 | l2_meaning_key = {} 170 | l2_meaning_examples = [] 171 | l2_meaning = {} 172 | for html_meaning in html_pos_body.find_all(name="div", attrs={ 173 | 'class': ['def-block ddef_block', 'def-block ddef_block ']}): 174 | tag_l2_meaning = html_meaning.find("div", attrs={'class': 'ddef_h'}) 175 | if not tag_l2_meaning: 176 | continue 177 | # Image 178 | tag_picture = html_meaning.find(name='amp-img', attrs={'class': 'dimg_i'}) 179 | if tag_picture: 180 | if tag_picture.attrs['src'] in self.word_media: 181 | word_to_add.word_image = self.word_media[tag_picture.attrs['src']] 182 | else: 183 | word_to_add.word_image = self.get_tempfile_from_url( 184 | self.base_url.rstrip('/') + tag_picture.attrs['src']) 185 | self.word_media[tag_picture.attrs['src']] = word_to_add.word_image 186 | word_to_add.senseId = html_meaning.attrs['data-wl-senseid'] 187 | tag_l2_specific_gram = tag_l2_meaning.find("span", attrs={'class': 'gram dgram'}) 188 | word_to_add.word_specific_gram = self._prettify_string( 189 | tag_l2_specific_gram.text if tag_l2_specific_gram else '') 190 | tag_usage = tag_l2_meaning.find("span", attrs={'class': 'usage dusage'}) 191 | if tag_usage: 192 | word_to_add.usage = self._prettify_string(tag_usage.text) 193 | tag_l2_specific = tag_l2_meaning.find("div", attrs={'class': 'def ddef_d db'}) 194 | word_to_add.word_specific = self._prettify_string( 195 | tag_l2_specific.text if tag_l2_specific else '') 196 | specific_m = self._prettify_string(tag_l2_meaning.text) 197 | l2_meaning[specific_m] = None 198 | examples = [] 199 | for tag_examples in html_meaning.find_all(name='div', attrs={'class': 'examp dexamp'}): 200 | examples.append(self._prettify_string(tag_examples.text)) 201 | l2_meaning[specific_m] = examples 202 | word_to_add.word_examples = examples 203 | self.word_data.append(word_to_add) 204 | word_to_add = copy.deepcopy(word_to_copy) 205 | l2_word[general_m] = l2_meaning 206 | 207 | for html_pos_body in html_l2_tag.find_all(name='div', attrs={'class': 'pr', 'class': 'dsense', 208 | 'class': 'dsense-noh'}): 209 | general_m = 'UNDEFINED' + str(suffix) 210 | word_to_add.word_general = '' 211 | l2_word[general_m] = None 212 | l2_meaning_key = {} 213 | l2_meaning_examples = [] 214 | l2_meaning = {} 215 | for html_meaning in html_pos_body.find_all(name="div", 216 | attrs={'class': 'def-block', 'class': 'ddef_block'}): 217 | tag_l2_meaning = html_meaning.find("div", attrs={'class': 'ddef_h'}) 218 | if not tag_l2_meaning: 219 | continue 220 | # Image 221 | tag_picture = html_meaning.find(name='amp-img', attrs={'class': 'dimg_i'}) 222 | if tag_picture: 223 | if tag_picture.attrs['src'] in self.word_media: 224 | word_to_add.word_image = self.word_media[tag_picture.attrs['src']] 225 | else: 226 | word_to_add.word_image = self.get_tempfile_from_url( 227 | self.base_url.rstrip('/') + tag_picture.attrs['src']) 228 | self.word_media[tag_picture.attrs['src']] = word_to_add.word_image 229 | specific_m = self._prettify_string(tag_l2_meaning.text) 230 | word_to_add.senseId = html_meaning.attrs['data-wl-senseid'] 231 | tag_l2_specific_gram = tag_l2_meaning.find("span", attrs={'class': 'gram dgram'}) 232 | word_to_add.word_specific_gram = self._prettify_string( 233 | tag_l2_specific_gram.text if tag_l2_specific_gram else '') 234 | tag_usage = tag_l2_meaning.find("span", attrs={'class': 'usage dusage'}) 235 | if tag_usage: 236 | word_to_add.usage = self._prettify_string(tag_usage.text) 237 | tag_l2_specific = tag_l2_meaning.find("div", attrs={'class': 'def ddef_d db'}) 238 | word_to_add.word_specific = self._prettify_string( 239 | tag_l2_specific.text if tag_l2_specific else '') 240 | l2_meaning[specific_m] = None 241 | examples = [] 242 | for tag_examples in html_meaning.find_all(name='div', attrs={'class': 'examp dexamp'}): 243 | examples.append(self._prettify_string(tag_examples.text)) 244 | l2_meaning[specific_m] = examples 245 | word_to_add.word_examples = examples 246 | self.word_data.append(word_to_add) 247 | word_to_add = copy.deepcopy(word_to_copy) 248 | l2_word[general_m] = l2_meaning 249 | suffix += 1 250 | 251 | l1_word["meanings"] = l2_word 252 | 253 | if not self.word and self.user_url: 254 | self.word = self.user_url.split('/')[-1] 255 | 256 | self.word_data.sort(key=lambda x: x.word_dictionary_id) 257 | 258 | def get_tempfile_from_url(self, url_in): 259 | """ 260 | Download raw data from url and put into a tempfile 261 | 262 | Wrapper helper function aronud self.get_data_from_url(). 263 | """ 264 | if not url_in: 265 | return None 266 | data = self.get_data_from_url(url_in) 267 | if data == None: 268 | return None 269 | 270 | self.file_extension = '.' + url_in.split('.')[-1] 271 | # We put the data into RAM first so that we don’t have to 272 | # clean up the temp file when the get does not work. (Bad 273 | # get_data raises all kinds of exceptions that fly through 274 | # here.) 275 | tfile = tempfile.NamedTemporaryFile(delete=False, prefix='anki_audio_', suffix=self.file_extension) 276 | tfile.write(data) 277 | tfile.close() 278 | return tfile.name 279 | 280 | def get_data_from_url(self, url_in): 281 | """ 282 | Return raw data loaded from an URL. 283 | 284 | Helper function. Put in an URL and it sets the agent, sends 285 | the requests, checks that we got error code 200 and returns 286 | the raw data only when everything is OK. 287 | """ 288 | self.req = Request(url_in) 289 | self._fill_request_headers() 290 | try: 291 | response = urlopen(self.req) 292 | except URLError as e: 293 | return None 294 | 295 | if 200 != response.code: 296 | raise ValueError(str(response.code) + ': ' + response.msg) 297 | return response.read() 298 | 299 | def clean_up(self): 300 | self.user_url = '' 301 | self.word = '' 302 | self.word_data.clear() 303 | self.word_media = {} 304 | self.wordlist.clear() 305 | self.word_id = '' 306 | self.wordlist_entry = None 307 | 308 | def get_file_entry(self, file, base_name): 309 | file_entry = {} 310 | file_entry['base_name'] = base_name 311 | file_entry['file_extension'] = os.path.splitext(file)[1][1:].strip() 312 | file_entry['file_path'] = os.path.abspath(file) 313 | return file_entry 314 | 315 | def fetch_wordlist_entries(self, wordlist_id): 316 | 317 | for n in range(1, 100): 318 | 319 | # https://dictionary.cambridge.org/plus/wordlist/21215803/entries/100/ 320 | 321 | url_for_request = urljoin(self.base_url, 'plus/wordlist/') 322 | url_for_request = urljoin(url_for_request, str(wordlist_id) + '/entries/') 323 | url_for_request = urljoin(url_for_request, str(n) + '/') 324 | 325 | self.req = Request(url_for_request) 326 | self._fill_request_headers() 327 | self.req.add_header('Accept', 'application/json') 328 | self.req.add_header('Cookie', self.config['cookie']) 329 | 330 | response = urlopen(self.req) 331 | word_list_json = json.loads(response.read()) 332 | if not word_list_json: 333 | break 334 | 335 | for word_in_response in word_list_json: 336 | wl_entry = wordlist_entry() 337 | wl_entry.wordlist_id = word_in_response['wordlistId'] 338 | wl_entry.word_id = word_in_response['id'] 339 | wl_entry.senseId = word_in_response['senseId'] 340 | wl_entry.word_url = word_in_response['entryUrl'] 341 | wl_entry.definition = word_in_response['definition'] 342 | wl_entry.soundUKMp3 = word_in_response['soundUKMp3'] 343 | wl_entry.soundUSMp3 = word_in_response['soundUSMp3'] 344 | wl_entry.dictCode = word_in_response['dictCode'] 345 | wl_entry.headword = word_in_response['headword'] 346 | self.wordlist.append(wl_entry) 347 | # { 348 | # "id": 26061590, 349 | # "entryId": "walk-on-part", 350 | # "headword": "walk-on part", 351 | # "senseId": "ID_00035581_01", 352 | # "dictCode": "English", 353 | # "definition": "A walk-on part in a play is a very small part 354 | # in which the actor is on the stage for a short time and speaks 355 | # very few or no words.", 356 | # "translation": "", 357 | # "pos": "noun", 358 | # "soundUK": "/CUK01223", 359 | # "soundUS": "/CUS02231", 360 | # "soundUKMp3": 361 | # "https://dictionary.cambridge.org/media/english/uk_pron/c/cuk/cuk01/cuk01223.mp3", 362 | # "soundUKOgg": 363 | # "https://dictionary.cambridge.org/media/english/uk_pron_ogg/c/cuk/cuk01/cuk01223.ogg", 364 | # "soundUSMp3": 365 | # "https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus02/cus02231.mp3", 366 | # "soundUSOgg": 367 | # "https://dictionary.cambridge.org/media/english/us_pron_ogg/c/cus/cus02/cus02231.ogg", 368 | # "wordlistId": 21215803, 369 | # "entryUrl": 370 | # "https://dictionary.cambridge.org/dictionary/english/walk-on-part" 371 | # }, 372 | 373 | def delete_word_from_wordlist(self, wl_entry): 374 | 375 | self.req = Request(self.cambridge_plus_api_url + 'deleteWordlistEntry') 376 | self._fill_request_headers() 377 | self.req.add_header('Content-Type', 'application/json') 378 | 379 | self.req.add_header('Cookie', self.config['cookie']) 380 | data = json.dumps({'id': wl_entry.word_id, 'wordlistId': wl_entry.wordlist_id}) 381 | data = data.encode('ascii') 382 | r = urlopen(self.req, data) 383 | 384 | def get_dict_name(self, dict_id): 385 | 386 | dicts = { 387 | 'dataset_cald4': 'Cambridge Advanced Learner''s Dictionary & Thesaurus', 388 | 'dataset_cbed': 'Cambridge Business English Dictionary', 389 | 'dataset_cacd': 'Cambridge American English Dictionary'} 390 | if dict_id in dicts: 391 | return dicts[dict_id] 392 | else: 393 | return '' 394 | 395 | def find_word_by_definition(self, definition): 396 | for wd_entry in self.word_data: 397 | if wd_entry.word_specific == definition: 398 | return self.word_data.pop() 399 | return None 400 | 401 | def find_word_by_wl_entry(self, wl_entry): 402 | wd_entries = list(filter(lambda wd_entry: wd_entry.senseId == wl_entry.senseId, self.word_data)) 403 | if len(wd_entries) == 1: 404 | return wd_entries[0] 405 | 406 | def _prettify_string(self, in_str): 407 | if not in_str: 408 | return '' 409 | in_str = re.sub(r' +', ' ', in_str).strip() 410 | in_str = re.sub(r'\n', '', in_str).strip() 411 | in_str = re.sub(r':$', '', in_str).strip() 412 | return in_str.strip() 413 | 414 | def _fill_request_headers(self): 415 | self.req.add_header("Host", "dictionary.cambridge.org") 416 | self.req.add_header('Accept-Language', 'en-US') 417 | self.req.add_header('User-Agent', 418 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36') 419 | 420 | 421 | class wordlist_entry(): 422 | """ 423 | This class represent a single entry from Cambridge Wordlist. 424 | """ 425 | 426 | def __init__(self, word=None, ref=None, l2_meaning=None, dataWordID=None, wordlist_id=None): 427 | self.wordlist_id = wordlist_id 428 | self.senseId = '' 429 | self.word = word 430 | self.word_url = ref 431 | self.word_l2_meaning = l2_meaning 432 | self.word_id = dataWordID 433 | self.definition = l2_meaning 434 | self.dictCode = '' 435 | self.soundUKMp3 = '' 436 | self.soundUSMp3 = '' 437 | 438 | 439 | class word_entry(): 440 | # word_title, word_gram, word_pro_uk, word_pro_us, word_uk_media, word_us_media, word_image, meanings 441 | def __init__(self): 442 | self.word_dictionary_id = '' 443 | self.word_dictionary = '' 444 | self.word_title = '' 445 | self.word_part_of_speech = '' 446 | self.word_pro_uk = '' 447 | self.word_pro_us = '' 448 | self.word_uk_media = '' 449 | self.word_us_media = '' 450 | self.word_image = '' 451 | self.word_general = '' 452 | self.word_specific = '' 453 | self.word_specific_gram = '' 454 | self.word_examples = [] 455 | self.usage = '' 456 | self.senseId = '' 457 | 458 | # def self._prettify_string(self,in_str): 459 | # if not in_str: 460 | # return '' 461 | # in_str = re.sub(r' +',' ',in_str).strip() 462 | # in_str = re.sub(r'\n','',in_str).strip() 463 | # in_str = re.sub(r':','',in_str).strip() 464 | # return in_str.strip() 465 | 466 | # This code for debugging purposes 467 | # ad = CDDownloader() 468 | # ad.language = 'en' 469 | # ad.user_url = 'https://dictionary.cambridge.org/dictionary/english/pit' 470 | # ad.get_word_defs() 471 | # ad = None 472 | 473 | 474 | ## This code for testing with WebDriver 475 | # ad = CDDownloader() 476 | # ad.language = 'en' 477 | ##ad.user_url = 'https://dictionary.cambridge.org/dictionary/english/tear-up' 478 | ##ad.get_all_words_in_list('https://dictionary.cambridge.org/plus/wordlist/21215803_cald') 479 | # ad.fetch_wordlist_entries('21215803') 480 | # ad = None 481 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 3 | # License: GNU AGPL, version 3 or later; 4 | # http://www.gnu.org/copyleft/agpl.html 5 | """ 6 | Anki2 add-on to download card's fields with audio from Cambridge Dictionary 7 | 8 | """ 9 | 10 | import queue 11 | import traceback 12 | from PyQt6.QtGui import QIcon, QFont 13 | from PyQt6.QtWidgets import * 14 | from PyQt6 import QtCore 15 | from PyQt6.QtCore import (QThread, QObject, pyqtSignal, QUrl, Qt) 16 | from PyQt6.QtWebEngineWidgets import QWebEngineView 17 | from PyQt6.QtWebEngineCore import QWebEnginePage, QWebEngineProfile 18 | from PyQt6.QtNetwork import QNetworkCookie 19 | 20 | from aqt import mw 21 | from aqt.utils import askUserDialog, showInfo, showText, showWarning, tooltip 22 | from anki.hooks import addHook 23 | 24 | from .Cambridge import (CDDownloader, word_entry, wordlist_entry) 25 | from urllib.error import * 26 | 27 | from ._names import * 28 | from .utils import * 29 | 30 | icons_dir = os.path.join(mw.pm.addonFolder(), 'icons') 31 | 32 | 33 | # These are classes for GUI dialogues 34 | class LinkDialogue(QDialog): 35 | """ 36 | A Dialog to let the user edit the texts or change the language. 37 | """ 38 | 39 | def __init__(self, parent=None): 40 | self.edit_word_head = None 41 | self.user_url = '' 42 | self.word = '' 43 | QDialog.__init__(self) 44 | self.setAttribute(Qt.WidgetAttribute.WA_DeleteOnClose, True) 45 | self.initUI() 46 | 47 | def initUI(self): 48 | """Build the dialog box.""" 49 | 50 | self.setWindowTitle(('Anki – Download definitions')) 51 | self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png'))) 52 | layout = QVBoxLayout() 53 | self.setLayout(layout) 54 | self.edit_word_head = QLabel() 55 | 56 | self.edit_word_head.setText(('''

Enter link for parsing

''')) 57 | bold_font = QFont() 58 | bold_font.setBold(True) 59 | self.edit_word_head.setFont(bold_font) 60 | layout.addWidget(self.edit_word_head) 61 | 62 | self.link_editor = QLineEdit() 63 | self.link_editor.placeholderText = 'Enter your link here' 64 | layout.addWidget(self.link_editor) 65 | 66 | dialog_buttons = QDialogButtonBox(self) 67 | dialog_buttons.addButton(QDialogButtonBox.StandardButton.Cancel) 68 | dialog_buttons.addButton(QDialogButtonBox.StandardButton.Ok) 69 | dialog_buttons.accepted.connect(self.get_word_definitions_from_link) 70 | dialog_buttons.rejected.connect(self.reject) 71 | layout.addWidget(dialog_buttons) 72 | self.link_editor.setFocus() 73 | 74 | def get_word_definitions_from_link(self): 75 | self.user_url = self.link_editor.text() 76 | if not self.user_url: 77 | QMessageBox.warning(mw, 'Link is not provided', 'Please, provide a link for you word or phrase.') 78 | return 79 | 80 | downloader = mw.cddownloader 81 | # downloader.clean_up() 82 | downloader.user_url = self.user_url 83 | downloader.get_word_defs() 84 | self.setResult(QDialog.DialogCode.Accepted) 85 | self.done(QDialog.DialogCode.Accepted) 86 | 87 | 88 | class WordDefDialogue(QDialog): 89 | """ 90 | A Dialog to let the user choosing defs to be added. 91 | """ 92 | 93 | def __init__(self, word_data, word, l2_meaning=None, wd_entry=None): 94 | self.word_data = word_data 95 | self.word = word 96 | self.selected_defs = [] # list of selected words (word_entry) 97 | self.deletion_mark = False 98 | self.l2_def = None 99 | self.single_word = False 100 | self.l2_meaning = l2_meaning 101 | self.set_model() 102 | QDialog.__init__(self) 103 | self.initUI() 104 | 105 | def initUI(self): 106 | """Build the dialog box.""" 107 | 108 | self.setWindowTitle(self.word) 109 | self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png'))) 110 | 111 | dialog_layout = QVBoxLayout(self) 112 | scrollArea = QScrollArea() 113 | scrollArea.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff) 114 | dialog_layout.addWidget(scrollArea) 115 | 116 | scroll_area_content = QWidget() 117 | scrollArea.setWidget(scroll_area_content) 118 | 119 | layout = QVBoxLayout(scroll_area_content) 120 | 121 | word_specific = '' 122 | word_part_of_speech = '' 123 | word_dictionary = '' 124 | # Looping through data structure 125 | for word_el in self.word_data: 126 | if word_dictionary != word_el.word_dictionary: 127 | row = 0 128 | gl = QGridLayout() 129 | gr = QGroupBox() 130 | gr.setTitle(word_el.word_dictionary) 131 | gr.setLayout(gl) 132 | word_dictionary = word_el.word_dictionary 133 | 134 | if word_dictionary == word_el.word_dictionary and word_part_of_speech != word_el.word_part_of_speech: 135 | row += 1 136 | word_title = QLabel('

' + word_el.word_title + '

') 137 | gl.addWidget(word_title, row, 0) 138 | word_gram = QLabel('

' + word_el.word_part_of_speech + '

') 139 | gl.addWidget(word_gram, row, 1) 140 | word_part_of_speech = word_el.word_part_of_speech 141 | 142 | row += 1 143 | word_specific = word_el.word_specific 144 | l2_def_check = QCheckBox(word_specific) 145 | l2_def_check.l2_def = word_specific 146 | l2_def_check.word = word_el 147 | l2_def_check.word_specific = word_specific 148 | l2_def_check.stateChanged.connect(self.toggle_def) 149 | gl.addWidget(l2_def_check, row, 0, 1, -1) 150 | self.l2_def = word_specific 151 | layout.addWidget(gr) 152 | 153 | dialog_buttons = QDialogButtonBox(self) 154 | dialog_buttons.addButton(QDialogButtonBox.StandardButton.SaveAll) 155 | dialog_buttons.addButton(QDialogButtonBox.StandardButton.Cancel) 156 | dialog_buttons.addButton(QDialogButtonBox.StandardButton.Ok) 157 | dialog_buttons.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.create_selected_notes) 158 | dialog_buttons.button(QDialogButtonBox.StandardButton.Cancel).clicked.connect(self.reject) 159 | dialog_buttons.button(QDialogButtonBox.StandardButton.SaveAll).clicked.connect(self.save_all) 160 | dialog_layout.addWidget(dialog_buttons) 161 | 162 | # Automatic add single word with single def if in add_single mode 163 | if len(self.word_data) == 1: 164 | self.selected_defs.append(self.word_data[0]) 165 | self.create_selected_notes() 166 | self.single_word = True 167 | 168 | scroll_area_content.adjustSize() # required to get correct content area size 169 | scrollArea.verticalScrollBar().adjustSize() # required to get the correct scrollbar size 170 | scrollArea.setMinimumWidth(scroll_area_content.size().width() + scrollArea.verticalScrollBar().width()) 171 | 172 | def toggle_def(self, state): 173 | sender = self.sender() 174 | word_specific = sender.word_specific 175 | if self.sender(): 176 | if word_specific in self.selected_defs: 177 | self.selected_defs.remove(word_specific) 178 | else: 179 | self.selected_defs.append(word_specific) 180 | 181 | def create_selected_notes(self): 182 | 183 | for wd_entry in self.word_data: 184 | for word_to_save in self.selected_defs: 185 | if wd_entry.word_specific == word_to_save: 186 | add_word(wd_entry, self.model) 187 | self.accept() 188 | 189 | def set_model(self): 190 | self.model = prepare_model(mw.col, fields, styles.model_css) 191 | 192 | def add_note(self, word_to_add): 193 | """ 194 | Note is an SQLite object in Anki so you need 195 | to fill it out inside the main thread 196 | 197 | """ 198 | QMessageBox.warning(mw, 'Link is not provided', str(word_to_add)) 199 | word = {} 200 | word['Word'] = word_to_add['word_title'] 201 | word['Grammar'] = word_to_add['word_gram'] 202 | word['Pronunciation'] = word_to_add['word_pro_uk'] + ' ' + word_to_add['word_pro_us'] 203 | word['Meaning'] = word_to_add['word_general'] if not 'UNDEFINED' in word_to_add['word_general'] else '' 204 | word['Definition'] = word_to_add['word_specific'] 205 | word['Examples'] = word_to_add['word_examples'] 206 | word['Sounds'] = [word_to_add['word_uk_media'], word_to_add['word_us_media']] 207 | word['Picture'] = word_to_add['word_image'] 208 | 209 | add_word(word, self.model) 210 | 211 | def save_all(self): 212 | for wd_entry in self.word_data: 213 | add_word(wd_entry, self.model) 214 | self.done(0) 215 | 216 | 217 | class AddonConfigWindow(QDialog): 218 | """ 219 | A Dialog to let the user to choose defs to be added. 220 | """ 221 | 222 | def __init__(self): 223 | self.config = get_config() 224 | QDialog.__init__(self) 225 | self.initUI() 226 | 227 | def initUI(self): 228 | self.setWindowTitle('Cambridge Addon Settings') 229 | self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png'))) 230 | layout = QVBoxLayout() 231 | self.setLayout(layout) 232 | 233 | # Authorize and save cookies - Google OAuth2 234 | # Some useful variable go here 235 | auth_layout = QHBoxLayout() 236 | auth_label = QLabel() 237 | auth_label.setText('Authorization status:') 238 | auth_layout.addWidget(auth_label) 239 | auth_label_status = QLabel() 240 | auth_label_status.setText('Singed in' if self.config['cookie'] else 'Not signed in') 241 | auth_layout.addWidget(auth_label_status) 242 | auth_btn = QPushButton() 243 | auth_btn.setText('Sign in') 244 | auth_btn.clicked.connect(self.btn_auth_clicked) 245 | auth_layout.addWidget(auth_btn) 246 | layout.addLayout(auth_layout) 247 | 248 | # Cookie - for semi authorization 249 | h_layout = QHBoxLayout() 250 | h_label = QLabel() 251 | h_label.setText('Cookie:') 252 | h_layout.addWidget(h_label) 253 | h_layout.addStretch() 254 | self.editor_cookie = QLineEdit() 255 | self.editor_cookie.setClearButtonEnabled(True) 256 | self.editor_cookie.setText(self.config['cookie'] if self.config['cookie'] else '') 257 | h_layout.addWidget(self.editor_cookie, QtCore.Qt.AlignmentFlag.AlignRight) 258 | layout.addLayout(h_layout, QtCore.Qt.AlignmentFlag.AlignTop) 259 | 260 | # Pronunciation UK 261 | h_layout = QHBoxLayout() 262 | h_label = QLabel() 263 | h_label.setText('Pronunciation UK:') 264 | h_layout.addWidget(h_label) 265 | h_layout.addStretch() 266 | self.cb_pronunciation_uk = QCheckBox() 267 | self.cb_pronunciation_uk.setChecked( 268 | self.config['pronunciation_uk'] if 'pronunciation_uk' in self.config else True) 269 | h_layout.addWidget(self.cb_pronunciation_uk, QtCore.Qt.AlignmentFlag.AlignRight) 270 | layout.addLayout(h_layout, QtCore.Qt.AlignmentFlag.AlignTop) 271 | 272 | # Pronunciation US 273 | h_layout = QHBoxLayout() 274 | h_label = QLabel() 275 | h_label.setText('Pronunciation US:') 276 | h_layout.addWidget(h_label) 277 | h_layout.addStretch() 278 | self.cb_pronunciation_us = QCheckBox() 279 | self.cb_pronunciation_us.setChecked( 280 | self.config['pronunciation_us'] if 'pronunciation_us' in self.config else True) 281 | h_layout.addWidget(self.cb_pronunciation_us, QtCore.Qt.AlignmentFlag.AlignRight) 282 | layout.addLayout(h_layout, QtCore.Qt.AlignmentFlag.AlignTop) 283 | 284 | # word list IDs 285 | h_layout = QVBoxLayout() 286 | h_label = QLabel() 287 | # h_label.setText('Cookie:') 288 | # h_layout.addWidget(h_label) 289 | # h_layout.addStretch() 290 | self.wordlist_list = QListWidget() 291 | if 'wordlist_ids' in self.config: 292 | links = self.config['wordlist_ids'] 293 | for l in links: 294 | self.wordlist_list.addItem(l) 295 | self.wordlist_list.itemDoubleClicked.connect(self.wl_edit_row) 296 | h_layout.addWidget(self.wordlist_list) 297 | h_label = QLabel() 298 | h_label.setText('ID to add:') 299 | h_layout.addWidget(h_label) 300 | self.ledit_wl = QLineEdit() 301 | h_layout.addWidget(self.ledit_wl) 302 | btn_Add_WL = QPushButton() 303 | btn_Add_WL.setText('Add ID') 304 | btn_Add_WL.clicked.connect(self.wl_add) 305 | h_layout.addWidget(btn_Add_WL) 306 | layout.addLayout(h_layout, QtCore.Qt.AlignmentFlag.AlignTop) 307 | 308 | # Find and fetch all pictures instead of links 309 | find_btn = QPushButton() 310 | find_btn.setText('Replace all URL with pictures in deck') 311 | find_btn.clicked.connect(self.find_and_fetch_pictures) 312 | layout.addWidget(find_btn) 313 | 314 | # Stretcher 315 | layout.addStretch() 316 | 317 | # Progress bar 318 | h_layout = QHBoxLayout() 319 | self.progress = QProgressBar(self) 320 | self.progress.setGeometry(0, 0, 200, 30) 321 | self.progress.visbile = False 322 | h_layout.addWidget(self.progress) 323 | layout.addLayout(h_layout) 324 | 325 | # Bottom buttons - Ok, Cancel 326 | btn_bottom_layout = QHBoxLayout() 327 | btn_bottom_layout.addStretch() 328 | btn_Ok = QPushButton() 329 | btn_Ok.setText('Ok') 330 | btn_bottom_layout.addWidget(btn_Ok, QtCore.Qt.AlignmentFlag.AlignRight) 331 | btn_Cancel = QPushButton() 332 | btn_Cancel.setText('Cancel') 333 | btn_bottom_layout.addWidget(btn_Cancel, QtCore.Qt.AlignmentFlag.AlignRight) 334 | layout.addLayout(btn_bottom_layout) 335 | btn_Ok.clicked.connect(self.btn_Ok) 336 | btn_Cancel.clicked.connect(self.close) 337 | 338 | def find_and_fetch_pictures(self): 339 | self.progress.visible = True 340 | find_note_with_url_pictures(self) 341 | self.progress.visible = False 342 | 343 | def btn_auth_clicked(self): 344 | auth_window = WebPageView(AUTH_URL) 345 | auth_window.accepted.connect(self.onAuthCompleted) 346 | auth_window.exec() 347 | self.config['cookie'] = auth_window.get_cookie() 348 | update_config(self.config) 349 | self.editor_cookie.setText(self.config['cookie'] if self.config['cookie'] else '') 350 | 351 | def onAuthCompleted(self): 352 | QMessageBox.information(self, 'Singing in', 'Signed in successfully') 353 | 354 | def btn_Ok(self): 355 | # Fill config dict with current settings and write them to file 356 | self.config['cookie'] = self.editor_cookie.text() 357 | self.config['pronunciation_uk'] = self.cb_pronunciation_uk.isChecked() 358 | self.config['pronunciation_us'] = self.cb_pronunciation_us.isChecked() 359 | wl = [] 360 | for i in self.iterAllItems(self.wordlist_list): 361 | wl.append(i.text()) 362 | self.config['wordlist_ids'] = wl 363 | update_config(self.config) 364 | self.close() 365 | 366 | def btn_Cancel(self): 367 | self.close() 368 | 369 | def wl_add(self): 370 | self.wordlist_list.addItem(self.ledit_wl.text()) 371 | 372 | def iterAllItems(self, iterable): 373 | for i in range(iterable.count()): 374 | yield iterable.item(i) 375 | 376 | def wl_edit_row(self): 377 | current_item = self.wordlist_list.currentItem() 378 | self.ledit_wl.setText(current_item.text()) 379 | self.wordlist_list.takeItem(self.wordlist_list.currentRow()) 380 | 381 | 382 | class WParseSavedWL(QObject): 383 | instance = None 384 | 385 | def __init__(self): 386 | super(WParseSavedWL, self).__init__() 387 | self.config = get_config() 388 | self.collection = mw.col 389 | self.word_to_fetch = '0' 390 | self.fetched = 0 391 | self.need_to_stop = False 392 | self.wordlist_queue = queue.Queue() 393 | 394 | def __call__(self, *pargs, **kargs): 395 | if instance is None: 396 | instance = super().__call__(*args, **kw) 397 | return instance 398 | 399 | def parse(self): 400 | 401 | if 'wordlist_ids' not in self.config: 402 | return 403 | 404 | t = self.thread = FetchThread(0, True, self.wordlist_queue) 405 | t._event.connect(self.onEvent) 406 | t._add_word_event.connect(self.on_add_word) 407 | self.thread.start() 408 | 409 | def onEvent(self, evt, *args): 410 | n = 1 411 | if evt == 'spawn_other_threads': 412 | self.word_to_fetch = args[0] 413 | for n in range(1, 5): 414 | t = FetchThread(max_words=5, fetch_wordlist=False, wordlist_queue=self.wordlist_queue) 415 | t._event.connect(self.onEvent) 416 | t._add_word_event.connect(self.on_add_word) 417 | setattr(self, 'thread' + str(n), t) 418 | thread_to_start = getattr(self, 'thread' + str(n)) 419 | thread_to_start.start() 420 | n += 1 421 | if evt == 'batch_completed': 422 | if self.wordlist_queue.empty() or self.need_to_stop: 423 | return 424 | for n in range(1, 5): 425 | thread_to_restart = getattr(self, 'thread' + str(n), None) 426 | if thread_to_restart != None and thread_to_restart.isFinished(): 427 | thread_to_restart.start() 428 | if evt == 'need_to_stop': 429 | self.need_to_stop = True 430 | tooltip( 431 | (str('Stopping all error')), 432 | parent=mw, 433 | ) 434 | if evt == 'error': 435 | self.need_to_stop = True 436 | showText(("Fetching failed:\n%s") % self._rewriteError(args[0])) 437 | 438 | if evt == 'message': 439 | tooltip( 440 | (str(args[0])), 441 | parent=mw, 442 | ) 443 | 444 | def on_add_word(self, word_entry): 445 | add_word_to_collection(word_entry, self.collection) 446 | self.fetched += 1 447 | tooltip( 448 | (str(self.fetched) + ' / ' + str(self.word_to_fetch)), 449 | parent=mw, 450 | ) 451 | 452 | def _rewriteError(self, err): 453 | if "Errno 61" in err: 454 | return ( 455 | """\ 456 | Couldn't connect to AnkiWeb. Please check your network connection \ 457 | and try again.""" 458 | ) 459 | elif "timed out" in err or "10060" in err: 460 | return ( 461 | """\ 462 | The connection to AnkiWeb timed out. Please check your network \ 463 | connection and try again.""" 464 | ) 465 | elif "403" in err: 466 | return ( 467 | """\ 468 | Authentication failed. Either your cookie expired or your are trying to delete entries from a community wordlist.""" 469 | ) 470 | elif "code: 500" in err: 471 | return ( 472 | """\ 473 | AnkiWeb encountered an error. Please try again in a few minutes, and if \ 474 | the problem persists, please file a bug report.""" 475 | ) 476 | elif "code: 501" in err: 477 | return ( 478 | """\ 479 | Please upgrade to the latest version of Anki.""" 480 | ) 481 | # 502 is technically due to the server restarting, but we reuse the 482 | # error message 483 | elif "code: 502" in err: 484 | return ("Cambridge Dictionary is under maintenance. Please try again in a few minutes.") 485 | elif "code: 503" in err: 486 | return ( 487 | """\ 488 | Cambridge Dictionary is too busy at the moment. Please try again in a few minutes.""" 489 | ) 490 | elif "code: 504" in err: 491 | return ( 492 | "504 gateway timeout error received. Please try temporarily disabling your antivirus." 493 | ) 494 | elif "10061" in err or "10013" in err or "10053" in err: 495 | return ( 496 | "Antivirus or firewall software is preventing Anki from connecting to the internet." 497 | ) 498 | elif "10054" in err or "Broken pipe" in err: 499 | return ( 500 | "Connection timed out. Either your internet connection is experiencing problems, or you have a very large file in your media folder." 501 | ) 502 | elif "Unable to find the server" in err or "socket.gaierror" in err: 503 | return ( 504 | "Server not found. Either your connection is down, or antivirus/firewall " 505 | "software is blocking Anki from connecting to the internet." 506 | ) 507 | elif "code: 407" in err: 508 | return ("Proxy authentication required.") 509 | elif "code: 413" in err: 510 | return ("Your collection or a media file is too large to sync.") 511 | elif "EOF occurred in violation of protocol" in err: 512 | return ( 513 | ( 514 | "Error establishing a secure connection. This is usually caused by antivirus, firewall or VPN software, or problems with your ISP." 515 | ) 516 | + " (eof)" 517 | ) 518 | elif "certificate verify failed" in err: 519 | return ( 520 | ( 521 | "Error establishing a secure connection. This is usually caused by antivirus, firewall or VPN software, or problems with your ISP." 522 | ) 523 | + " (invalid cert)" 524 | ) 525 | return err 526 | 527 | 528 | class FetchThread(QThread): 529 | _event = pyqtSignal(str, str) 530 | _add_word_event = pyqtSignal(word_entry) 531 | 532 | def __init__(self, max_words=5, fetch_wordlist=False, wordlist_queue=None): 533 | QThread.__init__(self) 534 | self.config = get_config() 535 | self.downloader = CDDownloader() 536 | self.max_words = max_words 537 | self.fetch_wordlist = fetch_wordlist 538 | self.wordlist_queue = wordlist_queue 539 | 540 | def run(self): 541 | if self.wordlist_queue == None: 542 | return 543 | try: 544 | if self.fetch_wordlist: 545 | self._fetch_wordlist() 546 | self.fireEvent('spawn_other_threads', str(self.wordlist_queue.qsize())) 547 | return 548 | except: 549 | err = traceback.format_exc() 550 | self.fireEvent("error", err) 551 | return 552 | 553 | try: 554 | self._fetch_words() 555 | self.fireEvent('batch_completed') 556 | except: 557 | err = traceback.format_exc() 558 | self.fireEvent("error", err) 559 | return 560 | 561 | def fireEvent(self, evt, args=''): 562 | # 563 | self._event.emit(evt, args) 564 | 565 | def addWordEvent(self, wd_entry): 566 | 567 | self._add_word_event.emit(wd_entry) 568 | 569 | def _fetch_wordlist(self): 570 | self.fireEvent('message', 'Starting fetching wordlists') 571 | self.downloader = CDDownloader() 572 | self.downloader.clean_up() 573 | for wordlist_id in self.config['wordlist_ids']: 574 | self.downloader.fetch_wordlist_entries(wordlist_id) 575 | 576 | for wl_entry in self.downloader.wordlist: 577 | self.wordlist_queue.put(wl_entry) 578 | 579 | def _fetch_words(self): 580 | n = 0 581 | while n < self.max_words and not self.wordlist_queue.empty(): 582 | wl_entry = self.wordlist_queue.get() 583 | self.downloader.clean_up() 584 | self.downloader.wordlist_entry = wl_entry 585 | self.downloader.user_url = wl_entry.word_url 586 | self.downloader.word_id = wl_entry.word_id 587 | self.downloader.get_word_defs() 588 | if self.downloader.word_data: 589 | wd_entry = self.downloader.find_word_by_wl_entry(wl_entry) 590 | if wd_entry != None: 591 | self.addWordEvent(wd_entry) 592 | self.downloader.delete_word_from_wordlist(wl_entry) 593 | n += 1 594 | 595 | 596 | class WebPageView(QDialog): 597 | def __init__(self, user_url=''): 598 | self.user_url = user_url 599 | QDialog.__init__(self) 600 | self.webview = QWebEngineView() 601 | self.profile = QWebEngineProfile("storage", self.webview) 602 | self.cookie_store = self.profile.cookieStore() 603 | self.cookie_store.cookieAdded.connect(self.onCookieAdded) 604 | self.webpage = QWebEnginePage(self.profile, self.webview) 605 | self.webview.setPage(self.webpage) 606 | self.webview.urlChanged.connect(self.onUrlChanged) 607 | self.cookies = [] 608 | self.initUI() 609 | 610 | def initUI(self): 611 | layout = QVBoxLayout() 612 | self.setLayout(layout) 613 | 614 | layout.addWidget(self.webview) 615 | self.webview.load(QUrl(self.user_url)) 616 | self.webview.show() 617 | 618 | def onCookieAdded(self, cookie): 619 | for c in self.cookies: 620 | if c.hasSameIdentifier(cookie): 621 | return 622 | self.cookies.append(QNetworkCookie(cookie)) 623 | 624 | def get_cookie(self): 625 | cookie_builder = "" 626 | for c in self.cookies: 627 | # Get cookies that domain contains cambridge.org 628 | if "cambridge.org" not in str(c.domain()): 629 | continue 630 | 631 | name = bytearray(c.name()).decode() 632 | value = bytearray(c.value()).decode() 633 | cookie_builder = name + "=" + value + "; " + cookie_builder 634 | return cookie_builder 635 | 636 | def onUrlChanged(self): 637 | if self.webview.url().toString() != AUTH_URL: 638 | self.accept() 639 | 640 | 641 | class MyQWebEngineView(QWebEngineView): 642 | def __init__(self): 643 | QWebEngineView.__init__(self) 644 | self.urlChanged.connect(self.url_changed) 645 | 646 | def createWindow(self, type=None): 647 | QMessageBox.warning(mw, 'Link is not provided', str(type)) 648 | wpv = WebPageView() 649 | wpv.exec() 650 | 651 | def url_changed(self): 652 | QMessageBox.warning(mw, 'Link is not provided', 'URL changed') 653 | --------------------------------------------------------------------------------