├── icons
    ├── add.png
    ├── keep.png
    ├── play.png
    ├── delete.png
    ├── blacklist.png
    ├── camb_icon.png
    ├── cambridge_icon.png
    ├── download_note_audio.png
    ├── download_side_audio.png
    └── download_audio_manual.png
├── __init__.py
├── _names.py
├── README.md
├── field_data.py
├── main.py
├── .gitattributes
├── styles.py
├── .github
    └── workflows
    │   └── main.yml
├── mediafile_utils.py
├── download_entry.py
├── .gitignore
├── get_fields.py
├── download.py
├── downloader.py
├── utils.py
├── Cambridge.py
└── gui.py


/icons/add.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/add.png


--------------------------------------------------------------------------------
/icons/keep.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/keep.png


--------------------------------------------------------------------------------
/icons/play.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/play.png


--------------------------------------------------------------------------------
/icons/delete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/delete.png


--------------------------------------------------------------------------------
/icons/blacklist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/blacklist.png


--------------------------------------------------------------------------------
/icons/camb_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/camb_icon.png


--------------------------------------------------------------------------------
/icons/cambridge_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/cambridge_icon.png


--------------------------------------------------------------------------------
/icons/download_note_audio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_note_audio.png


--------------------------------------------------------------------------------
/icons/download_side_audio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_side_audio.png


--------------------------------------------------------------------------------
/icons/download_audio_manual.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/am-silex/anki_cambridge/HEAD/icons/download_audio_manual.png


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- mode: python ; coding: utf-8 -*-
 2 | #
 3 | # License: GNU AGPL, version 3 or later;
 4 | # http://www.gnu.org/copyleft/agpl.html
 5 | 
 6 | """
 7 | Anki-2 add-on to create notes from Cambridge Dictionary 
 8 | 
 9 | 
10 | """
11 | 
12 | import sys
13 | from os.path import dirname, join
14 | 
15 | sys.path.append(join(dirname(__file__), 'lib'))
16 | 
17 | __version__ = "0.0.2"
18 | 
19 | from . import main
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/_names.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Defines constants and names
 3 | 
 4 | """
 5 | 
 6 | 
 7 | 
# Attribute name under which main.py stores the link dialogue on ``mw``.
LINK_DLG_NAME = 'link_dlg_name'
# NOTE(review): presumably the matching attribute name for the word
# definitions dialogue -- its use is not visible here, confirm in gui.py.
WORD_DEFS_NAME = 'word_defs_name'
# NOTE(review): looks like the name of the Anki note type (model) used by
# this add-on -- confirm against the code that creates the model.
CAMBRIDGE_MODEL = 'Cambridge Dictionary'
ADDON_NAME = 'anki_cambridge'
# Keyboard shortcut assigned to the "Create new note(s) from link" action.
CREATE_NEW_NOTES_SHORTCUT = "Ctrl+l"

# Request constants
# Cambridge Dictionary endpoints and the browser User-Agent string.
WORDLIST_URL = 'https://dictionary.cambridge.org/plus/wordlist'
AUTH_URL = 'https://dictionary.cambridge.org/auth/signin?rid=1'
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
CAMBRIDGE_API_BASE = 'https://dictionary.cambridge.org/plus/api/'
URL_CAMBRIDGE_PLUS_BASE = 'https://dictionary.cambridge.org/plus/'
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![Publish on ankiweb.net](https://github.com/am-silex/anki_cambridge/actions/workflows/main.yml/badge.svg)
 2 | 
 3 | Anki Add-on for integration with Cambridge Dictionary website - https://dictionary.cambridge.org/
 4 | 
 5 | IMPORTANT: This add-on doesn't use an official API; it relies on web scraping only.
 6 | 
 7 | ### What's new:
 8 |  - Upgraded to Qt6
 9 |  - Creating notes from link to a word (word title, definition, grammar, IPA, sound, meanings, examples)
10 |  - Fetching words from your word lists (with auto-deletion)
11 |  - Config settings management - save cookie, word list IDs
12 |  - Native authentication - through Cambridge account
13 | 
14 | ### Auth with native authentication
15 | 
16 | - Click on Sign in
17 | - Enter your Cambridge email and password
18 | - Click on Log in 
19 | - Once you are authenticated, you can close the web window
20 | - The cookie will be stored automatically in the configuration
21 | 
22 | ### Development
23 | 
24 | Official Anki Add-on writing [reference](https://addon-docs.ankiweb.net/intro.html)
25 | 


--------------------------------------------------------------------------------
/field_data.py:
--------------------------------------------------------------------------------
 1 | # -*- mode: python ; coding: utf-8 -*-
 2 | #
 3 | # Copyright © 2015 Roland Sieker <ospalh@gmail.com>
 4 | #
 5 | # License: GNU AGPL, version 3 or later;
 6 | # http://www.gnu.org/copyleft/agpl.html
 7 | 
 8 | """
 9 | Two classes to store information for the downloader
10 | """
11 | #
12 | from anki.sound import stripSounds
13 | from anki.template import furigana
14 | from anki.utils import stripHTML
15 | 
# Some people put a katakana interpunct 「・」 between the kana that belong
# to different kanji. Set this switch to True to strip those interpuncts
# from the word before it is used for download requests.
strip_interpunct = False


class FieldData(object):
    """Field names plus the cleaned-up word text used by the downloader.

    Attributes:
        word_field_name: name of the note field holding the word.
        audio_field_name: name of the note field that receives the audio.
        word: the word with line-break tags replaced by spaces and all
            whitespace runs collapsed to a single space.
    """

    def __init__(self, w_field, a_field, word):
        self.word_field_name = w_field
        self.audio_field_name = a_field
        # Line-break handling taken from aqt/browser.py: both spellings of
        # the break tag become a plain space.
        text = word
        for br_tag in ('<br>', '<br />'):
            text = text.replace(br_tag, ' ')
        if strip_interpunct:
            text = text.replace('・', '')
        # stripHTML()/stripSounds() are intentionally not applied here.
        # Splitting and re-joining leaves exactly one space between words.
        self.word = ' '.join(text.split())

    @property
    def empty(self):
        """True when no word text is left after clean-up."""
        return not self.word

    @property
    def split(self):
        """Always False for this class."""
        return False
44 | 
45 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # from PyQt6.QtGui import QIcon, QFont
 2 | # from PyQt6.QtWidgets import *
 3 | 
 4 | # from aqt import mw
 5 | # from aqt.utils import tooltip
 6 | # from anki.hooks import addHook
 7 | 
 8 | from aqt import mw
 9 | from aqt.qt import QAction, QMenu, QDialog
10 | from aqt.utils import showInfo
11 | 
12 | from .gui import *
13 | from ._names import *
14 | 
15 | from .Cambridge import CDDownloader
16 | 
17 | 
def ask_user_for_link():
    """Ask for a dictionary link and show the definitions that were fetched.

    A LinkDialogue is created, stored on ``mw`` under LINK_DLG_NAME and
    executed. When it is accepted and ``mw.cddownloader`` holds word data,
    a WordDefDialogue is opened with that data and the downloader's word.
    """
    link_dialog = LinkDialogue()
    setattr(mw, LINK_DLG_NAME, link_dialog)
    result = link_dialog.exec()
    cd = mw.cddownloader
    # Nothing to show when the dialogue was cancelled or nothing was fetched.
    if result != QDialog.DialogCode.Accepted or not cd.word_data:
        return
    definitions_dialog = WordDefDialogue(cd.word_data, cd.word)
    definitions_dialog.exec()
27 | 
28 | 
def open_main_windows_addon():
    """Create the add-on's AddonConfigWindow and run it via exec()."""
    config_window = AddonConfigWindow()
    config_window.exec()
32 | 
33 | 
def parse_saved_wl():
    """Create a WParseSavedWL, keep it on ``mw.wl_pareser`` and run parse()."""
    parser = WParseSavedWL()
    # The attribute name (including its spelling) is kept as-is.
    mw.wl_pareser = parser
    parser.parse()
37 | 
38 | 
# Module-level wiring, executed when the add-on is imported: build a
# "Cambridge Dictionary" submenu under Anki's Edit menu and attach three
# actions to it. Everything is stored as an attribute on ``mw``.
mw.edit_cambridge_submenu = QMenu(u"&Cambridge Dictionary", mw)
mw.form.menuEdit.addSeparator()
mw.form.menuEdit.addMenu(mw.edit_cambridge_submenu)
# Single word: create note(s) from a link, reachable through
# CREATE_NEW_NOTES_SHORTCUT.
mw.create_notes_from_link_action = QAction(mw)
mw.create_notes_from_link_action.setText("Create new note(s) from link")
mw.create_notes_from_link_action.setToolTip("Fetch word definitions from provided link.")
mw.create_notes_from_link_action.setShortcut(CREATE_NEW_NOTES_SHORTCUT)

mw.create_notes_from_link_action.triggered.connect(ask_user_for_link)
mw.edit_cambridge_submenu.addAction(mw.create_notes_from_link_action)

# Saved word lists: triggers parse_saved_wl().
mw.parse_saved_wl_action = QAction(mw)
mw.parse_saved_wl_action.setText("Fetch new words from user wordlists")
mw.parse_saved_wl_action.setToolTip("Fetch new words from user wordlists")
mw.parse_saved_wl_action.triggered.connect(parse_saved_wl)
mw.edit_cambridge_submenu.addAction(mw.parse_saved_wl_action)

# Add-on settings: separated from the fetch actions, opens the add-on
# window through open_main_windows_addon().
mw.edit_cambridge_submenu.addSeparator()
mw.open_main_windows_action = QAction(mw)
mw.open_main_windows_action.setText("Cambridge Addon")
mw.open_main_windows_action.setToolTip("Open Cambridge Addon main window.")

mw.open_main_windows_action.triggered.connect(open_main_windows_addon)
mw.edit_cambridge_submenu.addAction(mw.open_main_windows_action)

# Downloader instance stored on ``mw``; ask_user_for_link() reads it from
# there.
mw.cddownloader = CDDownloader()
68 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Set default behavior to automatically normalize line endings.
 3 | ###############################################################################
 4 | * text=auto eol=lf
 5 | 
 6 | ###############################################################################
 7 | # Set default behavior for command prompt diff.
 8 | #
 9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs     diff=csharp
14 | 
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following 
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln       merge=binary
26 | #*.csproj    merge=binary
27 | #*.vbproj    merge=binary
28 | #*.vcxproj   merge=binary
29 | #*.vcproj    merge=binary
30 | #*.dbproj    merge=binary
31 | #*.fsproj    merge=binary
32 | #*.lsproj    merge=binary
33 | #*.wixproj   merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj   merge=binary
36 | #*.wwaproj   merge=binary
37 | 
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg   binary
44 | #*.png   binary
45 | #*.gif   binary
46 | *.png   binary
47 | 
48 | ###############################################################################
49 | # diff behavior for common document formats
50 | # 
51 | # Convert binary document formats to text before diffing them. This feature
52 | # is only available from the command line. Turn it on by uncommenting the 
53 | # entries below.
54 | ###############################################################################
55 | #*.doc   diff=astextplain
56 | #*.DOC   diff=astextplain
57 | #*.docx  diff=astextplain
58 | #*.DOCX  diff=astextplain
59 | #*.dot   diff=astextplain
60 | #*.DOT   diff=astextplain
61 | #*.pdf   diff=astextplain
62 | #*.PDF   diff=astextplain
63 | #*.rtf   diff=astextplain
64 | #*.RTF   diff=astextplain
65 | 


--------------------------------------------------------------------------------
/styles.py:
--------------------------------------------------------------------------------
 1 | model_css = '''
 2 |     .card {
 3 |         font-family: arial;
 4 |         font-size: 20px;
 5 |         text-align: center;
 6 |         color: black;
 7 |         background-color:
 8 |         white;
 9 |         }
10 |     .from {
11 |         font-style: italic;
12 |     }'''
13 | 
14 | std_word = """
15 |     <h3 style="text-align: center;"><a href="https://dictionary.cambridge.org/dictionary/english/{{Word}}">{{Word}}</a></h3>
16 |     <blockquote>
17 |     <div style="text-align: center;">{{Grammar}} {{Pronunciation}}</div>
18 |     </blockquote>
19 |     <div style="text-align: center;">{{Meaning}}</div>
20 |     <br>
21 |     <div style="text-align: center;font-style: italic;">{{Examples}}</div>
22 |     """
23 | 
24 | std_word_def_sound = """
25 |     <h3 style="text-align: center;"><a href="https://dictionary.cambridge.org/dictionary/english/{{Word}}">{{Word}}</a></h3>   
26 |     <blockquote>
27 |     <div style="text-align: center;">{{Grammar}} {{Pronunciation}}</div>
28 |     </blockquote>
29 |     <div style="text-align: center;">{{Meaning}}</div>
30 |     <br>
31 |     <div style="text-align: center;font-style: italic;">{{Examples}}</div>
32 |     <hr>
33 |     <div style="text-align: center;">{{Picture}}</div>
34 |     <div style="text-align: center;">{{Definition}}</div>
35 |     <div style="text-align: center;">{{Audio}}</div>
36 |     <sup style="text-align: center;">{{SynonymAntonym}}<sup>
37 |     """
38 | 
39 | std_def = """
40 |     <div style="text-align: center;">{{Picture}}</div>
41 |     <div style="text-align: center;">{{Definition}}</div>
42 |     """
43 | 
44 | std_def_word_sound = """
45 |     <div style="text-align: center;">{{Picture}}</div>
46 |     <div style="text-align: center;">{{Definition}}</div>
47 |     <hr>
48 |     <h3 style="text-align: center;"><a href="https://dictionary.cambridge.org/dictionary/english/{{Word}}">{{Word}}</a></h3> 
49 |     <blockquote>
50 |     <div style="text-align: center;">{{Grammar}} {{Pronunciation}}</div>
51 |     </blockquote>
52 |     <div style="text-align: center;">{{Meaning}}</div>
53 |     <br>
54 |     <div style="text-align: center;font-style: italic;">{{Examples}}</div>
55 |     <hr>
56 |     <div style="text-align: center;">{{Audio}}</div>
57 |     <sup style="text-align: center;">{{SynonymAntonym}}<sup>
58 |     """
59 | 
60 | std_sound = """
61 |     <div style="text-align: center;">Listen.{{Audio}}</div>
62 |     """
63 | 
64 | std_sound_word_def = """
65 |     <div style="text-align: center;">Listen.{{Audio}}</div>
66 |     <hr>
67 |     <h3 style="text-align: center;"><a href="https://dictionary.cambridge.org/dictionary/english/{{Word}}">{{Word}}</a></h3>    
68 |     <blockquote>
69 |     <div style="text-align: center;">{{Grammar}} {{Pronunciation}}</div>
70 |     </blockquote>
71 |     <div style="text-align: center;">{{Picture}}</div>
72 |     <div style="text-align: center;">{{Meaning}}</div>
73 |     <br>
74 |     <div style="text-align: center;font-style: italic;">{{Examples}}</div>
75 |     <sup style="text-align: center;">{{SynonymAntonym}}<sup>
76 |     """
77 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: Publish on ankiweb.net
 4 | 
 5 | # Controls when the action will run. 
 6 | on:
 7 |   # Triggers the workflow on push events, but only for the master branch
 8 |   push:
 9 |     branches: [ master ]
10 | 
11 |   # Allows you to run this workflow manually from the Actions tab
12 |   workflow_dispatch:
13 | 
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 |   # This workflow contains a single job called "build"
17 |   build:
18 |     # The type of runner that the job will run on
19 |     runs-on: ubuntu-latest
20 | 
21 |     # Steps represent a sequence of tasks that will be executed as part of the job
22 |     steps:
23 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 |       - uses: actions/checkout@v3
25 |       - uses: actions/setup-python@v4
26 |       
27 |       - name: Archive all repo for sending
28 |         run: |
29 |           zip -r $GITHUB_WORKSPACE/$GITHUB_SHA.zip . -x '*.git*' '.github',
30 |           ls -d -1 "$PWD/"**/
31 |           ls -l
32 | 
33 |       # - name: Get csrf token from Anki
34 |       #   id: authRequest
35 |       #   uses: fjogeleit/http-request-action@master
36 |       #   with:
37 |       #     url: 'https://ankiweb.net/account/login'
38 |       #     method: 'GET'
39 |     
40 |       # - name: Retrieve CSRF token
41 |       #   run: echo ${{ steps.authRequest.outputs.response }} | grep  -oP 'csrf_token" value="\K([^"]+)' > token.txt
42 |           
43 |       - name: Authenticate with Anki
44 |         env:
45 |           ANKI_LOGIN: ${{ secrets.ANKILOGI_N }}
46 |           ANKI_PASSWORD: ${{ secrets.ANKIPASSWORD }}
47 |           
48 |         run: |
49 |           curl -d $'\n\u0010'$ANKI_LOGIN$'\u0012\u000'$ANKI_PASSWORD https://ankiweb.net/svc/account/login -c $GITHUB_WORKSPACE/cookie -b 'ankiweb=login' > null
50 |           echo "ANKI_COOKIE="$(cat $GITHUB_WORKSPACE/cookie | grep -oP '^#HttpOnly_ankiweb.net.+ankiweb\s+\K.+$') >> $GITHUB_ENV
51 |         
52 |         
53 |       - name: Upload to anki
54 |         run: >
55 |           curl
56 |           -F 'title=Cambridge Dictionary'
57 |           -F 'tags=cambridge, addon, word lists'
58 |           -F 'supportURL=https://github.com/am-silex/anki_cambridge'
59 |           -F 'minVer0=0'
60 |           -F 'maxVer0=0'
61 |           -F "desc=Anki Add-on for downloading words definitions and associating resources (pronunciation, images, etc) from Cambridge Dictionary website. What it's done (so far): Creating notes from link to a word (word title, definition, grammar, IPA, sound, meanings, examples) Fetching words from your word lists (with deleting words after successful fetching). Anyone willing to improve my idea and contribute is welcome on addon's GitHub: https://github.com/am-silex/anki_cambridge"
62 |           -F 'id=1875288089'
63 |           -F 'submit=Update'
64 |           -F 'v21file0=@${{ github.workspace }}/${{ github.sha }}.zip'
65 |           -b 'ankiweb=${{ env.ANKI_COOKIE }}'
66 |           -v
67 |           https://ankiweb.net/shared/upload
68 |           
69 | 


--------------------------------------------------------------------------------
/mediafile_utils.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python ; coding: utf-8 -*-
  2 | #
  3 | # Copyright © 2012–14 Roland Sieker <ospalh@gmail.com>
  4 | #
  5 | # Based on deurl-files.py by  Damien Elmes <anki@ichi2.net>
  6 | #
  7 | # License: GNU AGPL, version 3 or later;
  8 | # http://www.gnu.org/copyleft/agpl.html
  9 | #
 10 | 
 11 | """
 12 | Helper function to deal with file names.
 13 | """
 14 | 
 15 | import os
 16 | import re
 17 | import shutil
 18 | import unicodedata
 19 | 
 20 | from aqt import mw
 21 | from anki.utils import isMac, stripHTML
 22 | 
 23 | 
 24 | class dl_entry():
 25 |     def __init__(self,abspath,base_name):
 26 |         self.base_name      = base_name
 27 |         self.file_extension = os.path.splitext(abspath)[1].strip()
 28 |         self.file_path      = os.path.abspath(abspath)
 29 | 
 30 | def free_media_name(base, end):
 31 |     """Return a useful media name
 32 | 
 33 |     Return a pair of a file name that can be used for the media file,
 34 |     and the whole file path. The name is based on the base name and
 35 |     end, but doesn’t exist, nor does it clash with another file
 36 |     different only in upper/lower case.
 37 |     If no name can be found, a ValueError is raised.
 38 |     """
 39 |     base = stripHTML(base)
 40 |     # Strip the ‘invalidFilenameChars’ by hand.
 41 |     base = re.sub(r'[\\/:\*?\'"<>\|]', '', base)
 42 |     base = unicodedata.normalize('NFC', base)
 43 |     # Looks like the normalization issue has finally been
 44 |     # solved. Always use NFC versions of file names now.
 45 |     mdir = mw.col.media.dir()
 46 |     if not exists_lc(mdir, base + end):
 47 |         return os.path.join(mdir, base + end), base + end
 48 |     for i in range(1, 10000):
 49 |         # Don't be silly. Give up after 9999 tries (by falling out of
 50 |         # this loop).
 51 |         long_name = '{0}_{1}{2}'.format(base, i, end)
 52 |         if not exists_lc(mdir, long_name):
 53 |             return os.path.join(mdir, long_name), long_name
 54 |     # The only way we can have arrived here is by unsuccessfully
 55 |     # trying the 10000 names.
 56 |     raise ValueError('Could not find free name.')
 57 | 
 58 | def exists_lc(path, name):
 59 |     """Test if file name clashes with name of extant file.
 60 | 
 61 |     On Mac OS X we, simply check if the file exists.
 62 |     On other systems, we check if the name would clashes with an
 63 |     existing file’s name. That is, we check for files that have the same
 64 |     name when both are pulled to lower case and Unicode normalized.
 65 |     """
 66 |     # The point is that like this syncing from Linux to Macs/Windows
 67 |     # and from Linux/Windows to Macs should work savely. Not much to
 68 |     # do on Macs, then.
 69 |     if isMac:
 70 |         return os.path.exists(os.path.join(path, name))
 71 |     ln_name = unicodedata.normalize('NFC', name.lower())
 72 |     for fname in os.listdir(path):
 73 |         if unicodedata.normalize('NFC', fname.lower()) == ln_name:
 74 |             return True
 75 |     # After the loop, none found.
 76 |     return False
 77 | 
 78 | def unmunge_to_mediafile(dl_entry):
 79 |     """
 80 |     Move the data to the media folder.
 81 | 
 82 |     Determine a free media name and move the data there from the
 83 |     tempfile.
 84 |     """
 85 |     try:
 86 |         media_path, media_file_name = free_media_name(
 87 |             dl_entry.base_name, dl_entry.file_extension)
 88 |         shutil.copy(dl_entry.file_path, media_path)
 89 |     except :
 90 |         media_path =''
 91 |         media_file_name =''
 92 |         pass
 93 |     
 94 |     return media_file_name
 95 | 
 96 | def get_file_entry(file,base_name):
 97 |     f_entry = dl_entry(file,base_name)
 98 |     return f_entry
 99 | 
100 | 


--------------------------------------------------------------------------------
/download_entry.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python ; coding: utf-8 -*-
  2 | #
  3 | # Copyright © 2015 Paul Hartmann <phaaurlt@gmail.com>
  4 | # Copyright © 2012–15 Roland Sieker <ospalh@gmail.com>
  5 | #
  6 | # License: GNU AGPL, version 3 or later;
  7 | # http://www.gnu.org/copyleft/agpl.html
  8 | 
  9 | import os
 10 | 
 11 | from .blacklist import add_black_hash
 12 | from .processors import processor
 13 | from .mediafile_utils import unmunge_to_mediafile
 14 | 
 15 | if processor:
 16 |     import pydub
 17 |     # See processors/__init__.py. We try the import there. If we have
 18 |     # a processor, this import should work.
 19 | 
 20 | class DownloadEntry(object):
 21 |     """Data about a single file downloaded by a downloader"""
 22 |     def __init__(self, field_data, file_path, extras, icon):
 23 |         self.file_path = file_path
 24 |         # Absolute file path of the downloaded audio file
 25 |         self.word = field_data.word
 26 |         self.word_field_name = field_data.word_field_name
 27 |         self.audio_field_name = field_data.audio_field_name
 28 |         self.file_extension = '.mp3'
 29 |         # The file extension (with dot)
 30 |         self.extras = extras
 31 |         # A dict with strings of interesting informations, like
 32 |         # meaning numbers or name of speaker. Usually contains the
 33 |         # source.
 34 |         self.icon = icon
 35 |         # The downloader’s favicon
 36 |         self.action = Action.Add
 37 | 
 38 |     @property
 39 |     def display_word(self):
 40 |         return self.word
 41 | 
 42 |     @property
 43 |     def base_name(self):
 44 |         return self.word
 45 | 
 46 |     @property
 47 |     def entry_hash(self):
 48 |         return None
 49 |         # We are back to the way where the downloader checks
 50 |         # whether the file has a bad hash. This now doubles as the
 51 |         # old show_skull_and_bones. We show that button when this
 52 |         # has an interesting value. And that is set in JpodDownleadEntry
 53 | 
 54 |     def process(self):
 55 |         """Normalize &c. the audio file, if possible
 56 | 
 57 |         When we have an audio processor, process the file
 58 |         (i.e. normalize, remove silence, convert to preferred format)
 59 |         and update self.
 60 |         """
 61 |         if processor:
 62 |             try:
 63 |                 new_fp, new_sffx = processor.process(self)
 64 |             except pydub.exceptions.CouldntDecodeError:
 65 |                 self.action = Action.Delete
 66 |             else:
 67 |                 self.file_path = new_fp
 68 |                 self.file_extension = new_sffx
 69 | 
 70 |     def dispatch(self, note):
 71 |         """Do what should be done with the downloaded file
 72 | 
 73 |         Depending on self.action, do that action.
 74 | 
 75 |         * That is, move the file do the media folder if we want it
 76 |           on the note or just want to keep it.
 77 |         * Add it to the note if that’s what we want.
 78 |         * Delete it if we want just delete or blacklist it.
 79 |         * Blacklist the hash if that’s what we want."""
 80 |         if self.action == Action.Add or self.action == Action.Keep:
 81 |             media_fn = unmunge_to_mediafile(self)
 82 |             if self.action == Action.Add:
 83 |                 note[self.audio_field_name] += '[sound:' + media_fn + ']'
 84 |         if self.action == Action.Delete or self.action == Action.Blacklist:
 85 |             os.remove(self.file_path)
 86 |         if self.action == Action.Blacklist:
 87 |             add_black_hash(self.entry_hash)
 88 | 
 89 | 
 90 | class JpodDownloadEntry(DownloadEntry):
 91 |     """Data about a single file downloaded by a downloader"""
 92 |     def __init__(
 93 |             self, japanese_field_data, file_path, extras, icon, file_hash):
 94 |         DownloadEntry.__init__(
 95 |             self, japanese_field_data, file_path, extras, icon)
 96 |         self.kanji = japanese_field_data.kanji
 97 |         self.kana = japanese_field_data.kana
 98 |         self.hash_ = file_hash
 99 | 
100 |     @property
101 |     def base_name(self):
102 |         if self.kana and self.kana != self.kanji:
103 |             return u"{kanji}_{kana}".format(kanji=self.kanji, kana=self.kana)
104 |         return u"{kanji}".format(kanji=self.kanji)
105 | 
106 |     @property
107 |     def display_word(self):
108 |         if self.kana and self.kana != self.kanji:
109 |             return u"{kanji}（{kana}）".format(kanji=self.kanji, kana=self.kana)
110 |             # N.B.: those are “full width” (CJK/quad) parentheses
111 |         return u"{kanji}".format(kanji=self.kanji)
112 | 
113 |     @property
114 |     def entry_hash(self):
115 |         return self.hash_
116 | 
117 | 
class Action(object):
    """Integer codes for what to do with a downloaded file."""
    Add = 0
    Keep = 1
    Delete = 2
    Blacklist = 3
120 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Build results
 17 | [Dd]ebug/
 18 | [Dd]ebugPublic/
 19 | [Rr]elease/
 20 | [Rr]eleases/
 21 | x64/
 22 | x86/
 23 | [Aa][Rr][Mm]/
 24 | [Aa][Rr][Mm]64/
 25 | bld/
 26 | [Bb]in/
 27 | [Oo]bj/
 28 | [Ll]og/
 29 | 
 30 | # Visual Studio 2015/2017 cache/options directory
 31 | #.vs/
 32 | # Uncomment if you have tasks that create the project's static files in wwwroot
 33 | #wwwroot/
 34 | 
 35 | # Visual Studio 2017 auto generated files
 36 | Generated\ Files/
 37 | 
 38 | # MSTest test Results
 39 | [Tt]est[Rr]esult*/
 40 | [Bb]uild[Ll]og.*
 41 | 
 42 | # NUNIT
 43 | *.VisualState.xml
 44 | TestResult.xml
 45 | 
 46 | # Build Results of an ATL Project
 47 | [Dd]ebugPS/
 48 | [Rr]eleasePS/
 49 | dlldata.c
 50 | 
 51 | 
 52 | # Benchmark Results
 53 | BenchmarkDotNet.Artifacts/
 54 | 
 55 | # .NET Core
 56 | project.lock.json
 57 | project.fragment.lock.json
 58 | artifacts/
 59 | 
 60 | # StyleCop
 61 | StyleCopReport.xml
 62 | 
 63 | # Files built by Visual Studio
 64 | *_i.c
 65 | *_p.c
 66 | *_h.h
 67 | *.ilk
 68 | *.meta
 69 | *.obj
 70 | *.iobj
 71 | *.pch
 72 | *.pdb
 73 | *.ipdb
 74 | *.pgc
 75 | *.pgd
 76 | *.rsp
 77 | *.sbr
 78 | *.tlb
 79 | *.tli
 80 | *.tlh
 81 | *.tmp
 82 | *.tmp_proj
 83 | *_wpftmp.csproj
 84 | *.log
 85 | *.vspscc
 86 | *.vssscc
 87 | .builds
 88 | *.pidb
 89 | *.svclog
 90 | *.scc
 91 | *.pyproj
 92 | 
 93 | # Chutzpah Test files
 94 | _Chutzpah*
 95 | 
 96 | # Visual C++ cache files
 97 | ipch/
 98 | *.aps
 99 | *.ncb
100 | *.opendb
101 | *.opensdf
102 | *.sdf
103 | *.cachefile
104 | *.VC.db
105 | *.VC.VC.opendb
106 | 
107 | # Visual Studio profiler
108 | *.psess
109 | *.vsp
110 | *.vspx
111 | *.sap
112 | 
113 | # Visual Studio Trace Files
114 | *.e2e
115 | 
116 | # TFS 2012 Local Workspace
117 | $tf/
118 | 
119 | # Guidance Automation Toolkit
120 | *.gpState
121 | 
122 | # ReSharper is a .NET coding add-in
123 | _ReSharper*/
124 | *.[Rr]e[Ss]harper
125 | *.DotSettings.user
126 | 
127 | # JustCode is a .NET coding add-in
128 | .JustCode
129 | 
130 | # TeamCity is a build add-in
131 | _TeamCity*
132 | 
133 | # DotCover is a Code Coverage Tool
134 | *.dotCover
135 | 
136 | # AxoCover is a Code Coverage Tool
137 | .axoCover/*
138 | !.axoCover/settings.json
139 | 
140 | # Visual Studio code coverage results
141 | *.coverage
142 | *.coveragexml
143 | 
144 | # NCrunch
145 | _NCrunch_*
146 | .*crunch*.local.xml
147 | nCrunchTemp_*
148 | 
149 | # MightyMoose
150 | *.mm.*
151 | AutoTest.Net/
152 | 
153 | # Web workbench (sass)
154 | .sass-cache/
155 | 
156 | # Installshield output folder
157 | [Ee]xpress/
158 | 
159 | # DocProject is a documentation generator add-in
160 | DocProject/buildhelp/
161 | DocProject/Help/*.HxT
162 | DocProject/Help/*.HxC
163 | DocProject/Help/*.hhc
164 | DocProject/Help/*.hhk
165 | DocProject/Help/*.hhp
166 | DocProject/Help/Html2
167 | DocProject/Help/html
168 | 
169 | # Click-Once directory
170 | publish/
171 | 
172 | # Publish Web Output
173 | *.[Pp]ublish.xml
174 | *.azurePubxml
175 | # Note: Comment the next line if you want to checkin your web deploy settings,
176 | # but database connection strings (with potential passwords) will be unencrypted
177 | *.pubxml
178 | *.publishproj
179 | 
180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
181 | # checkin your Azure Web App publish settings, but sensitive information contained
182 | # in these scripts will be unencrypted
183 | PublishScripts/
184 | 
185 | # NuGet Packages
186 | *.nupkg
187 | # The packages folder can be ignored because of Package Restore
188 | **/[Pp]ackages/*
189 | # except build/, which is used as an MSBuild target.
190 | !**/[Pp]ackages/build/
191 | # Uncomment if necessary however generally it will be regenerated when needed
192 | #!**/[Pp]ackages/repositories.config
193 | # NuGet v3's project.json files produces more ignorable files
194 | *.nuget.props
195 | *.nuget.targets
196 | 
197 | # Microsoft Azure Build Output
198 | csx/
199 | *.build.csdef
200 | 
201 | # Microsoft Azure Emulator
202 | ecf/
203 | rcf/
204 | 
205 | # Windows Store app package directories and files
206 | AppPackages/
207 | BundleArtifacts/
208 | Package.StoreAssociation.xml
209 | _pkginfo.txt
210 | *.appx
211 | 
212 | # Visual Studio cache files
213 | # files ending in .cache can be ignored
214 | *.[Cc]ache
215 | # but keep track of directories ending in .cache
216 | !?*.[Cc]ache/
217 | 
218 | # Others
219 | ClientBin/
220 | ~$*
221 | *~
222 | *.dbmdl
223 | *.dbproj.schemaview
224 | *.jfm
225 | *.pfx
226 | *.publishsettings
227 | orleans.codegen.cs
228 | 
229 | # Including strong name files can present a security risk
230 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
231 | #*.snk
232 | 
233 | # Since there are multiple workflows, uncomment next line to ignore bower_components
234 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
235 | #bower_components/
236 | 
237 | # RIA/Silverlight projects
238 | Generated_Code/
239 | 
240 | # Backup & report files from converting an old project file
241 | # to a newer Visual Studio version. Backup files are not needed,
242 | # because we have git ;-)
243 | _UpgradeReport_Files/
244 | Backup*/
245 | UpgradeLog*.XML
246 | UpgradeLog*.htm
247 | ServiceFabricBackup/
248 | *.rptproj.bak
249 | 
250 | # SQL Server files
251 | *.mdf
252 | *.ldf
253 | *.ndf
254 | 
255 | # Business Intelligence projects
256 | *.rdl.data
257 | *.bim.layout
258 | *.bim_*.settings
259 | *.rptproj.rsuser
260 | *- Backup*.rdl
261 | 
262 | # Microsoft Fakes
263 | FakesAssemblies/
264 | 
265 | # GhostDoc plugin setting file
266 | *.GhostDoc.xml
267 | 
268 | # Node.js Tools for Visual Studio
269 | .ntvs_analysis.dat
270 | node_modules/
271 | 
272 | # Visual Studio 6 build log
273 | *.plg
274 | 
275 | # Visual Studio 6 workspace options file
276 | *.opt
277 | 
278 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
279 | *.vbw
280 | 
281 | # Visual Studio LightSwitch build output
282 | **/*.HTMLClient/GeneratedArtifacts
283 | **/*.DesktopClient/GeneratedArtifacts
284 | **/*.DesktopClient/ModelManifest.xml
285 | **/*.Server/GeneratedArtifacts
286 | **/*.Server/ModelManifest.xml
287 | _Pvt_Extensions
288 | 
289 | # Paket dependency manager
290 | .paket/paket.exe
291 | paket-files/
292 | 
293 | # FAKE - F# Make
294 | .fake/
295 | 
296 | # JetBrains Rider
297 | .idea/
298 | *.sln.iml
299 | 
300 | # CodeRush personal settings
301 | .cr/personal
302 | 
303 | # Python Tools for Visual Studio (PTVS)
304 | __pycache__/
305 | *.pyc
306 | 
307 | # Cake - Uncomment if you are using it
308 | # tools/**
309 | # !tools/packages.config
310 | 
311 | # Tabs Studio
312 | *.tss
313 | 
314 | # Telerik's JustMock configuration file
315 | *.jmconfig
316 | 
317 | # BizTalk build output
318 | *.btp.cs
319 | *.btm.cs
320 | *.odx.cs
321 | *.xsd.cs
322 | 
323 | # OpenCover UI analysis results
324 | OpenCover/
325 | 
326 | # Azure Stream Analytics local run output
327 | ASALocalRun/
328 | 
329 | # MSBuild Binary and Structured Log
330 | *.binlog
331 | 
332 | # NVidia Nsight GPU debugger configuration file
333 | *.nvuser
334 | 
335 | # MFractors (Xamarin productivity tool) working folder
336 | .mfractor/
337 | 
338 | # Local History for Visual Studio
339 | .localhistory/
340 | 
341 | # BeatPulse healthcheck temp database
342 | healthchecksdb
343 | 
344 | #User-defined files
345 | snippets.txt
346 | _old/
347 | .vs
348 | env/
349 | *.sln
350 | config.json
351 | meta.json
352 | word_model.txt
353 | .gitattributes
354 | .gitignore
355 | snippets.txt


--------------------------------------------------------------------------------
/get_fields.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python ; coding: utf-8 -*-
  2 | #
  3 | # Copyright © 2012–15 Roland Sieker <ospalh@gmail.com>
  4 | #
  5 | # License: GNU AGPL, version 3 or later;
  6 | # http://www.gnu.org/copyleft/agpl.html
  7 | 
  8 | """
  9 | Extract field data to download.
 10 | """
 11 | 
 12 | from collections import namedtuple
 13 | import re
 14 | 
 15 | from aqt import mw
 16 | 
 17 | from .field_data import FieldData, JapaneseFieldData
 18 | 
 19 | 
# User-tunable configuration for matching note fields.
#
# Change these to match the field names of your decks. Make sure to
# not use capital letters. We compare these to lower-case only
# versions of the field names. When these lists contain upper-case
# letters, no field will ever be matched and nothing will be
# downloaded.
expression_fields = ['expression', 'word']
# Fields we get the ‘normal’ download text from.
#
# Text from these fields is used by most downloaders. When no field is
# found here, we use the first field.


reading_keys = ['reading', 'kana', 'かな', '仮名']
# Fields we get our Japanese text from.
#
# For Japanesepod we use these fields as source. A ‘Reading’ field is
# typically filled automatically by the Japanese Support add-on in a
# useful way (that is, with the reading in square brackets).


audio_field_keys = ['audio', 'sound']
# Fields we put our downloaded sounds in. Don’t try crazy stuff here.
# A field counts as an audio field when its lower-cased name equals or
# contains one of these keys.

split_kanji_kana = False
# Replace ‘False’ with ‘True’ when you have no kanji in your reading field

# Change this at your own risk.
# Template for a regular expression that finds field references
# ({{Name}}, {{#Name}}, {{filter:Name}}, …) in a card template. The %s
# is filled in with one of the audio_field_keys; the single capture
# group is the field name containing that key.
field_name_re = r'{{(?:[/^#]|[^:}]+:|)([^:}{]*%s[^:}{]*)}}'
 48 | 
 49 | 
 50 | def uniqify_list(seq):
 51 |     """Return a copy of the list with every element appearing only once."""
 52 |     # From http://www.peterbe.com/plog/uniqifiers-benchmark
 53 |     no_dupes = []
 54 |     [no_dupes.append(i) for i in seq if not no_dupes.count(i)]
 55 |     return no_dupes
 56 | 
 57 | 
 58 | def field_data(note, audio_field, reading=False):
 59 |     """Return FieldData when we have a source field
 60 | 
 61 |     Return FieldData when we have a matching source field for our
 62 |     audio field.  """
 63 |     def return_data(idx):
 64 |         source_name = field_names[idx]
 65 |         if reading:
 66 |             return JapaneseFieldData(
 67 |                 source_name, audio_field, note[source_name])
 68 |         else:
 69 |             return FieldData(
 70 |                 source_name, audio_field, note[source_name])
 71 | 
 72 |     a_name = audio_field.lower()
 73 |     field_names = [item[0] for item in note.items()]
 74 |     f_names = [fn.lower() for fn in field_names]
 75 |     # First, look for just audio fields
 76 |     for afk in audio_field_keys:
 77 |         if a_name == afk:
 78 |             if reading:
 79 |                 sources_list = reading_keys
 80 |             else:
 81 |                 sources_list = expression_fields
 82 |             for cnd in sources_list:
 83 |                 for idx, lname in enumerate(f_names):
 84 |                     if cnd == lname:
 85 |                         return return_data(idx)
 86 |             # At this point: The target name is good, but we found no
 87 |             # source name.
 88 |             if not reading:
 89 |                 # Don't give for most languages. Simply use the first
 90 |                 # field. That should work for a lot of people
 91 |                 return return_data(0)
 92 |             else:
 93 |                 # But that doesn't really work for Japanese.
 94 |                 raise KeyError('No source name found (case 1)')
 95 |         # This point: target name is not exactly the field name
 96 |         if afk not in a_name:
 97 |             # And not a substring either
 98 |             continue
 99 |         # Here: the field name contains an audio or sound.
100 |         # Mangle the name as described. For the reading case we get a
101 |         # list. So do a list for the other case as well.
102 |         if reading:
103 |             sources_list = [a_name.replace(afk, rk) for rk in reading_keys]
104 |         else:
105 |             # Here the tricky bit is to remove the right number of '_'
106 |             # or ' ' characters, 0 or 1, but not 2. What we want is:
107 |             # ExampleAudio -> Example
108 |             # Example_Audio -> Example
109 |             # Audio_Example -> Example
110 |             # but
111 |             # Another_Audio_Example -> Another_Example, not Another_Example
112 |             # While a bit tricky, this is not THAT hard to do. (Not
113 |             # lookbehind needed.)
114 |             sources_list = [
115 |                 re.sub(r'[\s_]{0}|{0}[\s_]?'.format(re.escape(afk)),
116 |                        '', a_name, count=1, flags=re.UNICODE)]
117 |         for cnd in sources_list:
118 |             for idx, lname in enumerate(f_names):
119 |                 if cnd == lname:
120 |                     return return_data(idx)
121 |         # We do have audio or sound as sub-string but did not find a
122 |         # maching field.
123 |         raise KeyError('No source field found. (case 2)')
124 |     # No audio field at all.
125 |     raise KeyError('No source field found. (case 3)')
126 | 
127 | 
def field_data_from_kanji_kana(note, fn):
    """Combine the plain and the reading lookup for audio field ``fn``.

    The note is searched twice with ``field_data``: once normally (the
    result carries the kanji text) and once with ``reading=True``. The
    reading result is returned after its ``kanji``, ``word`` and
    ``word_field_name`` attributes were overwritten with the values
    from the plain lookup.
    """
    kanji_data = field_data(note, fn)
    kana_data = field_data(note, fn, reading=True)
    # kana_data has the right type but still needs the kanji text.
    # ``word`` is not used afterwards; it is set anyway.
    kana_data.kanji = kana_data.word = kanji_data.word
    kana_data.word_field_name = kanji_data.word_field_name
    return kana_data
138 | 
139 | 
def get_side_fields(card, note):
    """Return the field data objects for the card side being shown.

    The template of the visible side (question or answer, according to
    the reviewer state) is scanned for references to fields whose name
    contains 'audio' or 'sound'. For each such field that exists in the
    note, the matching text field data is collected; audio fields
    without a usable source are silently skipped.
    """
    side_key = 'qfmt' if 'question' == mw.reviewer.state else 'afmt'
    template = card.template()[side_key]
    known_names = [pair[0] for pair in note.items()]
    mentioned = []
    for key in audio_field_keys:
        # The pattern is filled in with the (old style) % operator
        # because it contains literal {}s, which str.format() would
        # require to be escaped.
        mentioned.extend(
            re.findall(field_name_re % key, template, flags=re.IGNORECASE))
    # Drop duplicates, then everything that is not a field of the note.
    audio_names = [
        name for name in uniqify_list(mentioned) if name in known_names]
    collected = []

    def collect(producer, *args, **kwargs):
        # Store the producer's result; a missing or empty source field
        # (KeyError, ValueError) just means there is nothing to store.
        try:
            collected.append(producer(*args, **kwargs))
        except (KeyError, ValueError):
            pass

    for audio_field in audio_names:
        collect(field_data, note, audio_field)
        if split_kanji_kana:
            collect(field_data_from_kanji_kana, note, audio_field)
        else:
            collect(field_data, note, audio_field, reading=True)
    return collected
184 | 
185 | 
def get_note_fields(note):
    """Return the field data objects for all audio fields of the note.

    Every field whose lower-cased name contains one of the
    ``audio_field_keys`` is treated as an audio field. For each of them
    first the reading variant and then the plain variant of the source
    data is collected; variants without a usable source field are
    silently skipped.
    """
    names = [pair[0] for pair in note.items()]
    collected = []

    def collect(producer, *args, **kwargs):
        # Store the producer's result; a missing or empty source field
        # (KeyError, ValueError) just means there is nothing to store.
        try:
            collected.append(producer(*args, **kwargs))
        except (KeyError, ValueError):
            pass

    for key in audio_field_keys:
        for name in names:
            if key not in name.lower():
                continue
            if split_kanji_kana:
                collect(field_data_from_kanji_kana, note, name)
            else:
                collect(field_data, note, name, reading=True)
            collect(field_data, note, name)
    return collected
217 | 


--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python ; coding: utf-8 -*-
  2 | #
  3 | # License: GNU AGPL, version 3 or later;
  4 | # http://www.gnu.org/copyleft/agpl.html
  5 | 
  6 | """
  7 | Anki2 add-on to download card's fields with audio from Cambridge Dictionary
  8 | 
  9 | 
 10 | """
 11 | 
 12 | import os
 13 | from PyQt6.QtGui import QIcon
 14 | from PyQt6.QtWidgets import QAction, QMenu
 15 | 
 16 | 
 17 | from aqt import mw
 18 | from aqt.utils import tooltip
 19 | from anki.hooks import addHook
 20 | 
 21 | from .downloaders import downloaders
 22 | from .download_entry import DownloadEntry, Action
 23 | from .get_fields import get_note_fields, get_side_fields
 24 | from .language import language_code_from_card, language_code_from_editor
 25 | from .processors import processor
 26 | from .review_gui import review_entries
 27 | from .update_gui import update_data
 28 | 
# Keyboard shortcut assigned to the “create new note” menu action.
CREATE_NEW_NOTE_SHORTCUT = "t"
# DOWNLOAD_SIDE_SHORTCUT = "t"
# DOWNLOAD_MANUAL_SHORTCUT = "Ctrl+t"

icons_dir = os.path.join(mw.pm.addonFolder(), 'downloadaudio', 'icons')
# Place where we keep our icons: the 'downloadaudio/icons' directory
# below Anki's add-on folder.
 35 | 
 36 | 
 37 | def do_download(note, field_data_list, language, hide_text=False):
 38 |     """
 39 |     Download audio data.
 40 | 
 41 |     Go through the list of words and list of sites and download each
 42 |     word from each site. Then call a function that asks the user what
 43 |     to do.
 44 |     """
 45 |     retrieved_entries = []
 46 |     for field_data in field_data_list:
 47 |         if field_data.empty:
 48 |             continue
 49 |         for dloader in downloaders:
 50 |             # Use a public variable to set the language.
 51 |             dloader.language = language
 52 |             try:
 53 |                 # Make it easer inside the downloader. If anything
 54 |                 # goes wrong, don't catch, or raise whatever you want.
 55 |                 dloader.download_files(field_data)
 56 |             except:
 57 |                 #  # Uncomment this raise while testing a new
 58 |                 #  # downloaders.  Also use the “For testing”
 59 |                 #  # downloaders list with your downloader in
 60 |                 #  # downloaders.__init__
 61 |                 # raise
 62 |                 continue
 63 |             retrieved_entries += dloader.downloads_list
 64 |     # Significantly changed the logic. Put all entries in one
 65 |     # list, do stuff with that list of DownloadEntries.
 66 |     for entry in retrieved_entries:
 67 |         # Do the processing before the reviewing now.
 68 |         entry.process()
 69 |     try:
 70 |         retrieved_entries = review_entries(note, retrieved_entries, hide_text)
 71 |         # Now just the dialog, which sets the fields in the entries
 72 |     except ValueError as ve:
 73 |         tooltip(str(ve))
 74 |     except RuntimeError as rte:
 75 |         if 'cancel' in str(rte):
 76 |             for entry in retrieved_entries:
 77 |                 entry.action = Action.Delete
 78 |         else:
 79 |             raise
 80 |     for entry in retrieved_entries:
 81 |         entry.dispatch(note)
 82 |     if any(entry.action == Action.Add for entry in retrieved_entries):
 83 |         note.flush()
 84 |         # We have to do different things here, for download during
 85 |         # review, we should reload the card and replay. When we are in
 86 |         # the add dialog, we do a field update there.
 87 |         rnote = None
 88 |         try:
 89 |             rnote = mw.reviewer.card.note()
 90 |         except AttributeError:
 91 |             # Could not get the note of the reviewer's card. Probably
 92 |             # not reviewing at all.
 93 |             return
 94 |         if note == rnote:
 95 |             # The note we have is the one we were reviewing, so,
 96 |             # reload and replay
 97 |             mw.reviewer.card.load()
 98 |             mw.reviewer.replayAudio()
 99 | 
100 | 
def download_for_side():
    """
    Download audio for the visible card side.

    Collect the audio fields shown on the current side of the
    reviewer's card and start the download for them, with the text
    hidden in the review dialog. Do nothing when the reviewer has no
    card.
    """
    current_card = mw.reviewer.card
    if current_card:
        current_note = current_card.note()
        do_download(
            current_note,
            get_side_fields(current_card, current_note),
            language_code_from_card(current_card),
            hide_text=True)
115 | 
116 | 
def download_for_note(ask_user=False, note=None, editor=None):
    """
    Download audio for every audio field of a note.

    Use the note passed in, or the note of the reviewer's current card
    when none is given (return silently when there is no such card).
    When ask_user is true, show a dialog first that lets the user
    change the texts and the language; canceling that dialog aborts
    the download.
    """
    if note:
        language_code = language_code_from_editor(note, editor)
    else:
        try:
            card = mw.reviewer.card
            note = card.note()
        except AttributeError:
            return
        language_code = language_code_from_card(card)
    fields = get_note_fields(note)
    if not fields:
        # Complain before we show the empty dialog.
        tooltip('Nothing to download.')
        return

    if ask_user:
        try:
            fields, language_code = update_data(fields, language_code)
        except RuntimeError as error:
            if 'cancel' not in str(error):
                # Don't know how to handle this after all
                raise
            # User canceled. No need for the "Nothing downloaded"
            # message.
            return
    do_download(note, fields, language_code)
152 | 
153 | 
def download_manual():
    """Download for the current note, showing the edit dialog first."""
    download_for_note(True)
157 | 
158 | 
def download_off():
    """Disable the note, side and manual download menu actions."""
    for action_name in ('note_download_action',
                        'side_download_action',
                        'manual_download_action'):
        getattr(mw, action_name).setEnabled(False)
164 | 
165 | 
def download_on():
    """Enable the note, side and manual download menu actions."""
    for action_name in ('note_download_action',
                        'side_download_action',
                        'manual_download_action'):
        getattr(mw, action_name).setEnabled(True)
171 | 
172 | 
def editor_download_editing(self):
    """Do the download when we are in the note editor.

    ``self`` is the editor the button belongs to. The download is
    started for the editor's note with the edit dialog shown first
    (``ask_user=True``); afterwards the note is loaded into the editor
    again.
    """
    # NOTE(review): presumably flushes pending edits into self.note
    # before we read it — confirm against the editor's saveNow().
    self.saveNow()
    download_for_note(ask_user=True, note=self.note, editor=self)
    # Fix for issue #10.
    # stealFocus is switched on only for the duration of the reload.
    self.stealFocus = True
    self.loadNote()
    self.stealFocus = False
181 | 
182 | 
def editor_add_download_editing_button(self):
    """Add the “Download audio…” button, with its icon, to the editor."""
    button = self._addButton(
        "download_audio",
        lambda self=self: editor_download_editing(self),
        tip=u"Download audio…")
    icon_path = os.path.join(icons_dir, 'download_note_audio.png')
    button.setIcon(QIcon(icon_path))
191 | 
192 | 
# Create our own “Cambridge Dictionary” sub-menu in the Edit menu.
#
# An earlier version reused an edit-media sub-menu created by another
# add-on (probably the mhwave (ex sweep) add-on by Y.T.) when it
# existed. That try/except is disabled; the statements that used to
# form its except branch now run unconditionally at module level (they
# were still indented, which is a syntax error at import time).

# try:
#     mw.edit_media_submenu.addSeparator()
# except AttributeError:
mw.edit_cambridge_submenu = QMenu(u"&Cambridge Dictionary", mw)
mw.form.menuEdit.addSeparator()
mw.form.menuEdit.addMenu(mw.edit_cambridge_submenu)


# NOTE(review): in PyQt6 QAction is provided by PyQt6.QtGui, not by
# PyQt6.QtWidgets — check the import at the top of this file.
# The attribute name (with its typo) is kept because it is visible to
# other code through mw.
mw.note_crattion_action = QAction(mw)
mw.note_crattion_action.setText(u"Create new note from link")
# mw.note_download_action.setIcon(QIcon(os.path.join(icons_dir,
#                                                    'download_note_audio.png')))
mw.note_crattion_action.setToolTip(
     "Create a new card, providing link first.")
mw.note_crattion_action.setShortcut(CREATE_NEW_NOTE_SHORTCUT)
mw.note_crattion_action.triggered.connect(download_for_note)

# mw.side_download_action = QAction(mw)
# mw.side_download_action.setText(u"Side audio")
# mw.side_download_action.setIcon(
#     QIcon(os.path.join(icons_dir, 'download_side_audio.png')))
# mw.side_download_action.setToolTip(
#     "Download audio for audio fields currently visible.")
# mw.side_download_action.setShortcut(DOWNLOAD_SIDE_SHORTCUT)
# mw.side_download_action.triggered.connect(download_for_side)
#
# mw.manual_download_action = QAction(mw)
# mw.manual_download_action.setText(u"Manual audio")
# mw.manual_download_action.setIcon(
#     QIcon(os.path.join(icons_dir, 'download_audio_manual.png')))
# mw.manual_download_action.setToolTip(
#     "Download audio, editing the information first.")
# mw.manual_download_action.setShortcut(DOWNLOAD_MANUAL_SHORTCUT)
# mw.manual_download_action.triggered.connect(download_manual)


# Add the action to the sub-menu created above. (This used to refer
# to mw.edit_media_submenu, which this file never creates.)
mw.edit_cambridge_submenu.addAction(mw.note_crattion_action)
235 | 
236 | # Todo: switch off at start and on when we get to reviewing.
237 | # # And start with the acitons off.
238 | # download_off()
239 | 
240 | 
241 | # addHook("setupEditorButtons", editor_add_download_editing_button)
242 | 


--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python; coding: utf-8 -*-
  2 | #
  3 | # Copyright © 2012–15 Roland Sieker <ospalh@gmail.com>
  4 | # Copyright © 2015 Paul Hartmann <phaaurlt@gmail.com>
  5 | #
  6 | # License: GNU AGPL, version 3 or later;
  7 | # http://www.gnu.org/copyleft/agpl.html
  8 | 
  9 | 
 10 | '''
 11 | Class to download a files from a speaking dictionary or TTS service.
 12 | '''
 13 | 
 14 | 
 15 | import tempfile
 16 | import urllib.request, urllib.error, urllib.parse
 17 | import urllib.parse
 18 | from bs4 import BeautifulSoup as soup
 19 | from field_data import FieldData
 20 | 
# Make this work without PyQt
# with_pyqt records whether the Qt classes needed for the site icons
# (QImage, QSize, Qt) could be imported; the icon code checks it and
# leaves site_icon at None when they are missing.
with_pyqt = True
try:
    from PyQt6.QtGui import QImage
    from PyQt6.QtCore import QSize, Qt
except ImportError:
    with_pyqt = False
 28 | 
 29 | 
 30 | def uniqify_list(seq):
 31 |     """Return a copy of the list with every element appearing only once."""
 32 |     # From http://www.peterbe.com/plog/uniqifiers-benchmark
 33 |     no_dupes = []
 34 |     [no_dupes.append(i) for i in seq if not no_dupes.count(i)]
 35 |     return no_dupes
 36 | 
 37 | 
 38 | class AudioDownloader(object):
 39 |     """
 40 |     Class to download a files from a dictionary or TTS service.
 41 | 
 42 |     This is the base class for the downloaders of spoken
 43 |     pronunciations.
 44 | 
 45 |     The derived classes must implement self.download_files()
 46 |     """
 47 |     def __init__(self):
 48 |         self.language = ''
 49 |         # The language used.
 50 |         # This is used as a public variable and set for every download.
 51 |         self.downloads_list = []
 52 |         # Store for downloaded data.
 53 |         # This is where self.download_files should store the results
 54 |         # (type DownloadEntry).
 55 |         self.url = ''
 56 |         # The base URL used for (the first step of) the download.
 57 |         self.icon_url = ''
 58 |         # URL to get the address of the site icon from.
 59 |         self.max_icon_size = 20
 60 |         # Max size we scale the site icon down to, if larger.
 61 |         self.user_agent = 'Mozilla/5.0'
 62 |         # User agent string that can be used for requests.
 63 |         # At least Google TTS won’t give out their translations unless
 64 |         # we pretend to be some typical browser.
 65 |         self.use_temp_files = False
 66 |         # Whether to use files created by tempfiles or not.
 67 |         # Where to write the downloaded files, in /tmp/ or into the Anki
 68 |         # media directory directly.
 69 |         # This is set to True by the “real” audio processor that does
 70 |         # normalization but doesn’t work for standard installs. On
 71 |         # typical installs this is kept False.)
 72 |         self.download_directory = None
 73 |         # Where to write the downloaded files.
 74 |         # If this is None or empty (i.e. “if not
 75 |         # self.download_directory”)  (and self.use_temp_files ==
 76 |         # False) we use the current directory.
 77 |         self.site_icon = None
 78 |         # The sites’s favicon.
 79 |         self.file_extension = '.mp3'
 80 |         # Most sites have mp3 files.
 81 |         self.word_data = []
 82 |         # List contains dictionary(s) of word's definition(s)
 83 |         # with fields: word_title, word_gram, word_pro_uk, word_pro_us, meanings
 84 |         self.base_url = ''
 85 |         # Base url to Cambridge Dictionary -
 86 | 
 87 |     def get_word_data(self, field_data):
 88 |         # Get data from Cambd and fill in word_data dict
 89 |         raise NotImplementedError("Use a class derived from this.")
 90 | 
 91 |     def download_files(self, field_data):
 92 |         """Downloader functon
 93 | 
 94 |         This is the main worker function. It has to be reimplemented
 95 |         by the derived classes.
 96 | 
 97 |         The input is the text to use for the download, the whole text
 98 |         (for most languages) and the text split into base (kanji) and
 99 |         ruby (reading, kana). split is set to true when we got the
100 |         text from a reading field and should use base and ruby rather
101 |         than word.
102 | 
103 |         This function should clear the self.downloads_list and try to
104 |         get pronunciation files from its source, put those into tempfiles,
105 |         and add a DownloadEntry object to self_downloads_lists for each of
106 |         the zero or more downloaded files. (Zero when the
107 |         self.language is wrong, there is no file &c.)
108 | 
109 |         """
110 |         raise NotImplementedError("Use a class derived from this.")
111 | 
112 |     def maybe_get_icon(self):
113 |         """
114 |         Get icon for the site as a QImage if we haven’t already.
115 | 
116 |         Get the site icon, either the 'rel="icon"' or the favicon, for
117 |         the web page at url or passed in as page_html and store it as
118 |         a QImage. This function can be called repeatedly and loads the
119 |         icon only once.
120 |         """
121 |         if self.site_icon:
122 |             return
123 |         if not with_pyqt:
124 |             self.site_icon = None
125 |             return
126 |         page_request = urllib.request.Request(self.icon_url)
127 |         if self.user_agent:
128 |             page_request.add_header('User-agent', self.user_agent)
129 |         page_response = urllib.request.urlopen(page_request)
130 |         if 200 != page_response.code:
131 |             self.get_favicon()
132 |             return
133 |         page_soup = soup(page_response, 'html.parser')
134 |         try:
135 |             icon_url = page_soup.find(
136 |                 name='link', attrs={'rel': 'icon'})['href']
137 |         except (TypeError, KeyError):
138 |             self.get_favicon()
139 |             return
140 |         # The url may be absolute or relative.
141 |         if not urllib.parse.urlsplit(icon_url).netloc:
142 |             icon_url = urllib.parse.urljoin(
143 |                 self.url, urllib.parse.quote(icon_url.encode('utf-8')))
144 |         icon_request = urllib.request.Request(icon_url)
145 |         if self.user_agent:
146 |             icon_request.add_header('User-agent', self.user_agent)
147 |         icon_response = urllib.request.urlopen(icon_request)
148 |         if 200 != icon_response.code:
149 |             self.site_icon = None
150 |             return
151 |         self.site_icon = QImage.fromData(icon_response.read())
152 |         max_size = QSize(self.max_icon_size, self.max_icon_size)
153 |         icon_size = self.site_icon.size()
154 |         if icon_size.width() > max_size.width() \
155 |                 or icon_size.height() > max_size.height():
156 |             self.site_icon = self.site_icon.scaled(
157 |                 max_size, Qt.KeepAspectRatio, Qt.SmoothTransformation)
158 | 
159 |     def get_favicon(self):
160 |         """
161 |         Get favicon for the site.
162 | 
163 |         This is called when the site_url can’t be loaded or when that
164 |         page doesn’t contain a link tag with rel set to icon (the new
165 |         way of doing site icons.)
166 |         """
167 |         if self.site_icon:
168 |             return
169 |         if not with_pyqt:
170 |             self.site_icon = None
171 |             return
172 |         ico_url = urllib.parse.urljoin(self.icon_url, "/favicon.ico")
173 |         ico_request = urllib.request.Request(ico_url)
174 |         if self.user_agent:
175 |             ico_request.add_header('User-agent', self.user_agent)
176 |         ico_response = urllib.request.urlopen(ico_request)
177 |         if 200 != ico_response.code:
178 |             self.site_icon = None
179 |             return
180 |         self.site_icon = QImage.fromData(ico_response.read())
181 |         max_size = QSize(self.max_icon_size, self.max_icon_size)
182 |         ico_size = self.site_icon.size()
183 |         if ico_size.width() > max_size.width() \
184 |                 or ico_size.height() > max_size.height():
185 |             self.site_icon = self.site_icon.scaled(
186 |                 max_size, Qt.KeepAspectRatio, Qt.SmoothTransformation)
187 | 
188 |     def get_data_from_url(self, url_in):
189 |         """
190 |         Return raw data loaded from an URL.
191 | 
192 |         Helper function. Put in an URL and it sets the agent, sends
193 |         the requests, checks that we got error code 200 and returns
194 |         the raw data only when everything is OK.
195 |         """
196 |         request = urllib.request.Request(url_in)
197 |         request.add_header('User-agent', self.user_agent)
198 |         response = urllib.request.urlopen(request)
199 |         if 200 != response.code:
200 |             raise ValueError(str(response.code) + ': ' + response.msg)
201 |         return response.read()
202 | 
203 |     def get_tempfile_from_url(self, url_in):
204 |         """
205 |         Download raw data from url and put into a tempfile
206 | 
207 |         Wrapper helper function aronud self.get_data_from_url().
208 |         """
209 |         data = self.get_data_from_url(url_in)
210 |         # We put the data into RAM first so that we don’t have to
211 |         # clean up the temp file when the get does not work. (Bad
212 |         # get_data raises all kinds of exceptions that fly through
213 |         # here.)
214 |         tfile = tempfile.NamedTemporaryFile(
215 |             delete=False, prefix='anki_audio_', suffix=self.file_extension)
216 |         tfile.write(data)
217 |         tfile.close()
218 |         return tfile.name
219 | 
220 |   
221 | 
222 |         
223 | 
224 | 
225 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from random import randint
  3 | import json
  4 | import socket
  5 | import ssl
  6 | 
  7 | from aqt import mw
  8 | from anki import notes
  9 | 
 10 | from . import styles
 11 | from ._names import *
 12 | from .mediafile_utils import *
 13 | 
 14 | from PyQt6.QtWidgets import QMessageBox
 15 | 
 16 | 
 17 | fields = ['Word','Examples','Definition','Audio','Picture','Pronunciation','Grammar','Meaning','SynonymAntonym']
 18 | 
 19 | def fill_note(word_entry, note):
 20 | 
 21 |     config = get_config()
 22 |     pronunciation_uk = config['pronunciation_uk'] if 'pronunciation_uk' in config else True
 23 |     pronunciation_us = config['pronunciation_us'] if 'pronunciation_us' in config else True
 24 | 
 25 |     note['Word']            = word_entry.word_title
 26 |     note['Examples']        = "<br> ".join(word_entry.word_examples)
 27 |     note['Definition']      = word_entry.word_specific
 28 |     note['Pronunciation']   = ""
 29 |     if pronunciation_uk:
 30 |         note['Pronunciation'] += word_entry.word_pro_uk + ' ' 
 31 |     if pronunciation_us:
 32 |         note['Pronunciation'] += word_entry.word_pro_us
 33 |     note['Grammar']         = word_entry.word_part_of_speech
 34 |     note['Meaning']         = word_entry.word_general
 35 |     audio_field = ''
 36 |     if word_entry.word_uk_media and pronunciation_uk:
 37 |         try:
 38 |             f_entry = get_file_entry(word_entry.word_uk_media,note['Word'])
 39 |             audio_field = audio_field + '[sound:' + unmunge_to_mediafile(f_entry)+'] '
 40 |         except :
 41 |             pass
 42 |     try:
 43 |         if word_entry.word_us_media and pronunciation_us:
 44 |             f_entry = get_file_entry(word_entry.word_us_media,note['Word'])
 45 |             audio_field = audio_field + '[sound:' + unmunge_to_mediafile(f_entry)+'] '
 46 |     except :
 47 |         pass        
 48 |     
 49 |     note['Audio'] = audio_field
 50 |     #Fetching picture
 51 |     #picture_name = word.get('Picture').split('/')[-1] if word.get('Picture') else ''
 52 |     picture_field = ''
 53 |     f_picture = word_entry.word_image
 54 |     if f_picture:
 55 |         f_entry = get_file_entry(f_picture,note['Word'])
 56 |         picture_field = '<img src="' + unmunge_to_mediafile(f_entry) + '">'
 57 |     note['Picture'] = picture_field
 58 |     return note
 59 | 
 60 | def add_word(word, model):
 61 |     # TODO: Use picture_name and sound_name to check
 62 |     #  if update is needed and don't download media if not
 63 |     collection = mw.col
 64 |     note = notes.Note(collection, model)
 65 |     note = fill_note(word, note)
 66 |     
 67 |     # TODO: Rewrite to use is_duplicate()
 68 |             #word_value = word.get('wordValue') if word.get('wordValue') else 'NO_WORD_VALUE'
 69 |             #dupes = collection.findDupes("en", word_value)
 70 |             ## a hack to support words with apostrophes
 71 |             #note_dupes1 = collection.findNotes("en:'%s'" % word_value)
 72 |             #note_dupes2 = collection.findNotes('en:"%s"' % word_value)
 73 |             #note_dupes = note_dupes1 + note_dupes2
 74 |     collection.addNote(note)
 75 |    
 76 | def add_word_to_collection(word_entry, collection):
 77 | 
 78 |     model = prepare_model(mw.col, fields, styles.model_css)
 79 |     note = notes.Note(collection, model)
 80 |     note = fill_note(word_entry, note)
 81 |     
 82 |     collection.addNote(note)
 83 | 
 84 | 
 85 |     #if not dupes and not note_dupes:
 86 |     #    collection.addNote(note)
 87 |     ## TODO: Update notes if translation or tags (user wordsets) changed
 88 |     #elif (note['picture_name'] or note['sound_name']) and note_dupes:
 89 |     #    # update existing notes with new pictures and sounds in case
 90 |     #    # they have been changed in LinguaLeo's UI
 91 |     #    for nid in note_dupes:
 92 |     #        note_in_db = notes.Note(collection, id=nid)
 93 |     #        # a dirty hack below until a new field in the model is introduced
 94 |     #        # put a space before or after a *sound* field of an existing note if you want it to be updated
 95 |     #        # if a note has no picture or sound, it will be updated anyway
 96 |     #        # TODO: Check if hack is still needed, remove if not
 97 |     #        sound_name = note_in_db['sound_name']
 98 |     #        sound_name = sound_name.replace("&nbsp;", " ")
 99 |     #        note_needs_update = sound_name != sound_name.strip()
100 |     #        if note['picture_name'] and (note_needs_update or not note_in_db['picture_name'].strip()):
101 |     #            note_in_db['picture_name'] = note['picture_name']
102 |     #        if note['sound_name'] and (note_needs_update or not note_in_db['sound_name'].strip()):
103 |     #            note_in_db['sound_name'] = note['sound_name']
104 |     #        note_in_db.flush()
105 |     ## TODO: Check if it is possible to update Anki's media collection to remove old (unused) media
106 | 
def create_templates(collection):
    """
    Build the three card templates of the Cambridge model.

    :param collection: collection whose model manager creates the templates
    :return: tuple (Recognition, Recall, Sound) of new templates with
             their question ('qfmt') and answer ('afmt') formats set
             from the styles module
    """
    def build(name, question_fmt, answer_fmt):
        template = collection.models.newTemplate(name)
        template['qfmt'] = question_fmt
        template['afmt'] = answer_fmt
        return template

    return (
        build('Recognition', styles.std_word, styles.std_word_def_sound),
        build('Recall', styles.std_def, styles.std_def_word_sound),
        build('Sound', styles.std_sound, styles.std_sound_word_def),
    )
118 | 
def create_new_model(collection, fields, model_css):
    """
    Create the Cambridge note model with the given fields and the
    Recognition / Recall / Sound templates, and register it.

    :param collection: collection that owns the model manager
    :param fields: field names, added in the given order
    :param model_css: currently unused (CSS assignment is disabled)
    :return: the new model
    """
    model_manager = collection.models
    new_model = model_manager.new(CAMBRIDGE_MODEL)
    # Seems like the tag key is deprecated
    # new_model['tags'].append("Cambridge")
    # new_model['css'] = model_css
    for field_name in fields:
        model_manager.addField(new_model, model_manager.newField(field_name))
    for template in create_templates(collection):
        model_manager.addTemplate(new_model, template)
    new_model['id'] = randint(100000, 1000000)  # Essential for upgrade detection
    model_manager.update(new_model)
    return new_model
133 | 
def is_model_exist(collection, fields):
    """
    Tell whether a model named CAMBRIDGE_MODEL exists in the collection
    and its field names equal the given list.

    :param collection: collection whose models are inspected
    :param fields: expected field names, compared with ``==``
    :return: True only when both the name and the fields match
    """
    model_manager = collection.models
    if CAMBRIDGE_MODEL not in model_manager.allNames():
        return False
    existing = model_manager.byName(CAMBRIDGE_MODEL)
    return model_manager.fieldNames(existing) == fields
142 | 
def prepare_model(collection, fields, model_css):
    """
    Return the model for our future notes, creating it when no matching
    one exists, and point it at the 'Cambridge' deck.

    The model is also made the current one and saved.
    """
    model_manager = collection.models
    model = (model_manager.byName(CAMBRIDGE_MODEL)
             if is_model_exist(collection, fields)
             else create_new_model(collection, fields, model_css))
    # TODO: Move Deck name to config?
    # Look up the id of the "Cambridge" deck and store it as the model's deck id
    model['did'] = collection.decks.id('Cambridge')
    model_manager.setCurrent(model)
    model_manager.save(model)
    return model
158 | 
def get_cambridge_model(collection):
    """
    Look up the model whose name is given by the CAMBRIDGE_MODEL constant.

    The result of ``collection.models.byName`` is returned unchanged.
    """
    model_manager = collection.models
    return model_manager.byName(CAMBRIDGE_MODEL)
164 | 
def is_valid_ascii(url):
    """
    Check whether an url is a valid ascii string.

    After the LinguaLeo update some images
    have broken links with cyrillic characters.

    :param url: string to check; the empty string counts as valid
    :return: True when url encodes to ASCII, False when it does not or
             when url has no usable ``encode`` method (e.g. None)
    """
    if url == '':
        return True
    try:
        url.encode('ascii')
    except (UnicodeError, AttributeError, TypeError):
        # UnicodeError: non-ASCII characters; the other two cover
        # arguments that are not text at all.
        return False
    return True
178 | 
def get_module_name():
    """Return the first dot-separated component of this module's ``__name__``."""
    top_level, _, _ = __name__.partition(".")
    return top_level
182 | 
def get_addon_dir():
    """Return the add-on folder reported by ``mw.pm`` joined with this add-on's module name."""
    return os.path.join(mw.pm.addonFolder(), get_module_name())
187 | 
def get_cookies_path():
    """
    Returns a full path to cookies.txt in the user_files folder

    The user_files folder inside the add-on's directory is created when
    it does not exist yet.

    :return: path to cookies.txt, or None when the folder cannot be created
    """
    # user_files folder in the current addon's dir
    uf_dir = os.path.join(get_addon_dir(), 'user_files')
    try:
        # exist_ok avoids the race between checking for and creating the folder
        os.makedirs(uf_dir, exist_ok=True)
    except OSError:
        # TODO: Improve error handling
        return None
    return os.path.join(uf_dir, 'cookies.txt')
203 | 
def get_config():
    """
    Load the add-on configuration from config.json in the add-on folder.

    When the file cannot be opened, a config.json holding the default
    settings is written and those defaults are returned.

    :return: the configuration dict, or None when config.json can be
             neither read nor created
    :raises ValueError: when an existing config.json is not valid JSON
    """
    config_file = os.path.join(get_addon_dir(), 'config.json')
    try:
        with open(config_file, 'r') as f:
            return json.load(f)
    except IOError:
        # No readable config yet: fall through and create a default one.
        pass

    config = get_config_dict()
    try:
        with open(config_file, 'w') as f:
            json.dump(config, f, sort_keys=True, indent=2)
    except (OSError, ValueError, TypeError):
        return None
    return config
225 |     
def update_config(config):
    """
    Write the given configuration to config.json in the add-on folder.

    :param config: JSON-serialisable configuration dict
    :raises SystemError: when the file cannot be written; the original
                         error is attached as the cause
    """
    try:
        config_file = os.path.join(get_addon_dir(), 'config.json')
        with open(config_file, 'w') as f:
            json.dump(config, f, sort_keys=True, indent=2)
    except Exception as err:
        # TODO: Improve error handling
        raise SystemError('Unable to write config.json') from err
238 |     
def get_config_dict():
    """Return a fresh dict with the default settings: empty cookie, both pronunciations on."""
    return {
        'cookie': '',
        'pronunciation_uk': True,
        'pronunciation_us': True,
    }
245 | 
def find_note_with_url_pictures(AddonDialog):
    """
    Replace remote picture links in notes by locally stored pictures.

    Finds every note matching the search "Picture:*https*", downloads the
    content of its Picture field through ``mw.cddownloader``, stores the
    file via get_file_entry() / unmunge_to_mediafile() and rewrites the
    field as an ``<img>`` tag.  ``AddonDialog.progress`` is advanced after
    every note.

    :param AddonDialog: dialog object exposing a ``progress`` bar
    """
    query = "Picture:*https*"
    found_note_ids = mw.col.findNotes(query)
    AddonDialog.progress.setMaximum(len(found_note_ids))
    for processed, note_id in enumerate(found_note_ids, start=1):
        f_note = mw.col.getNote(note_id)
        f_picture = mw.cddownloader.get_tempfile_from_url(f_note['Picture'])
        # presumably the downloader returns None when the download fails -
        # leave such notes untouched rather than failing the whole run
        if f_picture:
            picture_name = f_note['Word'].split('/')[-1] if f_note['Word'] else ''
            f_entry = get_file_entry(f_picture, picture_name)
            f_note['Picture'] = '<img src="' + unmunge_to_mediafile(f_entry) + '">'
            f_note.flush()
        # Progress counts handled notes: 1, 2, 3, ...
        AddonDialog.progress.setValue(processed)
261 | 
262 | 
263 | 
264 | 
265 | 


--------------------------------------------------------------------------------
/Cambridge.py:
--------------------------------------------------------------------------------
  1 | # system libs
  2 | from urllib.error import *
  3 | from urllib.parse import quote, urljoin
  4 | from urllib.request import Request, urlopen
  5 | from copy import copy
  6 | from bs4 import BeautifulSoup
  7 | from PyQt6.QtCore import QObject
  8 | from PyQt6.QtWidgets import QMessageBox
  9 | import tempfile
 10 | import queue
 11 | import copy
 12 | 
 13 | # project libs
 14 | from .utils import *
 15 | 
 16 | 
 17 | class CDDownloader(QObject):
 18 |     """
 19 |     Class to download word definitions and media files - audio and picture (if exist) 
 20 |     from Cambridge Dictionary.
 21 |     Element fields: word_title, word_gram, word_pro_uk, word_pro_us, word_uk_media, word_us_media, word_image, meanings
 22 |     """
 23 | 
 24 |     def __init__(self):
 25 |         super(CDDownloader, self).__init__()
 26 |         self.icon_url = 'https://dictionary.cambridge.org/'
 27 |         self.url = \
 28 |             'https://dictionary.cambridge.org/dictionary/english/'
 29 |         # self.url_sound = self.icon_url
 30 |         self.extras = dict(Source=u"Cambridge Dictionary")
 31 |         self.base_url = 'https://dictionary.cambridge.org/'
 32 |         self.user_url = ''
 33 |         self.word = ''
 34 |         self.language = 'en'
 35 |         self.word_data = []
 36 |         self.word_media = {}
 37 |         self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
 38 |         self.wordlist_id = ''
 39 |         self.word_id = ''
 40 |         self.wordlist = []
 41 |         self.wordlist_entry = None
 42 |         self.words_queue = queue.Queue()
 43 |         # Definitions
 44 |         self.cambridge_plus_url = 'https://dictionary.cambridge.org/plus/'
 45 |         self.cambridge_plus_api_url = 'https://dictionary.cambridge.org/plus/api/'
 46 |         self.config = get_config()
 47 |         self.req = None
 48 | 
 49 |         if self.config is None:
 50 |             raise Exception("Config file error")
 51 |         if self.config['cookie'] is None:
 52 |             raise Exception("Config doesn't have cookie")
 53 | 
 54 |     def get_word_defs(self):
 55 | 
 56 |         if not self.language.lower().startswith('en'):
 57 |             return
 58 |         word = self.word.replace("'", "-")
 59 |         if not word and not self.user_url:
 60 |             return
 61 | 
 62 |         self.word_data.clear()
 63 |         # self.maybe_get_icon()
 64 |         # Do our parsing with BeautifulSoup
 65 | 
 66 |         if self.user_url:
 67 |             self.req = Request(self.user_url)
 68 |         else:
 69 |             self.req = Request(self.url + quote(word.encode('utf-8')))
 70 | 
 71 |         self._fill_request_headers()
 72 | 
 73 |         try:
 74 |             response = urlopen(self.req)
 75 |         except HTTPError as e:
 76 |             QMessageBox.warning(mw, 'URL error', e.reason.strip())
 77 |             return
 78 |         except URLError as e:
 79 |             QMessageBox.warning(mw, 'URL error', e.reason.strip())
 80 |             return
 81 |         html_doc = response.read()
 82 | 
 83 |         word_soup = BeautifulSoup(html_doc, "html.parser")
 84 | 
 85 |         stop_further_parsing = False
 86 | 
 87 |         for tag_cald4 in word_soup.find_all(name='div', attrs={'class': 'pr dictionary',
 88 |                                                                'data-id': ['cald4', 'cbed', 'cacd', 'cald4-us']}):
 89 |             for tag_entry in tag_cald4.find_all(name='div', attrs={
 90 |                 'class': ['pr entry-body__el', 'pr idiom-block', 'entry-body__el clrd js-share-holder']}):
 91 |                 word_to_add = word_entry()
 92 |                 tag_dict = tag_cald4.find(name='div', attrs={'class': 'cid'})
 93 |                 word_to_add.word_dictionary_id = tag_dict['id']
 94 |                 word_to_add.word_dictionary = self.get_dict_name(word_to_add.word_dictionary_id)
 95 |                 # Different types of entries
 96 |                 # pr entry-body__el - ordinary entry
 97 |                 # pr idiom-block - idiomatic expressions
 98 |                 # entry-body__el clrd js-share-holder phrasal verbs
 99 |                 l1_word = {}
100 |                 # Word title
101 |                 cur_tag = tag_entry.find(name='div', attrs={'class': 'di-title'})
102 |                 if cur_tag:
103 |                     word_to_add.word_title = self._prettify_string(cur_tag.text)
104 |                 else:
105 |                     return
106 |                 # Word's grammatical part
107 |                 cur_tag = tag_entry.find(name='div', attrs={'class': 'posgram dpos-g hdib lmr-5'})
108 |                 if cur_tag:
109 |                     word_to_add.word_part_of_speech = self._prettify_string(cur_tag.text)
110 |                 else:
111 |                     l1_word["word_gram"] = ''
112 |                 # UK IPA
113 |                 cur_tag = tag_entry.find("span", class_=re.compile("uk\sdpron-i\s"))
114 |                 if not cur_tag:
115 |                     cur_tag = tag_entry.find("span", attrs={'class': 'uk dpron-i'})
116 |                 if cur_tag:
117 |                     ipa = cur_tag.find(name='span', attrs={'class': 'ipa dipa lpr-2 lpl-1'})
118 |                     if ipa:
119 |                         ipa_text = self._prettify_string(ipa.text)
120 |                     else:
121 |                         ipa_text = ''
122 |                     word_to_add.word_pro_uk = 'UK ' + ipa_text
123 |                     media_file_tag = cur_tag.find("source", attrs={'type': 'audio/mpeg'})
124 |                     if media_file_tag:
125 |                         if media_file_tag['src'] in self.word_media:
126 |                             word_to_add.word_uk_media = self.word_media[media_file_tag['src']]
127 |                         else:
128 |                             word_to_add.word_uk_media = self.get_tempfile_from_url(
129 |                                 self.base_url.rstrip('/') + media_file_tag['src'])
130 |                             self.word_media[media_file_tag['src']] = word_to_add.word_uk_media
131 |                 # US IPA
132 |                 cur_tag = tag_entry.find("span", class_=re.compile("us\sdpron-i\s"))
133 |                 if not cur_tag:
134 |                     cur_tag = tag_entry.find("span", attrs={'class': 'us dpron-i'})
135 |                 if cur_tag:
136 |                     ipa = cur_tag.find(name='span', attrs={'class': 'ipa dipa lpr-2 lpl-1'})
137 |                     if ipa:
138 |                         ipa_text = self._prettify_string(ipa.text)
139 |                     else:
140 |                         ipa_text = ''
141 |                     l1_word["word_pro_us"] = 'US ' + ipa_text
142 |                     word_to_add.word_pro_us = 'US ' + ipa_text
143 |                     media_file_tag = cur_tag.find("source", attrs={'type': 'audio/mpeg'})
144 |                     if media_file_tag:
145 |                         if media_file_tag['src'] in self.word_media:
146 |                             word_to_add.word_us_media = self.word_media[media_file_tag['src']]
147 |                         else:
148 |                             word_to_add.word_us_media = self.get_tempfile_from_url(
149 |                                 self.base_url.rstrip('/') + media_file_tag['src'])
150 |                             self.word_media[media_file_tag['src']] = word_to_add.word_uk_media
151 | 
152 |                 l2_word = {}
153 |                 suffix = 1
154 |                 word_to_copy = copy.deepcopy(word_to_add)
155 |                 # Looping through word general definition - like 'draw verb
156 |                 # (PICTURE)'
157 |                 for html_l2_tag in tag_entry.find_all(name=['div', 'span'], attrs={
158 |                     'class': ['pos-body', 'idiom-body didiom-body', 'pv-body dpv-body']}):
159 |                     # Looping through words specific definitions - l2_meaning
160 |                     # (def & examples)
161 |                     for html_pos_body in html_l2_tag.find_all(
162 |                             attrs={'class': ['pr dsense', 'pr dsense ', 'sense-body dsense_b']}):
163 |                         tag_l2_word_key = html_pos_body.find(attrs={'class': 'dsense_h'})
164 |                         if not tag_l2_word_key:
165 |                             continue
166 |                         general_m = self._prettify_string(tag_l2_word_key.get_text())
167 |                         word_to_add.word_general = general_m
168 |                         l2_word[general_m] = None
169 |                         l2_meaning_key = {}
170 |                         l2_meaning_examples = []
171 |                         l2_meaning = {}
172 |                         for html_meaning in html_pos_body.find_all(name="div", attrs={
173 |                             'class': ['def-block ddef_block', 'def-block ddef_block ']}):
174 |                             tag_l2_meaning = html_meaning.find("div", attrs={'class': 'ddef_h'})
175 |                             if not tag_l2_meaning:
176 |                                 continue
177 |                             # Image
178 |                             tag_picture = html_meaning.find(name='amp-img', attrs={'class': 'dimg_i'})
179 |                             if tag_picture:
180 |                                 if tag_picture.attrs['src'] in self.word_media:
181 |                                     word_to_add.word_image = self.word_media[tag_picture.attrs['src']]
182 |                                 else:
183 |                                     word_to_add.word_image = self.get_tempfile_from_url(
184 |                                         self.base_url.rstrip('/') + tag_picture.attrs['src'])
185 |                                     self.word_media[tag_picture.attrs['src']] = word_to_add.word_image
186 |                             word_to_add.senseId = html_meaning.attrs['data-wl-senseid']
187 |                             tag_l2_specific_gram = tag_l2_meaning.find("span", attrs={'class': 'gram dgram'})
188 |                             word_to_add.word_specific_gram = self._prettify_string(
189 |                                 tag_l2_specific_gram.text if tag_l2_specific_gram else '')
190 |                             tag_usage = tag_l2_meaning.find("span", attrs={'class': 'usage dusage'})
191 |                             if tag_usage:
192 |                                 word_to_add.usage = self._prettify_string(tag_usage.text)
193 |                             tag_l2_specific = tag_l2_meaning.find("div", attrs={'class': 'def ddef_d db'})
194 |                             word_to_add.word_specific = self._prettify_string(
195 |                                 tag_l2_specific.text if tag_l2_specific else '')
196 |                             specific_m = self._prettify_string(tag_l2_meaning.text)
197 |                             l2_meaning[specific_m] = None
198 |                             examples = []
199 |                             for tag_examples in html_meaning.find_all(name='div', attrs={'class': 'examp dexamp'}):
200 |                                 examples.append(self._prettify_string(tag_examples.text))
201 |                             l2_meaning[specific_m] = examples
202 |                             word_to_add.word_examples = examples
203 |                             self.word_data.append(word_to_add)
204 |                             word_to_add = copy.deepcopy(word_to_copy)
205 |                         l2_word[general_m] = l2_meaning
206 | 
207 |                     for html_pos_body in html_l2_tag.find_all(name='div', attrs={'class': 'pr', 'class': 'dsense',
208 |                                                                                  'class': 'dsense-noh'}):
209 |                         general_m = 'UNDEFINED' + str(suffix)
210 |                         word_to_add.word_general = ''
211 |                         l2_word[general_m] = None
212 |                         l2_meaning_key = {}
213 |                         l2_meaning_examples = []
214 |                         l2_meaning = {}
215 |                         for html_meaning in html_pos_body.find_all(name="div",
216 |                                                                    attrs={'class': 'def-block', 'class': 'ddef_block'}):
217 |                             tag_l2_meaning = html_meaning.find("div", attrs={'class': 'ddef_h'})
218 |                             if not tag_l2_meaning:
219 |                                 continue
220 |                             # Image
221 |                             tag_picture = html_meaning.find(name='amp-img', attrs={'class': 'dimg_i'})
222 |                             if tag_picture:
223 |                                 if tag_picture.attrs['src'] in self.word_media:
224 |                                     word_to_add.word_image = self.word_media[tag_picture.attrs['src']]
225 |                                 else:
226 |                                     word_to_add.word_image = self.get_tempfile_from_url(
227 |                                         self.base_url.rstrip('/') + tag_picture.attrs['src'])
228 |                                     self.word_media[tag_picture.attrs['src']] = word_to_add.word_image
229 |                             specific_m = self._prettify_string(tag_l2_meaning.text)
230 |                             word_to_add.senseId = html_meaning.attrs['data-wl-senseid']
231 |                             tag_l2_specific_gram = tag_l2_meaning.find("span", attrs={'class': 'gram dgram'})
232 |                             word_to_add.word_specific_gram = self._prettify_string(
233 |                                 tag_l2_specific_gram.text if tag_l2_specific_gram else '')
234 |                             tag_usage = tag_l2_meaning.find("span", attrs={'class': 'usage dusage'})
235 |                             if tag_usage:
236 |                                 word_to_add.usage = self._prettify_string(tag_usage.text)
237 |                             tag_l2_specific = tag_l2_meaning.find("div", attrs={'class': 'def ddef_d db'})
238 |                             word_to_add.word_specific = self._prettify_string(
239 |                                 tag_l2_specific.text if tag_l2_specific else '')
240 |                             l2_meaning[specific_m] = None
241 |                             examples = []
242 |                             for tag_examples in html_meaning.find_all(name='div', attrs={'class': 'examp dexamp'}):
243 |                                 examples.append(self._prettify_string(tag_examples.text))
244 |                             l2_meaning[specific_m] = examples
245 |                             word_to_add.word_examples = examples
246 |                             self.word_data.append(word_to_add)
247 |                             word_to_add = copy.deepcopy(word_to_copy)
248 |                         l2_word[general_m] = l2_meaning
249 |                         suffix += 1
250 | 
251 |                 l1_word["meanings"] = l2_word
252 | 
253 |             if not self.word and self.user_url:
254 |                 self.word = self.user_url.split('/')[-1]
255 | 
256 |         self.word_data.sort(key=lambda x: x.word_dictionary_id)
257 | 
258 |     def get_tempfile_from_url(self, url_in):
259 |         """
260 |         Download raw data from url and put into a tempfile
261 | 
262 |         Wrapper helper function aronud self.get_data_from_url().
263 |         """
264 |         if not url_in:
265 |             return None
266 |         data = self.get_data_from_url(url_in)
267 |         if data == None:
268 |             return None
269 | 
270 |         self.file_extension = '.' + url_in.split('.')[-1]
271 |         # We put the data into RAM first so that we don’t have to
272 |         # clean up the temp file when the get does not work.  (Bad
273 |         # get_data raises all kinds of exceptions that fly through
274 |         # here.)
275 |         tfile = tempfile.NamedTemporaryFile(delete=False, prefix='anki_audio_', suffix=self.file_extension)
276 |         tfile.write(data)
277 |         tfile.close()
278 |         return tfile.name
279 | 
280 |     def get_data_from_url(self, url_in):
281 |         """
282 |         Return raw data loaded from an URL.
283 | 
284 |         Helper function. Put in an URL and it sets the agent, sends
285 |         the requests, checks that we got error code 200 and returns
286 |         the raw data only when everything is OK.
287 |         """
288 |         self.req = Request(url_in)
289 |         self._fill_request_headers()
290 |         try:
291 |             response = urlopen(self.req)
292 |         except URLError as e:
293 |             return None
294 | 
295 |         if 200 != response.code:
296 |             raise ValueError(str(response.code) + ': ' + response.msg)
297 |         return response.read()
298 | 
299 |     def clean_up(self):
300 |         self.user_url = ''
301 |         self.word = ''
302 |         self.word_data.clear()
303 |         self.word_media = {}
304 |         self.wordlist.clear()
305 |         self.word_id = ''
306 |         self.wordlist_entry = None
307 | 
308 |     def get_file_entry(self, file, base_name):
309 |         file_entry = {}
310 |         file_entry['base_name'] = base_name
311 |         file_entry['file_extension'] = os.path.splitext(file)[1][1:].strip()
312 |         file_entry['file_path'] = os.path.abspath(file)
313 |         return file_entry
314 | 
315 |     def fetch_wordlist_entries(self, wordlist_id):
316 | 
317 |         for n in range(1, 100):
318 | 
319 |             # https://dictionary.cambridge.org/plus/wordlist/21215803/entries/100/
320 | 
321 |             url_for_request = urljoin(self.base_url, 'plus/wordlist/')
322 |             url_for_request = urljoin(url_for_request, str(wordlist_id) + '/entries/')
323 |             url_for_request = urljoin(url_for_request, str(n) + '/')
324 | 
325 |             self.req = Request(url_for_request)
326 |             self._fill_request_headers()
327 |             self.req.add_header('Accept', 'application/json')
328 |             self.req.add_header('Cookie', self.config['cookie'])
329 | 
330 |             response = urlopen(self.req)
331 |             word_list_json = json.loads(response.read())
332 |             if not word_list_json:
333 |                 break
334 | 
335 |             for word_in_response in word_list_json:
336 |                 wl_entry = wordlist_entry()
337 |                 wl_entry.wordlist_id = word_in_response['wordlistId']
338 |                 wl_entry.word_id = word_in_response['id']
339 |                 wl_entry.senseId = word_in_response['senseId']
340 |                 wl_entry.word_url = word_in_response['entryUrl']
341 |                 wl_entry.definition = word_in_response['definition']
342 |                 wl_entry.soundUKMp3 = word_in_response['soundUKMp3']
343 |                 wl_entry.soundUSMp3 = word_in_response['soundUSMp3']
344 |                 wl_entry.dictCode = word_in_response['dictCode']
345 |                 wl_entry.headword = word_in_response['headword']
346 |                 self.wordlist.append(wl_entry)
347 |                 # {
348 |                 # "id": 26061590,
349 |                 # "entryId": "walk-on-part",
350 |                 # "headword": "walk-on part",
351 |                 # "senseId": "ID_00035581_01",
352 |                 # "dictCode": "English",
353 |                 # "definition": "A walk-on part in a play is a very small part
354 |                 # in which the actor is on the stage for a short time and speaks
355 |                 # very few or no words.",
356 |                 # "translation": "",
357 |                 # "pos": "noun",
358 |                 # "soundUK": "/CUK01223",
359 |                 # "soundUS": "/CUS02231",
360 |                 # "soundUKMp3":
361 |                 # "https://dictionary.cambridge.org/media/english/uk_pron/c/cuk/cuk01/cuk01223.mp3",
362 |                 # "soundUKOgg":
363 |                 # "https://dictionary.cambridge.org/media/english/uk_pron_ogg/c/cuk/cuk01/cuk01223.ogg",
364 |                 # "soundUSMp3":
365 |                 # "https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus02/cus02231.mp3",
366 |                 # "soundUSOgg":
367 |                 # "https://dictionary.cambridge.org/media/english/us_pron_ogg/c/cus/cus02/cus02231.ogg",
368 |                 # "wordlistId": 21215803,
369 |                 # "entryUrl":
370 |                 # "https://dictionary.cambridge.org/dictionary/english/walk-on-part"
371 |                 # },
372 | 
373 |     def delete_word_from_wordlist(self, wl_entry):
374 | 
375 |         self.req = Request(self.cambridge_plus_api_url + 'deleteWordlistEntry')
376 |         self._fill_request_headers()
377 |         self.req.add_header('Content-Type', 'application/json')
378 | 
379 |         self.req.add_header('Cookie', self.config['cookie'])
380 |         data = json.dumps({'id': wl_entry.word_id, 'wordlistId': wl_entry.wordlist_id})
381 |         data = data.encode('ascii')
382 |         r = urlopen(self.req, data)
383 | 
384 |     def get_dict_name(self, dict_id):
385 | 
386 |         dicts = {
387 |             'dataset_cald4': 'Cambridge Advanced Learner''s Dictionary & Thesaurus',
388 |             'dataset_cbed': 'Cambridge Business English Dictionary',
389 |             'dataset_cacd': 'Cambridge American English Dictionary'}
390 |         if dict_id in dicts:
391 |             return dicts[dict_id]
392 |         else:
393 |             return ''
394 | 
395 |     def find_word_by_definition(self, definition):
396 |         for wd_entry in self.word_data:
397 |             if wd_entry.word_specific == definition:
398 |                 return self.word_data.pop()
399 |         return None
400 | 
401 |     def find_word_by_wl_entry(self, wl_entry):
402 |         wd_entries = list(filter(lambda wd_entry: wd_entry.senseId == wl_entry.senseId, self.word_data))
403 |         if len(wd_entries) == 1:
404 |             return wd_entries[0]
405 | 
406 |     def _prettify_string(self, in_str):
407 |         if not in_str:
408 |             return ''
409 |         in_str = re.sub(r' +', ' ', in_str).strip()
410 |         in_str = re.sub(r'\n', '', in_str).strip()
411 |         in_str = re.sub(r':$', '', in_str).strip()
412 |         return in_str.strip()
413 | 
414 |     def _fill_request_headers(self):
415 |         self.req.add_header("Host", "dictionary.cambridge.org")
416 |         self.req.add_header('Accept-Language', 'en-US')
417 |         self.req.add_header('User-Agent',
418 |                             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36')
419 | 
420 | 
class wordlist_entry:
    """
    A single entry of a Cambridge wordlist.

    Constructor arguments are stored as given; the remaining text fields
    start out empty.
    """

    def __init__(self, word=None, ref=None, l2_meaning=None, dataWordID=None, wordlist_id=None):
        # Identification of the entry and of the list it belongs to.
        self.wordlist_id = wordlist_id
        self.word_id = dataWordID
        self.senseId = ''
        self.dictCode = ''

        # The word itself and the page it comes from.
        self.word = word
        self.word_url = ref

        # Both fields start from the same constructor argument.
        self.word_l2_meaning = l2_meaning
        self.definition = l2_meaning

        # Pronunciation audio links.
        self.soundUKMp3 = ''
        self.soundUSMp3 = ''
437 | 
438 | 
class word_entry:
    """A single word definition; every field starts out empty."""

    # Names of the attributes initialised to an empty string.
    _TEXT_FIELDS = (
        'word_dictionary_id',
        'word_dictionary',
        'word_title',
        'word_part_of_speech',
        'word_pro_uk',
        'word_pro_us',
        'word_uk_media',
        'word_us_media',
        'word_image',
        'word_general',
        'word_specific',
        'word_specific_gram',
        'usage',
        'senseId',
    )

    def __init__(self):
        for field_name in self._TEXT_FIELDS:
            setattr(self, field_name, '')
        # A fresh list per instance.
        self.word_examples = []
457 | 
458 | # def self._prettify_string(self,in_str):
459 | #    if not in_str:
460 | #        return ''
461 | #    in_str = re.sub(r' +',' ',in_str).strip()
462 | #    in_str = re.sub(r'\n','',in_str).strip()
463 | #    in_str = re.sub(r':','',in_str).strip()    
464 | #    return in_str.strip()
465 | 
466 | # This code for debugging purposes
467 | # ad = CDDownloader()
468 | # ad.language = 'en'
469 | # ad.user_url = 'https://dictionary.cambridge.org/dictionary/english/pit'
470 | # ad.get_word_defs()
471 | # ad = None
472 | 
473 | 
474 | ## This code for testing with WebDriver
475 | # ad = CDDownloader()
476 | # ad.language = 'en'
477 | ##ad.user_url = 'https://dictionary.cambridge.org/dictionary/english/tear-up'
478 | ##ad.get_all_words_in_list('https://dictionary.cambridge.org/plus/wordlist/21215803_cald')
479 | # ad.fetch_wordlist_entries('21215803')
480 | # ad = None
481 | 


--------------------------------------------------------------------------------
/gui.py:
--------------------------------------------------------------------------------
  1 | # -*- mode: python ; coding: utf-8 -*-
  2 | #
  3 | # License: GNU AGPL, version 3 or later;
  4 | # http://www.gnu.org/copyleft/agpl.html
  5 | """
  6 | Anki2 add-on to download card's fields with audio from Cambridge Dictionary
  7 | 
  8 | """
  9 | 
 10 | import queue
 11 | import traceback
 12 | from PyQt6.QtGui import QIcon, QFont
 13 | from PyQt6.QtWidgets import *
 14 | from PyQt6 import QtCore
 15 | from PyQt6.QtCore import (QThread, QObject, pyqtSignal, QUrl, Qt)
 16 | from PyQt6.QtWebEngineWidgets import QWebEngineView
 17 | from PyQt6.QtWebEngineCore import QWebEnginePage, QWebEngineProfile
 18 | from PyQt6.QtNetwork import QNetworkCookie
 19 | 
 20 | from aqt import mw
 21 | from aqt.utils import askUserDialog, showInfo, showText, showWarning, tooltip
 22 | from anki.hooks import addHook
 23 | 
 24 | from .Cambridge import (CDDownloader, word_entry, wordlist_entry)
 25 | from urllib.error import *
 26 | 
 27 | from ._names import *
 28 | from .utils import *
 29 | 
# Path of the 'icons' directory inside the folder returned by
# mw.pm.addonFolder(); used below to load the window icons.
# NOTE(review): `os` is not imported explicitly in this module - presumably it
# arrives through one of the star imports above; confirm.
icons_dir = os.path.join(mw.pm.addonFolder(), 'icons')
 31 | 
 32 | 
 33 | # These are classes for GUI dialogues
 34 | class LinkDialogue(QDialog):
 35 |     """
 36 |     A Dialog to let the user edit the texts or change the language.
 37 |     """
 38 | 
 39 |     def __init__(self, parent=None):
 40 |         self.edit_word_head = None
 41 |         self.user_url = ''
 42 |         self.word = ''
 43 |         QDialog.__init__(self)
 44 |         self.setAttribute(Qt.WidgetAttribute.WA_DeleteOnClose, True)
 45 |         self.initUI()
 46 | 
 47 |     def initUI(self):
 48 |         """Build the dialog box."""
 49 | 
 50 |         self.setWindowTitle(('Anki – Download definitions'))
 51 |         self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png')))
 52 |         layout = QVBoxLayout()
 53 |         self.setLayout(layout)
 54 |         self.edit_word_head = QLabel()
 55 | 
 56 |         self.edit_word_head.setText(('''<h4>Enter link for parsing</h4>'''))
 57 |         bold_font = QFont()
 58 |         bold_font.setBold(True)
 59 |         self.edit_word_head.setFont(bold_font)
 60 |         layout.addWidget(self.edit_word_head)
 61 | 
 62 |         self.link_editor = QLineEdit()
 63 |         self.link_editor.placeholderText = 'Enter your link here'
 64 |         layout.addWidget(self.link_editor)
 65 | 
 66 |         dialog_buttons = QDialogButtonBox(self)
 67 |         dialog_buttons.addButton(QDialogButtonBox.StandardButton.Cancel)
 68 |         dialog_buttons.addButton(QDialogButtonBox.StandardButton.Ok)
 69 |         dialog_buttons.accepted.connect(self.get_word_definitions_from_link)
 70 |         dialog_buttons.rejected.connect(self.reject)
 71 |         layout.addWidget(dialog_buttons)
 72 |         self.link_editor.setFocus()
 73 | 
 74 |     def get_word_definitions_from_link(self):
 75 |         self.user_url = self.link_editor.text()
 76 |         if not self.user_url:
 77 |             QMessageBox.warning(mw, 'Link is not provided', 'Please, provide a link for you word or phrase.')
 78 |             return
 79 | 
 80 |         downloader = mw.cddownloader
 81 |         # downloader.clean_up()
 82 |         downloader.user_url = self.user_url
 83 |         downloader.get_word_defs()
 84 |         self.setResult(QDialog.DialogCode.Accepted)
 85 |         self.done(QDialog.DialogCode.Accepted)
 86 | 
 87 | 
 88 | class WordDefDialogue(QDialog):
 89 |     """
 90 |     A Dialog to let the user choosing defs to be added.
 91 |     """
 92 | 
 93 |     def __init__(self, word_data, word, l2_meaning=None, wd_entry=None):
 94 |         self.word_data = word_data
 95 |         self.word = word
 96 |         self.selected_defs = []  # list of selected words (word_entry)
 97 |         self.deletion_mark = False
 98 |         self.l2_def = None
 99 |         self.single_word = False
100 |         self.l2_meaning = l2_meaning
101 |         self.set_model()
102 |         QDialog.__init__(self)
103 |         self.initUI()
104 | 
105 |     def initUI(self):
106 |         """Build the dialog box."""
107 | 
108 |         self.setWindowTitle(self.word)
109 |         self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png')))
110 | 
111 |         dialog_layout = QVBoxLayout(self)
112 |         scrollArea = QScrollArea()
113 |         scrollArea.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff)
114 |         dialog_layout.addWidget(scrollArea)
115 | 
116 |         scroll_area_content = QWidget()
117 |         scrollArea.setWidget(scroll_area_content)
118 | 
119 |         layout = QVBoxLayout(scroll_area_content)
120 | 
121 |         word_specific = ''
122 |         word_part_of_speech = ''
123 |         word_dictionary = ''
124 |         # Looping through data structure
125 |         for word_el in self.word_data:
126 |             if word_dictionary != word_el.word_dictionary:
127 |                 row = 0
128 |                 gl = QGridLayout()
129 |                 gr = QGroupBox()
130 |                 gr.setTitle(word_el.word_dictionary)
131 |                 gr.setLayout(gl)
132 |                 word_dictionary = word_el.word_dictionary
133 | 
134 |             if word_dictionary == word_el.word_dictionary and word_part_of_speech != word_el.word_part_of_speech:
135 |                 row += 1
136 |                 word_title = QLabel('<h3>' + word_el.word_title + '</h3>')
137 |                 gl.addWidget(word_title, row, 0)
138 |                 word_gram = QLabel('<h4>' + word_el.word_part_of_speech + '</h4>')
139 |                 gl.addWidget(word_gram, row, 1)
140 |                 word_part_of_speech = word_el.word_part_of_speech
141 | 
142 |             row += 1
143 |             word_specific = word_el.word_specific
144 |             l2_def_check = QCheckBox(word_specific)
145 |             l2_def_check.l2_def = word_specific
146 |             l2_def_check.word = word_el
147 |             l2_def_check.word_specific = word_specific
148 |             l2_def_check.stateChanged.connect(self.toggle_def)
149 |             gl.addWidget(l2_def_check, row, 0, 1, -1)
150 |             self.l2_def = word_specific
151 |             layout.addWidget(gr)
152 | 
153 |         dialog_buttons = QDialogButtonBox(self)
154 |         dialog_buttons.addButton(QDialogButtonBox.StandardButton.SaveAll)
155 |         dialog_buttons.addButton(QDialogButtonBox.StandardButton.Cancel)
156 |         dialog_buttons.addButton(QDialogButtonBox.StandardButton.Ok)
157 |         dialog_buttons.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.create_selected_notes)
158 |         dialog_buttons.button(QDialogButtonBox.StandardButton.Cancel).clicked.connect(self.reject)
159 |         dialog_buttons.button(QDialogButtonBox.StandardButton.SaveAll).clicked.connect(self.save_all)
160 |         dialog_layout.addWidget(dialog_buttons)
161 | 
162 |         # Automatic add single word with single def if in add_single mode
163 |         if len(self.word_data) == 1:
164 |             self.selected_defs.append(self.word_data[0])
165 |             self.create_selected_notes()
166 |             self.single_word = True
167 | 
168 |         scroll_area_content.adjustSize()  # required to get correct content area size
169 |         scrollArea.verticalScrollBar().adjustSize()  # required to get the correct scrollbar size
170 |         scrollArea.setMinimumWidth(scroll_area_content.size().width() + scrollArea.verticalScrollBar().width())
171 | 
172 |     def toggle_def(self, state):
173 |         sender = self.sender()
174 |         word_specific = sender.word_specific
175 |         if self.sender():
176 |             if word_specific in self.selected_defs:
177 |                 self.selected_defs.remove(word_specific)
178 |             else:
179 |                 self.selected_defs.append(word_specific)
180 | 
181 |     def create_selected_notes(self):
182 | 
183 |         for wd_entry in self.word_data:
184 |             for word_to_save in self.selected_defs:
185 |                 if wd_entry.word_specific == word_to_save:
186 |                     add_word(wd_entry, self.model)
187 |         self.accept()
188 | 
189 |     def set_model(self):
190 |         self.model = prepare_model(mw.col, fields, styles.model_css)
191 | 
192 |     def add_note(self, word_to_add):
193 |         """
194 |         Note is an SQLite object in Anki so you need
195 |         to fill it out inside the main thread
196 | 
197 |         """
198 |         QMessageBox.warning(mw, 'Link is not provided', str(word_to_add))
199 |         word = {}
200 |         word['Word'] = word_to_add['word_title']
201 |         word['Grammar'] = word_to_add['word_gram']
202 |         word['Pronunciation'] = word_to_add['word_pro_uk'] + ' ' + word_to_add['word_pro_us']
203 |         word['Meaning'] = word_to_add['word_general'] if not 'UNDEFINED' in word_to_add['word_general'] else ''
204 |         word['Definition'] = word_to_add['word_specific']
205 |         word['Examples'] = word_to_add['word_examples']
206 |         word['Sounds'] = [word_to_add['word_uk_media'], word_to_add['word_us_media']]
207 |         word['Picture'] = word_to_add['word_image']
208 | 
209 |         add_word(word, self.model)
210 | 
211 |     def save_all(self):
212 |         for wd_entry in self.word_data:
213 |             add_word(wd_entry, self.model)
214 |         self.done(0)
215 | 
216 | 
class AddonConfigWindow(QDialog):
    """
    Settings dialog of the add-on.

    Lets the user sign in, edit the cookie by hand, toggle the UK/US
    pronunciation options, maintain the list of wordlist IDs and start the
    replacement of picture URLs.  Settings are read with ``get_config()``
    and written back with ``update_config()``.
    """

    def __init__(self):
        self.config = get_config()
        QDialog.__init__(self)
        self.initUI()

    def initUI(self):
        """Build the dialog box."""
        self.setWindowTitle('Cambridge Addon Settings')
        self.setWindowIcon(QIcon(os.path.join(icons_dir, 'cambridge_icon.png')))
        layout = QVBoxLayout()
        self.setLayout(layout)

        # Authorize and save cookies
        auth_layout = QHBoxLayout()
        auth_label = QLabel()
        auth_label.setText('Authorization status:')
        auth_layout.addWidget(auth_label)
        self.auth_label_status = QLabel()
        self.auth_label_status.setText(self._auth_status_text())
        auth_layout.addWidget(self.auth_label_status)
        auth_btn = QPushButton()
        auth_btn.setText('Sign in')
        auth_btn.clicked.connect(self.btn_auth_clicked)
        auth_layout.addWidget(auth_btn)
        layout.addLayout(auth_layout)

        # Cookie - for semi authorization
        self.editor_cookie = QLineEdit()
        self.editor_cookie.setClearButtonEnabled(True)
        self.editor_cookie.setText(self.config.get('cookie') or '')
        self._add_labeled_row(layout, 'Cookie:', self.editor_cookie)

        # Pronunciation UK
        self.cb_pronunciation_uk = QCheckBox()
        self.cb_pronunciation_uk.setChecked(self.config.get('pronunciation_uk', True))
        self._add_labeled_row(layout, 'Pronunciation UK:', self.cb_pronunciation_uk)

        # Pronunciation US
        self.cb_pronunciation_us = QCheckBox()
        self.cb_pronunciation_us.setChecked(self.config.get('pronunciation_us', True))
        self._add_labeled_row(layout, 'Pronunciation US:', self.cb_pronunciation_us)

        # word list IDs
        wl_layout = QVBoxLayout()
        self.wordlist_list = QListWidget()
        for wordlist_id in self.config.get('wordlist_ids', []):
            self.wordlist_list.addItem(wordlist_id)
        self.wordlist_list.itemDoubleClicked.connect(self.wl_edit_row)
        wl_layout.addWidget(self.wordlist_list)
        wl_label = QLabel()
        wl_label.setText('ID to add:')
        wl_layout.addWidget(wl_label)
        self.ledit_wl = QLineEdit()
        wl_layout.addWidget(self.ledit_wl)
        btn_Add_WL = QPushButton()
        btn_Add_WL.setText('Add ID')
        btn_Add_WL.clicked.connect(self.wl_add)
        wl_layout.addWidget(btn_Add_WL)
        layout.addLayout(wl_layout)

        # Find and fetch all pictures instead of links
        find_btn = QPushButton()
        find_btn.setText('Replace all URL with pictures in deck')
        find_btn.clicked.connect(self.find_and_fetch_pictures)
        layout.addWidget(find_btn)

        # Stretcher
        layout.addStretch()

        # Progress bar - hidden until find_and_fetch_pictures() runs
        progress_layout = QHBoxLayout()
        self.progress = QProgressBar(self)
        self.progress.setGeometry(0, 0, 200, 30)
        self.progress.setVisible(False)
        progress_layout.addWidget(self.progress)
        layout.addLayout(progress_layout)

        # Bottom buttons - Ok, Cancel
        align_right = QtCore.Qt.AlignmentFlag.AlignRight
        btn_bottom_layout = QHBoxLayout()
        btn_bottom_layout.addStretch()
        btn_Ok = QPushButton()
        btn_Ok.setText('Ok')
        btn_bottom_layout.addWidget(btn_Ok, alignment=align_right)
        btn_Cancel = QPushButton()
        btn_Cancel.setText('Cancel')
        btn_bottom_layout.addWidget(btn_Cancel, alignment=align_right)
        layout.addLayout(btn_bottom_layout)
        btn_Ok.clicked.connect(self.btn_Ok)
        btn_Cancel.clicked.connect(self.close)

    def _add_labeled_row(self, layout, caption, widget):
        """Append a row '<caption> ... <widget>' with the widget right-aligned."""
        row_layout = QHBoxLayout()
        caption_label = QLabel()
        caption_label.setText(caption)
        row_layout.addWidget(caption_label)
        row_layout.addStretch()
        # The second positional argument of addWidget()/addLayout() is the
        # integer stretch factor, so the alignment is passed by keyword.
        row_layout.addWidget(widget, alignment=QtCore.Qt.AlignmentFlag.AlignRight)
        layout.addLayout(row_layout)

    def _auth_status_text(self):
        """Return the status label text for the current cookie setting."""
        return '<b>Signed in</b>' if self.config.get('cookie') else 'Not signed in'

    def find_and_fetch_pictures(self):
        """Show the progress bar while find_note_with_url_pictures() runs."""
        self.progress.setVisible(True)
        try:
            find_note_with_url_pictures(self)
        finally:
            # Hide the bar again even when the operation raises.
            self.progress.setVisible(False)

    def btn_auth_clicked(self):
        """Open the sign-in page and store the cookie it yields."""
        auth_window = WebPageView(AUTH_URL)
        auth_window.accepted.connect(self.onAuthCompleted)
        auth_window.exec()
        self.config['cookie'] = auth_window.get_cookie()
        update_config(self.config)
        self.editor_cookie.setText(self.config['cookie'] if self.config['cookie'] else '')
        # Keep the status label in step with the stored cookie.
        self.auth_label_status.setText(self._auth_status_text())

    def onAuthCompleted(self):
        """Tell the user that signing in succeeded."""
        QMessageBox.information(self, 'Signing in', 'Signed in successfully')

    def btn_Ok(self):
        """Write the current settings to the config and close the dialog."""
        # Fill config dict with current settings and write them to file
        self.config['cookie'] = self.editor_cookie.text()
        self.config['pronunciation_uk'] = self.cb_pronunciation_uk.isChecked()
        self.config['pronunciation_us'] = self.cb_pronunciation_us.isChecked()
        self.config['wordlist_ids'] = [item.text() for item in self.iterAllItems(self.wordlist_list)]
        update_config(self.config)
        self.close()

    def btn_Cancel(self):
        """Close the dialog without saving."""
        self.close()

    def wl_add(self):
        """Add the ID from the line edit to the list; blank input is ignored."""
        new_id = self.ledit_wl.text().strip()
        if not new_id:
            return
        self.wordlist_list.addItem(new_id)
        self.ledit_wl.clear()

    def iterAllItems(self, iterable):
        """Yield every item of a list widget in row order."""
        for i in range(iterable.count()):
            yield iterable.item(i)

    def wl_edit_row(self):
        """Move the current list item back into the line edit for editing."""
        current_item = self.wordlist_list.currentItem()
        if current_item is None:
            return
        self.ledit_wl.setText(current_item.text())
        self.wordlist_list.takeItem(self.wordlist_list.currentRow())
381 | 
class WParseSavedWL(QObject):
    """
    Fetches the words of the configured wordlists with FetchThread workers.

    ``parse()`` starts one thread that fills ``wordlist_queue``; when it
    reports 'spawn_other_threads', worker threads are started (stored as
    ``thread1`` .. ``thread4``) and restarted after every completed batch
    until the queue is empty or a stop was requested.
    """

    instance = None

    # Numbers used for the worker attributes ``thread<n>``.
    _WORKER_NUMBERS = range(1, 5)

    def __init__(self):
        super(WParseSavedWL, self).__init__()
        self.config = get_config()
        self.collection = mw.col
        self.word_to_fetch = '0'
        self.fetched = 0
        self.need_to_stop = False
        self.wordlist_queue = queue.Queue()

    def __call__(self, *pargs, **kargs):
        """
        Return the shared instance, registering ``self`` as that instance
        when none has been stored yet.  Arguments are ignored.
        """
        cls = type(self)
        if cls.instance is None:
            cls.instance = self
        return cls.instance

    def parse(self):
        """Start the thread that downloads the configured wordlists."""

        if 'wordlist_ids' not in self.config:
            return

        # NOTE: this attribute hides QObject.thread() on the instance; the
        # name is kept because it is part of the object's visible state.
        t = self.thread = FetchThread(0, True, self.wordlist_queue)
        t._event.connect(self.onEvent)
        t._add_word_event.connect(self.on_add_word)
        self.thread.start()

    def onEvent(self, evt, *args):
        """Handle an event name (plus optional text) emitted by a thread."""
        if evt == 'spawn_other_threads':
            self.word_to_fetch = args[0]
            for n in self._WORKER_NUMBERS:
                t = FetchThread(max_words=5, fetch_wordlist=False, wordlist_queue=self.wordlist_queue)
                t._event.connect(self.onEvent)
                t._add_word_event.connect(self.on_add_word)
                # Stored on self so 'batch_completed' can restart it.
                setattr(self, 'thread' + str(n), t)
                t.start()
        elif evt == 'batch_completed':
            if self.wordlist_queue.empty() or self.need_to_stop:
                return
            for n in self._WORKER_NUMBERS:
                thread_to_restart = getattr(self, 'thread' + str(n), None)
                if thread_to_restart is not None and thread_to_restart.isFinished():
                    thread_to_restart.start()
        elif evt == 'need_to_stop':
            self.need_to_stop = True
            tooltip(
                (str('Stopping all error')),
                parent=mw,
            )
        elif evt == 'error':
            self.need_to_stop = True
            showText(("Fetching failed:\n%s") % self._rewriteError(args[0]))
        elif evt == 'message':
            tooltip(
                (str(args[0])),
                parent=mw,
            )

    def on_add_word(self, word_entry):
        """Add a fetched word to the collection and show the progress."""
        add_word_to_collection(word_entry, self.collection)
        self.fetched += 1
        tooltip(
            (str(self.fetched) + ' / ' + str(self.word_to_fetch)),
            parent=mw,
        )

    def _rewriteError(self, err):
        """
        Map known fragments of an error text to a user-readable message.

        The checks run in the order written; the first fragment found in
        ``err`` wins.  Unknown errors are returned unchanged.
        """
        secure_connection_error = (
            "Error establishing a secure connection. This is usually caused by antivirus, firewall or VPN software, or problems with your ISP."
        )
        if "Errno 61" in err:
            return (
                "Couldn't connect to AnkiWeb. Please check your network connection "
                "and try again."
            )
        elif "timed out" in err or "10060" in err:
            return (
                "The connection to AnkiWeb timed out. Please check your network "
                "connection and try again."
            )
        elif "403" in err:
            return (
                "Authentication failed. Either your cookie expired or your are trying to delete entries from a community wordlist."
            )
        elif "code: 500" in err:
            return (
                "AnkiWeb encountered an error. Please try again in a few minutes, and if "
                "the problem persists, please file a bug report."
            )
        elif "code: 501" in err:
            return "Please upgrade to the latest version of Anki."
        # 502 is technically due to the server restarting, but we reuse the
        # error message
        elif "code: 502" in err:
            return ("Cambridge Dictionary is under maintenance. Please try again in a few minutes.")
        elif "code: 503" in err:
            return (
                "Cambridge Dictionary is too busy at the moment. Please try again in a few minutes."
            )
        elif "code: 504" in err:
            return (
                "504 gateway timeout error received. Please try temporarily disabling your antivirus."
            )
        elif "10061" in err or "10013" in err or "10053" in err:
            return (
                "Antivirus or firewall software is preventing Anki from connecting to the internet."
            )
        elif "10054" in err or "Broken pipe" in err:
            return (
                "Connection timed out. Either your internet connection is experiencing problems, or you have a very large file in your media folder."
            )
        elif "Unable to find the server" in err or "socket.gaierror" in err:
            return (
                "Server not found. Either your connection is down, or antivirus/firewall "
                "software is blocking Anki from connecting to the internet."
            )
        elif "code: 407" in err:
            return ("Proxy authentication required.")
        elif "code: 413" in err:
            return ("Your collection or a media file is too large to sync.")
        elif "EOF occurred in violation of protocol" in err:
            return secure_connection_error + " (eof)"
        elif "certificate verify failed" in err:
            return secure_connection_error + " (invalid cert)"
        return err
526 | 
527 | 
class FetchThread(QThread):
    """
    Background thread that either fills the shared queue with wordlist
    entries or downloads definitions for entries taken from it.

    Results are reported through signals: ``_event`` carries an event name
    and a text argument, ``_add_word_event`` carries a fetched word entry.

    :param max_words: maximum number of queue entries handled per run.
    :param fetch_wordlist: True to download the wordlists into the queue,
        False to process entries from the queue.
    :param wordlist_queue: queue shared between the threads; with None the
        thread does nothing.
    """

    _event = pyqtSignal(str, str)
    _add_word_event = pyqtSignal(word_entry)

    def __init__(self, max_words=5, fetch_wordlist=False, wordlist_queue=None):
        QThread.__init__(self)
        self.config = get_config()
        self.downloader = CDDownloader()
        self.max_words = max_words
        self.fetch_wordlist = fetch_wordlist
        self.wordlist_queue = wordlist_queue

    def run(self):
        """Run one job; any failure is reported as an 'error' event."""
        if self.wordlist_queue is None:
            return
        try:
            if self.fetch_wordlist:
                self._fetch_wordlist()
                self.fireEvent('spawn_other_threads', str(self.wordlist_queue.qsize()))
            else:
                self._fetch_words()
                self.fireEvent('batch_completed')
        except Exception:
            # Report the traceback instead of letting it escape the thread.
            self.fireEvent("error", traceback.format_exc())

    def fireEvent(self, evt, args=''):
        """Emit ``_event`` with the event name and a text argument."""
        self._event.emit(evt, args)

    def addWordEvent(self, wd_entry):
        """Emit ``_add_word_event`` with a fetched word entry."""
        self._add_word_event.emit(wd_entry)

    def _fetch_wordlist(self):
        """Download every configured wordlist and queue its entries."""
        self.fireEvent('message', 'Starting fetching wordlists')
        self.downloader = CDDownloader()
        self.downloader.clean_up()
        for wordlist_id in self.config['wordlist_ids']:
            self.downloader.fetch_wordlist_entries(wordlist_id)

        for wl_entry in self.downloader.wordlist:
            self.wordlist_queue.put(wl_entry)

    def _fetch_words(self):
        """Process up to ``max_words`` entries taken from the queue."""
        for _ in range(self.max_words):
            # Several threads drain the same queue: a non-blocking get
            # cannot hang when another thread takes the last entry between
            # an empty() check and the get.
            try:
                wl_entry = self.wordlist_queue.get_nowait()
            except queue.Empty:
                break
            self.downloader.clean_up()
            self.downloader.wordlist_entry = wl_entry
            self.downloader.user_url = wl_entry.word_url
            self.downloader.word_id = wl_entry.word_id
            self.downloader.get_word_defs()
            if self.downloader.word_data:
                wd_entry = self.downloader.find_word_by_wl_entry(wl_entry)
                if wd_entry is not None:
                    self.addWordEvent(wd_entry)
                    self.downloader.delete_word_from_wordlist(wl_entry)
594 | 
595 | 
class WebPageView(QDialog):
    """Dialog showing a web page on its own profile and recording its cookies.

    The dialog accepts itself as soon as the view's URL differs from
    ``AUTH_URL``; the cookies seen so far can then be read with
    ``get_cookie``.
    """

    def __init__(self, user_url=''):
        """Build the web view, wire up the signals and load ``user_url``."""
        QDialog.__init__(self)
        self.user_url = user_url
        self.cookies = []

        view = QWebEngineView()
        profile = QWebEngineProfile("storage", view)
        store = profile.cookieStore()
        store.cookieAdded.connect(self.onCookieAdded)
        page = QWebEnginePage(profile, view)
        view.setPage(page)
        view.urlChanged.connect(self.onUrlChanged)

        self.webview = view
        self.profile = profile
        self.cookie_store = store
        self.webpage = page
        self.initUI()

    def initUI(self):
        """Place the web view in a vertical layout and start loading the URL."""
        box = QVBoxLayout()
        self.setLayout(box)
        box.addWidget(self.webview)

        target = QUrl(self.user_url)
        self.webview.load(target)
        self.webview.show()

    def onCookieAdded(self, cookie):
        """Remember ``cookie`` unless one with the same identifier is stored."""
        already_known = any(
            known.hasSameIdentifier(cookie) for known in self.cookies
        )
        if already_known:
            return
        self.cookies.append(QNetworkCookie(cookie))

    def get_cookie(self):
        """Return the stored cambridge.org cookies as ``name=value; `` pairs.

        Pairs appear in reverse order of storage (the most recently stored
        cookie comes first); an empty string is returned when none match.
        """
        pairs = []
        for stored in self.cookies:
            # Keep only cookies whose domain contains cambridge.org
            if "cambridge.org" in str(stored.domain()):
                name = bytearray(stored.name()).decode()
                value = bytearray(stored.value()).decode()
                pairs.append(name + "=" + value + "; ")
        return "".join(reversed(pairs))

    def onUrlChanged(self):
        """Accept the dialog once the view has left ``AUTH_URL``."""
        current = self.webview.url().toString()
        if current == AUTH_URL:
            return
        self.accept()
639 | 
640 | 
class MyQWebEngineView(QWebEngineView):
    """Web view that shows a warning on URL changes and new-window requests."""

    def __init__(self):
        """Initialise the view and subscribe to its own ``urlChanged`` signal."""
        super().__init__()
        self.urlChanged.connect(self.url_changed)

    def createWindow(self, type=None):
        """Warn with the text of ``type``, then run an empty ``WebPageView``.

        Nothing is returned, so the caller gets ``None``.
        NOTE(review): ``type`` is presumably the window type passed in by
        Qt -- confirm; the name is kept because it is part of the signature.
        """
        requested = str(type)
        QMessageBox.warning(mw, 'Link is not provided', requested)
        dialog = WebPageView()
        dialog.exec()

    def url_changed(self):
        """Show a warning box whenever the view's URL changes."""
        title = 'Link is not provided'
        QMessageBox.warning(mw, title, 'URL changed')
653 | 


--------------------------------------------------------------------------------