├── .gitignore ├── Formatters.py ├── Latex.py ├── MediaConverter.py ├── Models.py ├── README.md ├── Utils ├── Encoding.py ├── Fonts.py ├── HtmlUtils.py └── __init__.py ├── anki2sm.py ├── anki2smV2.py ├── init.bat ├── mustache.py ├── qrun.py ├── requirements.txt └── run.bat /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.tex 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | Ü*.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | 
.mypy_cache/ 102 | 103 | .idea/ 104 | in/ 105 | ignore/ 106 | *.apkg 107 | *.xml 108 | *.zip 109 | *anki2 110 | random/ 111 | random/* 112 | *.cfg 113 | release.py 114 | releaseFiles.txt 115 | qrun.py -------------------------------------------------------------------------------- /Formatters.py: -------------------------------------------------------------------------------- 1 | import re 2 | from gettext import gettext 3 | from html.entities import name2codepoint 4 | 5 | import mustache 6 | 7 | reComment = re.compile("(?s)") 8 | reStyle = re.compile("(?si).*?") 9 | reScript = re.compile("(?si).*?") 10 | reTag = re.compile("(?s)<.*?>") 11 | reEnts = re.compile(r"&#?\w+;") 12 | reMedia = re.compile("(?i)]+src=[\"']?([^\"'>]+)[\"']?[^>]*>") 13 | reSound = re.compile(r"\[sound:[^]]+\]") 14 | 15 | 16 | def entsToTxt(html: str) -> str: 17 | # entitydefs defines nbsp as \xa0 instead of a standard space, so we 18 | # replace it first 19 | html = html.replace(" ", " ") 20 | 21 | def fixup(m): 22 | text = m.group(0) 23 | if text[:2] == "&#": 24 | # character reference 25 | try: 26 | if text[:3] == "&#x": 27 | return chr(int(text[3:-1], 16)) 28 | else: 29 | return chr(int(text[2:-1])) 30 | except ValueError: 31 | pass 32 | else: 33 | # named entity 34 | try: 35 | text = chr(name2codepoint[text[1:-1]]) 36 | except KeyError: 37 | pass 38 | return text # leave as is 39 | 40 | return reEnts.sub(fixup, html) 41 | 42 | 43 | def stripHTML(s: str) -> str: 44 | s = reComment.sub("", s) 45 | s = reStyle.sub("", s) 46 | s = reScript.sub("", s) 47 | s = reTag.sub("", s) 48 | s = entsToTxt(s) 49 | return s 50 | 51 | 52 | def _removeFormattingFromMathjax(txt, ordi) -> str: 53 | creg = clozeReg.replace("(?si)", "") 54 | in_mathjax = False 55 | 56 | def replace(match): 57 | nonlocal in_mathjax 58 | if match.group("mathjax_open"): 59 | if in_mathjax: 60 | print("MathJax opening found while already in MathJax") 61 | in_mathjax = True 62 | elif match.group("mathjax_close"): 63 | if not 
in_mathjax: 64 | print("MathJax close found while not in MathJax") 65 | in_mathjax = False 66 | elif match.group("cloze"): 67 | if in_mathjax: 68 | return match.group(0).replace("{{c{}::".format(ordi), "{{C{}::".format(ordi)) 69 | else: 70 | print("Unexpected: no expected capture group is present") 71 | return match.group(0) 72 | 73 | return re.sub(r"(?si)(?P\\[([])|(?P\\[\])])|(?P" + (creg % ordi) + ")", replace, 74 | txt) 75 | 76 | 77 | FURIGANA = re.compile(r" ?([^ >]+?)\[(.+?)\]") 78 | 79 | clozeReg = r"(?si)\{\{(?Pc)%s::(?P.*?)(::(?P.*?))?\}\}" 80 | 81 | CLOZE_REGEX_MATCH_GROUP_TAG = "tag" 82 | CLOZE_REGEX_MATCH_GROUP_CONTENT = "content" 83 | CLOZE_REGEX_MATCH_GROUP_HINT = "hint" 84 | 85 | 86 | def _clozeText(txt: str, ordi: str, type: str) -> str: 87 | """Process the given Cloze deletion within the given template.""" 88 | reg = clozeReg 89 | currentRegex = clozeReg % ordi 90 | if not re.search(currentRegex, txt): 91 | # No Cloze deletion was found in txt. 92 | return "" 93 | txt = _removeFormattingFromMathjax(txt, str(ordi)) 94 | 95 | def repl(m): 96 | if type == "q": 97 | if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): 98 | buf = "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) 99 | else: 100 | buf = "[...]" 101 | else: 102 | buf = m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) 103 | # uppercase = no formatting 104 | if m.group(CLOZE_REGEX_MATCH_GROUP_TAG) == "c": 105 | buf = "%s" % buf 106 | return buf 107 | 108 | if type == 'q': 109 | txt = re.sub(currentRegex, repl, txt) 110 | # and display other clozes normally 111 | return re.sub(reg % r"\d+", "\\2", txt) 112 | else: 113 | txt = re.search(currentRegex, txt) 114 | if txt: 115 | return repl(txt) 116 | 117 | 118 | # filter args is a regex argument to the cloze {{c%s}} 119 | def _cloze_filter(field_text: str, filter_args: str, q_or_a: str): 120 | return _clozeText(field_text, filter_args, q_or_a) 121 | 122 | 123 | def cloze_q_filter(field_text: str, filter_args: str, *args): 124 | return _cloze_filter(field_text, 
filter_args, "q") 125 | 126 | 127 | def cloze_a_filter(field_text: str, filter_args: str, *args): 128 | return _cloze_filter(field_text, filter_args, "a") 129 | 130 | 131 | def text_filter(txt: str, *args) -> str: 132 | return stripHTML(txt) 133 | 134 | 135 | def hint_filter(txt: str, args, context, tag: str, fullname) -> str: 136 | if not txt.strip(): 137 | return "" 138 | domid = "hint%d" % id(txt) 139 | return """ 140 | %s 141 | """ % ( 142 | domid, 143 | gettext("Show %s") % tag, 144 | domid, txt, 145 | ) 146 | 147 | 148 | def captured_sound(groups: re.Match): 149 | return groups.group(2).startswith("sound") 150 | 151 | 152 | def kana_filter(txt: str) -> str: 153 | def replace(match: re.Match) -> str: 154 | if captured_sound(match): 155 | return match.group(0) 156 | else: 157 | return match.group(2) 158 | 159 | return FURIGANA.sub(replace, txt.replace(" ", " ")) 160 | 161 | 162 | def kanji_filter(txt: str) -> str: 163 | def replace(match: re.Match) -> str: 164 | if captured_sound(match): 165 | return match.group(0) 166 | else: 167 | return match.group(1) 168 | 169 | return FURIGANA.sub(replace, txt.replace(" ", " ")) 170 | 171 | 172 | def furigana_filter(txt: str) -> str: 173 | def replace(match: re.Match) -> str: 174 | if captured_sound(match): 175 | return match.group(0) 176 | else: 177 | return "{}{}"\ 178 | .format(match.group(1), 179 | match.group(2)) 180 | return FURIGANA.sub(replace, txt.replace(" ", " ")) 181 | 182 | 183 | 184 | # #EXPANDS THE CLOSES INTO Hint and the actual Text 185 | # given expand_clozes("{{c1::a}} {{c2::b}} {{c3::c}} {{c4::d}}") 186 | # output ['[...] b cd', 'a [...] cd', 'a b [...]d', 'a b c [...]', 'a b c d'] 187 | # first second third forth fifth .... 
continues in order 188 | def expand_clozes(string: str): 189 | ords = set(re.findall(r"{{c(\d+)::.+?}}", string)) 190 | strings = [] 191 | 192 | def qrepl(m): 193 | if m.group(CLOZE_REGEX_MATCH_GROUP_HINT): 194 | return "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT) 195 | else: 196 | return "[...]" 197 | 198 | def arepl(m): 199 | return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT) 200 | 201 | for ord in ords: 202 | s = re.sub(clozeReg % ord, qrepl, string) 203 | s = re.sub(clozeReg % ".+?", arepl, s) 204 | strings.append(s) 205 | strings.append(re.sub(clozeReg % ".+?", arepl, string)) 206 | return strings 207 | 208 | 209 | mustache.filters["hint"] = hint_filter 210 | mustache.filters["Text"] = text_filter 211 | 212 | mustache.filters["furigana"] = furigana_filter 213 | mustache.filters["kanji"] = kanji_filter 214 | mustache.filters["kana"] = kana_filter -------------------------------------------------------------------------------- /Latex.py: -------------------------------------------------------------------------------- 1 | import binascii 2 | import re 3 | from pdf2image import convert_from_bytes 4 | import subprocess 5 | from typing import AnyStr, Tuple 6 | from Cryptodome.Hash import SHA1 7 | 8 | from Formatters import stripHTML 9 | 10 | LATEX = re.compile(r"(?xsi)\[latex\](.+?)\[/latex\]|\[\$\](.+?)\[/\$\]|\[\$\$\](.+?)\[/\$\$\]") 11 | LATEX_NEWLINES = re.compile(r"(?xi)|
") 12 | 13 | 14 | class ExtractedLatex(object): 15 | def __init__(self, fname: str, latex: str): 16 | self.fname = fname 17 | self.latex = latex 18 | 19 | def __str__(self): 20 | return "ExtractedLatex {\n\tfname:" + self.fname + ", \n\tlatex:" + self.latex + "\n}" 21 | 22 | def __repr__(self): 23 | return "ExtractedLatex {\n\tfname:" + self.fname + ", \n\tlatex:" + self.latex + "\n}" 24 | 25 | 26 | def _string_checksum(string: AnyStr) -> bytes: 27 | h = SHA1.new() 28 | h.update(string.encode()) 29 | return h.hexdigest()[:20].encode() 30 | 31 | 32 | def contains_latex(text: AnyStr) -> bool: 33 | return LATEX.match(text) is not None 34 | 35 | 36 | def fname_for_latex(latex: str, is_svg: bool) -> str: 37 | ext = "svg" if is_svg else "png" 38 | csum = binascii.hexlify(_string_checksum(latex)).decode() 39 | return "latex-{}.{}".format(csum, ext) 40 | 41 | 42 | def image_link_for_fname(fname: str) -> str: 43 | return "".format(fname) 44 | 45 | 46 | def strip_html_for_latex(html: str) -> str: 47 | out = html 48 | o = LATEX_NEWLINES.sub("\n", html) 49 | if o is not None: 50 | out = o 51 | o = stripHTML(out) 52 | if o is not None: 53 | out = o 54 | return out 55 | 56 | 57 | def export_latex(latex_src: ExtractedLatex, latexPre: str, latexPost: str) -> None: 58 | filename = latex_src.fname.split(".")[0] + '.tex' 59 | template = r'''\documentclass[preview]{{standalone}}\begin{{document}}{}\end{{document}}''' 60 | with open(filename, 'wb') as f: 61 | f.write(bytes(template.format(str(latex_src.latex.replace("\n", " \\\\ "))), 'UTF-8')) 62 | 63 | subprocess.call('pdflatex ' + filename, shell=True, ) 64 | 65 | images = convert_from_bytes(open(latex_src.fname.split(".")[0] + ".pdf", 'rb').read()) 66 | images[0].save(latex_src.fname.split(".")[0] + ".png") 67 | 68 | 69 | def extract_latex(text: str, svg: bool) -> [AnyStr, [ExtractedLatex]]: 70 | extracted = [] 71 | 72 | def replace(match: re.Match) -> str: 73 | latex = None 74 | m1, m2, m3 = match.group(1), match.group(2), 
match.group(3) 75 | if m1 is not None: 76 | latex = m1 77 | elif m2 is not None: 78 | latex = "${}$".format(m2) 79 | elif m3 is not None: 80 | latex = r"\begin{{displaymath}}{}\end{{displaymath}}".format(m3) 81 | 82 | latex_text = strip_html_for_latex(latex); 83 | 84 | fname = fname_for_latex(latex_text, svg); 85 | 86 | img_link = image_link_for_fname(fname); 87 | 88 | extracted.append(ExtractedLatex 89 | ( 90 | fname, 91 | latex=latex_text 92 | ) 93 | ) 94 | return img_link 95 | 96 | return LATEX.sub(replace, text), extracted 97 | 98 | 99 | if __name__ == '__main__': 100 | latexPre = "\\documentclass[12pt]{article}\n\\special{papersize=3in,5in}\n\\usepackage{amssymb,amsmath}\n\\pagestyle{empty}\n\\setlength{\\parindent}{0in}\n\\begin{document}\n" 101 | latexPost = "\\end{document}" 102 | export_latex(extract_latex("a[latex]one
and
two[/latex]b", False)[1][0], latexPre, 103 | latexPost) # , end="\n\n") 104 | export_latex(extract_latex("[$]hello  world[/$]", True)[1][0], latexPre, latexPost) # , end="\n\n") 105 | export_latex(extract_latex("[$$]math & stuff[/$$]", False)[1][0], latexPre, latexPost) # ,end="\n\n") 106 | -------------------------------------------------------------------------------- /MediaConverter.py: -------------------------------------------------------------------------------- 1 | from svglib.svglib import svg2rlg 2 | from reportlab.graphics import renderPM 3 | from PIL import Image 4 | import os 5 | 6 | 7 | class MediaConverter: 8 | # anki jpg png gif tiff svg tif jpeg mp3 ogg wav avi ogv 9 | # sm jpg png gif jpeg mp3 avi mp4 bmp 10 | def convertImage(self, filepath: str) -> str: 11 | if "\\" in filepath: 12 | filepath = filepath.replace("\\", "/") 13 | ext = filepath.split("/")[-1].split(".")[-1] 14 | filepath = filepath.replace(ext,ext.lower()) 15 | file = filepath 16 | ext = ext.lower() 17 | if ext not in ["jpg"]: 18 | if ext == "png": 19 | im = Image.open(filepath) 20 | rgb_im = im.convert('RGB') 21 | file = filepath.replace(ext, "jpg") 22 | rgb_im.save(file) 23 | if ext == "svg": 24 | drawing = svg2rlg(filepath) 25 | file = filepath.replace(ext, "png") 26 | renderPM.drawToFile(drawing, file, fmt="PNG") 27 | im = Image.open(file) 28 | 29 | rgb_im = im.convert('RGB') 30 | rgb_im.save(filepath.replace(ext, "jpg")) 31 | os.remove(file) 32 | file = filepath.replace(ext, "jpg") 33 | return file 34 | -------------------------------------------------------------------------------- /Models.py: -------------------------------------------------------------------------------- 1 | 2 | def EmptyString(s: str) -> str: 3 | if s is None or len(s) == 0: 4 | return "" 5 | else: 6 | return s 7 | 8 | 9 | class Model: 10 | def __init__(self, mid, typeofD, css, latexPre, latexPost): 11 | self.id = mid 12 | self.type = typeofD 13 | self.flds = None 14 | self.tmpls = None 15 | self.css = css 
16 | self.latexPre = None 17 | self.latexPost = None 18 | 19 | def __str__(self): 20 | return ("").replace("\n", "\\n") 42 | 43 | def __repr__(self): 44 | return ("").replace("\n", "\\n") 47 | 48 | 49 | class Card: 50 | def __init__(self, cid, qs, ans): 51 | self.cid = cid 52 | self.q = qs 53 | self.a = ans 54 | self.afactor =None 55 | self.ufactor =None 56 | self.lapses =None 57 | self.last_rep = None 58 | self.repetitions = None 59 | self.interval = None 60 | 61 | def __str__(self): 62 | return ("").replace("\n", "") 63 | 64 | def __repr__(self): 65 | return ("").replace("\n", "") 66 | 67 | 68 | class Collection: 69 | def __init__(self, did, name): 70 | self.name = name 71 | self.did = did 72 | self.cards = [] 73 | 74 | def __str__(self): 75 | return "" 77 | 78 | def __repr__(self): 79 | return "" 81 | 82 | 83 | class Note: 84 | def __init__(self, model, flds): 85 | self.model = model 86 | self.flds = flds 87 | self.tags = None 88 | 89 | def __str__(self): 90 | return "" 91 | 92 | def __repr__(self): 93 | return "" 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > taking a break for a while 2 | # Anki to SuperMemo converter 3 | anki2sm is a python script that is meant to batch convert anki decks into supermemo decks, including audios, images and videos. 4 | The scripts also comes with an additional feature of extracting annotated links, for incremental reading. It creates a bat file that when run opens explorer with multiple tabs. You can open supermemo to import these webpages. 5 | This script is meant to run on Windows. If you don't already use SuperMemo or are new to it, feel free to schedule call [here]( https://calendly.com/test0009/raj) with Raj to guide you through the basics of SuperMemo. You can find downloads of SuperMemo at [supermemo.wiki/learn](supermemo.wiki/learn]). 
6 | 7 | ### Steps for usage: 8 | - clone this repo 9 | - make sure requirements.txt is met or run the ```init.bat``` to install the dependancies 10 | - create an ```apkgs``` folder and ```out``` folder within the root directory of the cloned repo 11 | - paste your apkgs into the ```apkgs``` 12 | - run ```run.bat``` which should run the anki2smV2 13 | 14 | For a guide on using anki2sm in video form, check out [this video](https://www.youtube.com/watch?v=j6dmQHMGTJs). 15 | 16 | ##### Some Notes: 17 | ##### Media: 18 | - Media from anki is stored into ```C:\Users\\AppData\Local\Temp\smmedia```. You donot need to create the directory the script creates it. Tested with images and audio. 19 | ##### Fonts: 20 | - Run the script in admin mode for it to install fonts that are sometimes bundled with apkgs. 21 | # TODO: 22 | 1) [ ] Bug test it. 23 | 2) [ ] Anki progress import. 24 | 3) [x] Each collection should have its own concept or topic. 25 | 4) [ ] Support Latex. 26 | 5) [ ] Suport image occlusion. 27 | 6) [ ] Item names to reflect the content. 
28 | 29 | 30 | # Contributers: 31 | - [Raj](https://github.com/rajlego) 32 | 33 | - Leo 34 | 35 | - [lotabout](https://github.com/lotabout/) ([Modified pymustache Library](https://github.com/lotabout/pymustache/blob/master/pymustache/mustache.py)) 36 | 37 | ## Original Contributers 38 | 39 | - [KeepOnSurviving](https://github.com/KeepOnSurviving) 40 | 41 | - [cutie](https://github.com/cutie) 42 | 43 | 44 | -------------------------------------------------------------------------------- /Utils/Encoding.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | 4 | def encode_file_b64(filename: str): 5 | with open(filename, "rb") as image_file: 6 | encoded_string = base64.b64encode(image_file.read()) 7 | return encoded_string -------------------------------------------------------------------------------- /Utils/Fonts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import ctypes 4 | from ctypes import wintypes 5 | 6 | try: 7 | import winreg 8 | except ImportError: 9 | import _winreg as winreg 10 | 11 | user32 = ctypes.WinDLL('user32', use_last_error=True) 12 | gdi32 = ctypes.WinDLL('gdi32', use_last_error=True) 13 | 14 | FONTS_REG_PATH = r'Software\Microsoft\Windows NT\CurrentVersion\Fonts' 15 | 16 | HWND_BROADCAST = 0xFFFF 17 | SMTO_ABORTIFHUNG = 0x0002 18 | WM_FONTCHANGE = 0x001D 19 | GFRI_DESCRIPTION = 1 20 | GFRI_ISTRUETYPE = 3 21 | 22 | if not hasattr(wintypes, 'LPDWORD'): 23 | wintypes.LPDWORD = ctypes.POINTER(wintypes.DWORD) 24 | 25 | user32.SendMessageTimeoutW.restype = wintypes.LPVOID 26 | user32.SendMessageTimeoutW.argtypes = ( 27 | wintypes.HWND, # hWnd 28 | wintypes.UINT, # Msg 29 | wintypes.LPVOID, # wParam 30 | wintypes.LPVOID, # lParam 31 | wintypes.UINT, # fuFlags 32 | wintypes.UINT, # uTimeout 33 | wintypes.LPVOID) # lpdwResult 34 | 35 | gdi32.AddFontResourceW.argtypes = ( 36 | wintypes.LPCWSTR,) # lpszFilename 37 | 38 | # 
http://www.undocprint.org/winspool/getfontresourceinfo 39 | gdi32.GetFontResourceInfoW.argtypes = ( 40 | wintypes.LPCWSTR, # lpszFilename 41 | wintypes.LPDWORD, # cbBuffer 42 | wintypes.LPVOID, # lpBuffer 43 | wintypes.DWORD) # dwQueryType 44 | 45 | def install_font(src_path): 46 | # copy the font to the Windows Fonts folder 47 | dst_path = os.path.join(os.environ['SystemRoot'], 'Fonts', 48 | os.path.basename(src_path)) 49 | shutil.copy(src_path, dst_path) 50 | # load the font in the current session 51 | if not gdi32.AddFontResourceW(dst_path): 52 | os.remove(dst_path) 53 | raise WindowsError('AddFontResource failed to load "%s"' % src_path) 54 | # notify running programs 55 | user32.SendMessageTimeoutW(HWND_BROADCAST, WM_FONTCHANGE, 0, 0, 56 | SMTO_ABORTIFHUNG, 1000, None) 57 | # store the fontname/filename in the registry 58 | filename = os.path.basename(dst_path) 59 | fontname = os.path.splitext(filename)[0] 60 | # try to get the font's real name 61 | cb = wintypes.DWORD() 62 | if gdi32.GetFontResourceInfoW(filename, ctypes.byref(cb), None, 63 | GFRI_DESCRIPTION): 64 | buf = (ctypes.c_wchar * cb.value)() 65 | if gdi32.GetFontResourceInfoW(filename, ctypes.byref(cb), buf, 66 | GFRI_DESCRIPTION): 67 | fontname = buf.value 68 | is_truetype = wintypes.BOOL() 69 | cb.value = ctypes.sizeof(is_truetype) 70 | gdi32.GetFontResourceInfoW(filename, ctypes.byref(cb), 71 | ctypes.byref(is_truetype), GFRI_ISTRUETYPE) 72 | if is_truetype: 73 | fontname += ' (TrueType)' 74 | with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, FONTS_REG_PATH, 0, 75 | winreg.KEY_SET_VALUE) as key: 76 | winreg.SetValueEx(key, fontname, 0, winreg.REG_SZ, filename) -------------------------------------------------------------------------------- /Utils/HtmlUtils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from xml.sax.saxutils import unescape 3 | from bs4 import BeautifulSoup 4 | from Formatters import reSound 5 | from MediaConverter import 
MediaConverter 6 | from Utils.Encoding import encode_file_b64 7 | 8 | IMAGE_TO_ELEMENT_RATIO_W = 0.5 9 | ELEMENT_TO_WINDOW_RATO = (0.5, 0.5) 10 | 11 | 12 | def wrapHtmlIn(html: str, pointOfIns: str, tagtoWrapIn: str) -> str: 13 | tempSoup = BeautifulSoup(html, features="lxml") 14 | newBody = tempSoup.find(pointOfIns) 15 | bodytag = tempSoup.new_tag(tagtoWrapIn) 16 | for content in reversed(newBody.contents): 17 | bodytag.insert(0, content.extract()) 18 | newBody.append(bodytag) 19 | return str(newBody) 20 | 21 | 22 | def strip_control_characters(input): 23 | if input: 24 | import re 25 | 26 | # unicode invalid characters 27 | RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \ 28 | u'|' + \ 29 | u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \ 30 | (chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 31 | chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 32 | chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 33 | ) 34 | input = re.sub(RE_XML_ILLEGAL, "", input) 35 | 36 | # ascii control characters 37 | input = re.sub(r"[\x01-\x1F\x7F]", "", input) 38 | # removing sound tags 39 | input = reSound.sub("", input, ) 40 | 41 | return input 42 | 43 | 44 | def cleanHtml(html, imgcmp=False): 45 | res = reallocateRes('img', html, 46 | "file:///" + os.path.expandvars(r'%LocalAppData%').replace("\\", "/") + "/temp/smmedia/", 47 | imgcomp=imgcmp) 48 | 49 | html = res["soup"] 50 | soup = BeautifulSoup(unescape(html), features="lxml") 51 | 52 | for script in soup(["script"]): 53 | if script == "script": 54 | script.extract() 55 | 56 | for tag in soup.findAll(True): 57 | for attr in [attr for attr in tag.attrs if 58 | attr not in ["style", "name", "id", "class", "src", "href", "onclick"]]: 59 | del tag[attr] 60 | if (imgcmp): 61 | return {"soup": str(soup), "imgs": res["imgs"]} 62 | else: 63 | return {"soup": str(soup)} 64 | 65 | 66 | def get_rule_for_selector(stylesheet, selector): 67 | for rule in stylesheet.cssRules: 68 | if 
hasattr(rule, "selectorList") and selector in [s.selectorText for s in rule.selectorList]: 69 | return rule 70 | 71 | 72 | def insertHtmlAt(html, mod, target, pos): 73 | soup = BeautifulSoup(html, "html.parser") 74 | target = soup.find("head") 75 | toInsert = BeautifulSoup(mod, "html.parser") 76 | target.insert(pos, toInsert) 77 | return str(soup) 78 | 79 | 80 | def reallocateRes(tag, text, location, imgcomp=False): 81 | imptple = () 82 | soup = BeautifulSoup(unescape(text), features="lxml") 83 | for img in soup.find_all(tag): 84 | try: 85 | if img is not None: 86 | if 'src' in img.attrs.keys(): 87 | if not imgcomp: 88 | img_urls = img['src'] 89 | mc = MediaConverter() 90 | img_urls = mc.convertImage(os.getcwd() + "\\out\\out_files\\elements\\"+img_urls) 91 | img_urls = img_urls.split("/")[-1] 92 | # IMAGES_TEMP = IMAGES_TEMP + (img_urls,) 93 | img['src'] = location + img_urls 94 | print(location + img_urls) 95 | # img['width'] = "50%" 96 | # img["style"] = "" 97 | # img['src'] = "data:image/{};base64,{}".format( 98 | # img_urls.split(".")[-1], 99 | # encode_file_b64((location + img_urls).replace("file:///", "")).decode("utf-8")) 100 | else: 101 | img_urls = img['src'] 102 | img.decompose() 103 | imptple = imptple + (img_urls,) 104 | except Exception as e: 105 | print("Failed at parsing this image", img, e) 106 | if imgcomp: 107 | return {"imgs": imptple, "soup": str(soup)} 108 | else: 109 | return {"soup": str(soup)} 110 | -------------------------------------------------------------------------------- /Utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anki2smArchive/anki2sm/561035dd9f0077316cf7a4c3d7daa616fa7bfccf/Utils/__init__.py -------------------------------------------------------------------------------- /anki2sm.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import re 3 | import shutil 4 | import click 5 | import 
magic 6 | import json 7 | import sqlite3 8 | import os 9 | import shutil 10 | from pyquery import PyQuery as pq 11 | import itertools 12 | from os import listdir 13 | from os.path import isfile, join 14 | from bs4 import BeautifulSoup as Soup 15 | from zipfile import ZipFile 16 | from pathlib import Path 17 | import datetime 18 | from yattag import Doc 19 | import fnmatch 20 | from xml.sax.saxutils import unescape 21 | from html.entities import name2codepoint 22 | 23 | 24 | TMP = "out/out_files/elements" 25 | urls = [] 26 | #@click.command() 27 | #@click.option('--file', help='Filename.') 28 | #@click.option('--v', default=True, help='Filename.') 29 | 30 | reComment = re.compile("(?s)") 31 | reStyle = re.compile("(?si).*?") 32 | reScript = re.compile("(?si).*?") 33 | reTag = re.compile("(?s)<.*?>") 34 | reEnts = re.compile(r"&#?\w+;") 35 | 36 | 37 | def stripHTML(s: str) -> str: 38 | s = reComment.sub("", s) 39 | s = reStyle.sub("", s) 40 | s = reScript.sub("", s) 41 | s = reTag.sub("", s) 42 | s = entsToTxt(s) 43 | return s 44 | 45 | def entsToTxt(html: str) -> str: 46 | # entitydefs defines nbsp as \xa0 instead of a standard space, so we 47 | # replace it first 48 | html = html.replace(" ", " ") 49 | 50 | def fixup(m): 51 | text = m.group(0) 52 | if text[:2] == "&#": 53 | # character reference 54 | try: 55 | if text[:3] == "&#x": 56 | return chr(int(text[3:-1], 16)) 57 | else: 58 | return chr(int(text[2:-1])) 59 | except ValueError: 60 | pass 61 | else: 62 | # named entity 63 | try: 64 | text = chr(name2codepoint[text[1:-1]]) 65 | except KeyError: 66 | pass 67 | return text # leave as is 68 | return reEnts.sub(fixup, html) 69 | 70 | def hello(file, v): 71 | """Insert helptext here.""" 72 | p = unzip_file(Path(file)) 73 | if p: 74 | media = unpack_media(p) 75 | out = Path("out") 76 | out.mkdir(parents=True, exist_ok=True) 77 | elements = Path(f"{out.as_posix()}/out_files/elements") 78 | elements.mkdir(parents=True, exist_ok=True) 79 | for k in media: 80 | try: 81 
| shutil.move(p.joinpath(k).as_posix(), elements.joinpath(media[k]).as_posix()) 82 | except: 83 | pass 84 | 85 | doc = unpack_db(p) 86 | 87 | with open(f"{out.as_posix()}/"+os.path.split(file)[-1].split(".")[0]+".xml", "w", encoding="utf-8") as f: 88 | f.write(doc.getvalue()) 89 | return 0 90 | else: 91 | ep("Cannot convert ",os.path.basename(file) ) 92 | return -1 93 | 94 | def unzip_file(zipfile_path: Path) -> Path: 95 | if "zip" not in magic.from_file(zipfile_path.as_posix(), mime=True): 96 | ep("apkg does not appear to be a ZIP file...") 97 | return -1 98 | with ZipFile(zipfile_path.as_posix(), 'r') as apkg: 99 | apkg.extractall(zipfile_path.stem) 100 | return Path(zipfile_path.stem) 101 | 102 | def get_id_func(): 103 | counter = itertools.count() 104 | next(counter) 105 | 106 | def p(): 107 | return str(next(counter)) 108 | return p 109 | 110 | def unpack_db(path: Path): 111 | conn = sqlite3.connect(path.joinpath("collection.anki2").as_posix()) 112 | cursor = conn.cursor() 113 | 114 | doc, tag, text = Doc().tagtext() 115 | 116 | cursor.execute("SELECT * FROM notes") 117 | sep = "\x1f" #some kind of control code that is not valid XML 118 | get_id = get_id_func() 119 | 120 | with tag('SuperMemoCollection'): 121 | with tag('Count'): 122 | text('3') 123 | with tag("SuperMemoElement"): 124 | with tag('ID'): 125 | text(get_id()) 126 | with tag('Title'): #Items don't have titles 127 | text(str(os.path.split(str(Path))[-1].split(".")[0])) 128 | with tag('Type'): #Concept, Topic or Item 129 | text('Topic') 130 | for row in cursor.fetchall(): # @Todo, each collection should have its own concept or topic // donot understand this 131 | id, guid, mid, mod, usn, tags, flds, sfld, csum, flags, data = row 132 | qs = flds.split(sep) 133 | for z in range(len(qs)-1): 134 | if qs[z]!="": 135 | qs[z]=qs[z]+'
' 136 | e = "" 137 | if isinstance(flds, str): 138 | e += flds 139 | if isinstance(flds, str): 140 | sfld += sfld 141 | d = pq(e) 142 | 143 | #anki supports: 144 | #jpg png gif tiff svg tif jpeg mp3 ogg wav avi ogv 145 | #mpg mpeg mov mp4 mkv ogx ogv oga flv swf flac 146 | 147 | #sm17 supports (known) 148 | #jpg png gif bmp jpeg mp3 avi mp4 149 | Content_Sound = () 150 | Content_Video = () 151 | Content_Images=() 152 | if 'img' in e: 153 | img_list = re.findall(']+>', str(d)) 154 | for imgs in img_list: 155 | Content_Images=Content_Images+(imgs,) 156 | 157 | if "[sound:" in e: #@Todo: what happens if [ or ] is in the name?//yea need to fix this 158 | g = re.search("\[sound\:([^\]]+)", e) 159 | for p in g.groups(): 160 | m = Path("{}/{}".format(TMP,p)) 161 | if m.exists(): 162 | if any([ext in m.suffix for ext in ["mp3", "ogg", "wav"]]) \ 163 | or "audio" in magic.from_file(m.as_posix(), mime=True): 164 | Content_Sound = Content_Sound + (p,) 165 | if any([ext in m.suffix for ext in ["mp4", "wmv", "mkv"]]) \ 166 | or "video" in magic.from_file(m.as_posix(), mime=True): 167 | Content_Video = Content_Video + (p,) 168 | 169 | with tag('SuperMemoElement'): 170 | with tag('ID'): 171 | text(get_id()) 172 | with tag('Type'): 173 | text('Item') 174 | with tag('Content'): #zero or more of Question Answer Sound Video Image Binary 175 | with tag('Question'): 176 | a = strip_control_characters(qs[0]) 177 | a = a.encode("ascii", "xmlcharrefreplace").decode("utf-8") 178 | text(stripHTML(a)) 179 | html = Soup(a,'html.parser') 180 | m=[p['href'] for p in html.find_all('a') ] 181 | urls.append(m[0]) if len(m) else "" 182 | 183 | with tag('Answer'): 184 | a = strip_control_characters(" ".join(qs[1:])) 185 | a = a.encode("ascii", "xmlcharrefreplace").decode("utf-8") 186 | text(stripHTML(a)) 187 | 188 | for img in Content_Images: 189 | with tag('Image'): 190 | with tag('URL'): 191 | text(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\{}".format(img)) 192 | with tag('Name'): 193 
| text(img) 194 | 195 | for s in Content_Video: 196 | with tag('Video'): 197 | with tag('URL'): 198 | text(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\{}".format(s)) 199 | with tag('Name'): 200 | text(s) 201 | 202 | for s in Content_Sound: 203 | with tag('Sound'): 204 | with tag('URL'): 205 | text(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\{}".format(s)) 206 | with tag('Name'): 207 | text(s) 208 | with tag('Text'): 209 | text("") 210 | 211 | with tag("LearningData"): #@Todo, convert anki learning data to sm //anki's database is a mess 212 | with tag("Interval"): 213 | text("1") 214 | with tag("Repetitions"): 215 | text("1") 216 | with tag("Lapses"): 217 | text("0") 218 | with tag("LastRepetition"): 219 | text(datetime.date.today().strftime("%d.%m.%Y")) 220 | with tag("AFactor"): 221 | text("3.92") # values taken off untrained data in SM 222 | with tag("UFactor"): 223 | text("3") 224 | with tag("RepHist"): 225 | text("") 226 | pp("Amount of cards: {}".format(int(get_id())-1)) 227 | return doc 228 | 229 | def unpack_media(media_dir: Path): 230 | if not media_dir.exists(): 231 | raise FileNotFoundError 232 | 233 | with open(media_dir.joinpath("media").as_posix(), "r") as f: 234 | m = json.loads(f.read()) 235 | pp("Amount of media files: {}".format(len(m))) 236 | return m 237 | 238 | 239 | 240 | def strip_control_characters(input): 241 | if input: 242 | import re 243 | 244 | # unicode invalid characters 245 | RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \ 246 | u'|' + \ 247 | u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \ 248 | (chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 249 | chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 250 | chr(0xd800), chr(0xdbff), chr(0xdc00), chr(0xdfff), 251 | ) 252 | input = re.sub(RE_XML_ILLEGAL, "", input) 253 | 254 | # ascii control characters 255 | input = re.sub(r"[\x01-\x1F\x7F]", "", input) 256 | #removing sound tags 257 | input = 
re.sub(r"\[sound\:([^\]]+)]","",input) 258 | 259 | return input 260 | 261 | def ep(p): 262 | click.secho(str(">> "+p), fg="red", nl=False) 263 | 264 | def pp(p): 265 | click.secho(">> ", fg="green", nl=False) 266 | click.echo(p) 267 | 268 | 269 | if __name__ == '__main__': 270 | IR_yee_or_nay = str(input("Do you want to scrape annotations into a bat file for IR (Y/N): ")) 271 | mypath =str(os.getcwd()+"\\apkgs\\") 272 | apkgfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))] 273 | for i in range(len(apkgfiles)): 274 | hello(mypath+apkgfiles[i],v=True) 275 | print("Done with ",i+1,"out of",len(apkgfiles)) 276 | 277 | try: 278 | shutil.rmtree(os.path.splitext(apkgfiles[i])[0]) 279 | except OSError as e: 280 | print ("Error: %s - %s." % (e.filename, e.strerror)) 281 | 282 | #creating smmedia if it doesnot exist 283 | if not os.path.exists(str(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\")): 284 | try: 285 | os.makedirs(str(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\")) 286 | except OSError as e: 287 | if e.errno != errno.EEXIST: 288 | raise 289 | #moving media files to smmedia 290 | files = os.listdir(os.getcwd()+"\\out\\out_files\\elements") 291 | for f in files: 292 | if f not in os.listdir(str(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\")): 293 | shutil.move(os.getcwd()+"\\out\\out_files\\elements\\"+f, str(os.path.expandvars(r'%LocalAppData%')+"\\temp\\smmedia\\")) 294 | #deleting temp media files 295 | try: 296 | shutil.rmtree(os.getcwd()+"\\out\\out_files\\elements") 297 | shutil.rmtree(os.getcwd()+"\\out\\out_files") 298 | except OSError as e: 299 | print ("Error: %s - %s." 
% (e.filename, e.strerror)) 300 | 301 | 302 | if IR_yee_or_nay in ['1','Y','y','ye','yes'] and urls: 303 | with open('IR.bat', 'w')as ir: 304 | ir.writelines(['@echo off\n\n']+[str("start /d IEXPLORE.EXE"+i+'\n') for i in urls]) 305 | meh=input("Press Enter to Exit") -------------------------------------------------------------------------------- /anki2smV2.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import errno 3 | import os 4 | import re 5 | import shutil 6 | import sqlite3 7 | import zipfile 8 | from datetime import datetime 9 | from os import listdir 10 | from os.path import isfile, join 11 | from pathlib import Path,WindowsPath 12 | import json 13 | from collections import defaultdict 14 | from zipfile import ZipFile 15 | from progress.bar import IncrementalBar 16 | from magic import magic 17 | import Formatters 18 | import mustache 19 | from yattag import Doc 20 | import itertools 21 | import premailer 22 | import cssutils 23 | import logging 24 | import click 25 | from Utils.Fonts import install_font 26 | from config import Config 27 | from Utils.HtmlUtils import \ 28 | ( 29 | wrapHtmlIn, 30 | strip_control_characters, 31 | cleanHtml, 32 | get_rule_for_selector, 33 | insertHtmlAt 34 | ) 35 | from Models import \ 36 | ( 37 | Model, 38 | Template, 39 | Card, 40 | Collection, 41 | Note, 42 | EmptyString 43 | ) 44 | 45 | cssutils.log.setLevel(logging.CRITICAL) 46 | 47 | SUB_DECK_MARKER = '' 48 | 49 | Anki_Collections = defaultdict(dict, ((SUB_DECK_MARKER, []),)) 50 | AnkiNotes = {} 51 | AnkiModels = {} 52 | totalCardCount = 0 53 | 54 | doc, tag, text = Doc().tagtext() 55 | 56 | IMPORT_LEARNING_DATA = False 57 | IMAGES_AS_COMPONENT = False 58 | MAINTAIN_STYLING = True 59 | 60 | SIDES = ("q", "a", "anki") 61 | 62 | DEFAULT_SIDE = SIDES[2] 63 | 64 | IMAGES_TEMP = () 65 | FAILED_DECKS = [] 66 | 67 | 68 | # ============================================ Other Util Stuff But Deck related 
# ============================================ Other Util Stuff But Deck related =================================

def getDeckFromID(d, did: str):
    """Depth-first search of the deck tree *d* for the Collection whose
    .did equals *did*; returns None when no deck matches."""
    for key, value in d.items():
        if key == SUB_DECK_MARKER:
            # leaf bucket: a list of Collection objects
            if value:
                for col in value:
                    if col.did == did:
                        return col
        elif isinstance(value, dict):
            found = getDeckFromID(value, did)
            if found is not None:
                return found
        elif isinstance(value, Collection) and value.did == did:
            return value
    return None


def getTemplateofOrd(templates, ord: int):
    """Return the template whose .ord matches *ord*, or None if absent."""
    for templ in templates:
        if templ.ord == ord:
            return templ
    return None


def get_id_func():
    """Return a nullary function producing "1", "2", ... on successive calls
    (SuperMemo element IDs are 1-based strings)."""
    counter = itertools.count(1)

    def p() -> str:
        return str(next(counter))

    return p


get_id = get_id_func()


# A previous draft converted Anki scheduling data to SuperMemo factors here
# (convert_time / scale_afactor); kept out until anki's scheduler is better
# understood.
# Code Source: https://groups.google.com/d/msg/supermemo_users/dTzhEog6zPk/8wqBk4qcCgAJ
# Its Author: Mnd Mau

# ============================================= Some Util Functions =============================================

def ep(p) -> None:
    """error print: red '>> ' prefix, no trailing newline"""
    click.secho(">> " + p, fg="red", nl=False)


def pp(p) -> None:
    """pretty print: green '>> ' prefix followed by the message"""
    click.secho(">> ", fg="green", nl=False)
    click.echo(p)


def wp(p) -> None:
    """warning print - yellow in color"""
    click.secho(p, fg="yellow", nl=True)


def resetGlobals() -> None:
    """Reset all per-apkg module state so the next deck starts clean."""
    global Anki_Collections, AnkiNotes, AnkiModels, totalCardCount, doc, tag, text, IMAGES_TEMP, MAINTAIN_STYLING
    MAINTAIN_STYLING = True
    Anki_Collections = defaultdict(dict, ((SUB_DECK_MARKER, []),))
    AnkiNotes = {}
    AnkiModels = {}
    IMAGES_TEMP = ()
    totalCardCount = 0
    doc, tag, text = Doc().tagtext()


def unpack_db(path: Path) -> None:
    """Read the unzipped collection.anki2 under *path*, build the in-memory
    deck/model/note/card structures and export them to xml."""
    conn = sqlite3.connect(path.joinpath("collection.anki2").as_posix())
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM col")
        for row in cursor.fetchall():
            # the col table's models/decks columns are JSON blobs
            did, crt, mod, scm, ver, dty, usn, ls, conf, models, decks, dconf, tags = row
            buildColTree(decks)
            buildModels(models)
            buildNotes(path)
            buildCardsAndDeck(path)
    finally:
        conn.close()  # connection previously leaked
    print("\tExporting into xml...\n\n")
    export(path)


def unpack_media(media_dir: Path):
    """Load the 'media' JSON manifest (zip member name -> real filename)
    from the unzipped apkg directory and return it as a dict."""
    with open(media_dir.joinpath("media").as_posix(), "r") as f:
        m = json.loads(f.read())
    print(f'\tAmount of media files: {len(m)}\n')
    return m


def unzip_member_f3(zip_filepath, filename, dest):
    """Extract a single *filename* from the zip at *zip_filepath* into *dest*
    (worker for the process pool in unzip_file)."""
    with open(zip_filepath, 'rb') as f:
        zf = ZipFile(f)
        zf.extract(filename, dest)


def unzip_file(zipfile_path: Path) -> Path:
    """Unzip the apkg at *zipfile_path* into a directory named after its
    stem, extracting members in parallel, and return that directory."""
    with open(zipfile_path.as_posix(), 'rb') as f:
        zf = ZipFile(f)
        # pool shutdown (context-manager exit) waits for all extractions
        with concurrent.futures.ProcessPoolExecutor() as executor:
            futures = [
                executor.submit(
                    unzip_member_f3,
                    zipfile_path.as_posix(),
                    member.filename,
                    zipfile_path.stem,
                )
                for member in zf.infolist()
            ]
    return Path(zipfile_path.stem)


# ============================================= Deck Builder Functions =============================================

def attach(key, branch, trunk) -> None:
    """Insert a branch of Decks ('A::B::C'-style name) on its trunk."""
    parts = branch.split('::', 1)
    if len(parts) == 1:  # branch is a leaf sub-deck
        trunk[SUB_DECK_MARKER].append(Collection(key, parts[0]))
    else:
        node, others = parts
        if node not in trunk:
            trunk[node] = defaultdict(dict, ((SUB_DECK_MARKER, []),))
        attach(key, others, trunk[node])


def prettyDeckTree(d, indent=0):
    """Print the deck tree *d*, one level of indentation per nesting level."""
    for key, value in d.items():
        if key == SUB_DECK_MARKER:
            if value:
                print(' ' * indent + str(value))
        else:
            print(' ' * indent + str(key))
            if isinstance(value, dict):
                prettyDeckTree(value, indent + 1)
            else:
                print(' ' * (indent + 1) + str(value))


def isSubDeck(d: dict, name: str) -> bool:
    """True when *name* appears as an inner (non-leaf) deck key anywhere in *d*."""
    for key, value in d.items():
        if key == name:
            return True
        if isinstance(value, dict) and isSubDeck(value, name):
            return True
    return False


def getSubDeck(d: dict, name: str) -> "Collection":
    """Return the Collection named *name* anywhere in tree *d*, or None."""
    for key, value in d.items():
        if key == SUB_DECK_MARKER:
            if value:
                for col in value:
                    if col.name == name:
                        return col
        elif isinstance(value, dict):
            found = getSubDeck(value, name)
            if found is not None:
                return found
    return None


def buildColTree(m: str):
    """Parse the decks JSON blob *m* and attach every deck name to the
    global Anki_Collections tree."""
    global Anki_Collections
    y = json.loads(m)
    with IncrementalBar("\tBuilding Collection Tree", max=len(y.keys())) as bar:
        for k in y.keys():
            attach(k, y[k]["name"], Anki_Collections)
            bar.next()
        bar.finish()


def buildModels(t: str):
    """Parse the models JSON blob *t* into Model objects (ordered field
    names plus templates), keyed by model id in the global AnkiModels."""
    global AnkiModels
    y = json.loads(t)
    with IncrementalBar("\tBuilding Models", max=len(y.keys())) as bar:
        for k in y.keys():
            mid = str(y[k]["id"])
            AnkiModels[mid] = Model(mid, y[k]["type"], cssutils.parseString(y[k]["css"]), y[k]["latexPre"], y[k]["latexPost"])

            # field names sorted by their ordinal
            flds = [(fld["name"], fld["ord"]) for fld in y[k]["flds"]]
            flds.sort(key=lambda x: int(x[1]))
            AnkiModels[mid].flds = tuple(f[0] for f in flds)

            AnkiModels[mid].tmpls = tuple(
                Template(tmpl["name"], tmpl["qfmt"], tmpl["did"], tmpl["bafmt"], tmpl["afmt"], tmpl["ord"],
                         tmpl["bqfmt"])
                for tmpl in y[k]["tmpls"]
            )
            bar.next()
        bar.finish()


def buildStubbleDict(note: "Note"):
    """Map a note's model field names to its field values for mustache
    rendering; 'Tags' maps to the note's non-empty tags."""
    # Anki stores a note's fields joined by the 0x1f unit separator; the
    # literal control character was lost in transit, so spell it explicitly
    # (splitting on an empty string would raise ValueError).
    cflds = note.flds.split("\x1f")
    temp_dict = {}
    for f, v in zip(note.model.flds, cflds):
        temp_dict[str(f)] = str(v)
    temp_dict["Tags"] = [i for i in note.tags if i]
    return temp_dict


def buildNotes(path: Path):
    """Read every row of the notes table and build Note objects (with model
    and tags) keyed by note id in the global AnkiNotes."""
    global AnkiNotes
    conn = sqlite3.connect(path.joinpath("collection.anki2").as_posix())
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM notes")
        rows = cursor.fetchall()
    finally:
        conn.close()  # connection previously leaked
    with IncrementalBar('\tBuilding Notes', max=len(rows)) as bar:
        for row in rows:
            nid, guid, mid, mod, usn, tags, flds, sfld, csum, flags, data = row
            reqModel = AnkiModels[str(mid)]
            AnkiNotes[str(nid)] = Note(reqModel, flds)
            AnkiNotes[str(nid)].tags = EmptyString(tags).split(" ")
            bar.next()
        bar.finish()
bar.finish() 336 | 337 | 338 | # Commented until a better understanding of anki is reached 339 | # Source: https://groups.google.com/d/msg/supermemo_users/dTzhEog6zPk/8wqBk4qcCgAJ 340 | # Author: Mnd Mau 341 | # 342 | # def buildCardData(card: Card, minEase, maxEase): 343 | # if element[5] == '': 344 | # last_repetition = datetime.strptime(element[7], '%Y-%m-%d') 345 | # else: 346 | # last_repetition = datetime.strptime(element[5], '%Y-%m-%d') 347 | # current_interval = convert_time(element[2]) 348 | # prior_interval = convert_time(element[6]) 349 | # if prior_interval == '': 350 | # card.ufactor = format(current_interval, '.3f') 351 | # else: 352 | # card.ufactor = format(current_interval / prior_interval, '.3f') 353 | # if '(new)' in element[8]: 354 | # card.afactor = '3.000' 355 | # else: 356 | # ease = float(element[8][:-1]) 357 | # card.afactor = str(format(scale_afactor(ease, minEase, maxEase), '.3f')) 358 | # 359 | 360 | def buildCardsAndDeck(path: Path): 361 | global AnkiNotes, AnkiModels, Anki_Collections, totalCardCount, FAILED_DECKS 362 | conn = sqlite3.connect(path.joinpath("collection.anki2").as_posix()) 363 | cursor = conn.cursor() 364 | cursor.execute( 365 | "SELECT * FROM cards ORDER BY factor ASC") # min ease would at rows[0] and max index would be at rows[-1] 366 | rows = cursor.fetchall() 367 | with IncrementalBar("\tBuilding Cards and deck", max=len(rows)) as bar: 368 | for row in rows: 369 | cid, nid, did, ordi, mod, usn, crtype, queue, due, ivl, factor, reps, lapses, left, odue, odid, flags, data = row 370 | reqNote = AnkiNotes[str(nid)] 371 | genCard = None 372 | 373 | if reqNote.model.type == 0: 374 | reqTemplate = getTemplateofOrd(reqNote.model.tmpls, int(ordi)) 375 | 376 | questionTg = "
" \ 378 | + mustache.render(reqTemplate.qfmt, buildStubbleDict(reqNote)) + "
" 379 | answerTag = "
" \ 381 | + mustache.render(reqTemplate.afmt, buildStubbleDict(reqNote)) + "
" 382 | questionTg = premailer.transform(questionTg) 383 | answerTag = premailer.transform(answerTag) 384 | genCard = Card(cid, questionTg, answerTag) 385 | 386 | elif reqNote.model.type == 1: 387 | reqTemplate = getTemplateofOrd(reqNote.model.tmpls, 0) 388 | 389 | mustache.filters["cloze"] = lambda txt: Formatters.cloze_q_filter(txt, str(int(ordi) + 1)) 390 | 391 | css = reqNote.model.css 392 | css = buildCssForOrd(css, ordi) if css else "" 393 | 394 | questionTg = "
" \ 395 | + mustache.render(reqTemplate.qfmt, buildStubbleDict(reqNote)) + "
" 396 | 397 | mustache.filters["cloze"] = lambda txt: Formatters.cloze_a_filter(txt, str(int(ordi) + 1)) 398 | 399 | answerTag = "
" \ 400 | + mustache.render(reqTemplate.afmt, buildStubbleDict(reqNote)) + "
" 401 | 402 | questionTg = premailer.transform(questionTg) 403 | answerTag = premailer.transform(answerTag) 404 | genCard = Card(cid, questionTg, answerTag) 405 | 406 | if genCard is not None: 407 | reqDeck = getDeckFromID(Anki_Collections, str(did)) 408 | if reqDeck is not None: 409 | reqDeck.cards.append(genCard) 410 | else: 411 | if did not in FAILED_DECKS: 412 | FAILED_DECKS.append(did) 413 | else: 414 | if did not in FAILED_DECKS: 415 | FAILED_DECKS.append(did) 416 | totalCardCount += 1 417 | bar.next() 418 | bar.finish() 419 | 420 | 421 | def buildCssForOrd(css, ordi): 422 | pagecss = css 423 | defaultCardCss = get_rule_for_selector(pagecss, ".card") 424 | ordinalCss = get_rule_for_selector(pagecss, ".card{}".format(ordi + 1)) 425 | try: 426 | ordProp = [prop for prop in ordinalCss.style.getProperties()] 427 | for dprop in defaultCardCss.style.getProperties(): 428 | if dprop.name in [n.name for n in ordProp]: 429 | defaultCardCss.style[dprop.name] = ordinalCss.style.getProperty(dprop.name).value 430 | except: 431 | pass 432 | if defaultCardCss is not None: 433 | return defaultCardCss.cssText 434 | else: 435 | return "" 436 | 437 | 438 | # ============================================= Import and Export Function ============================================= 439 | 440 | def export(file): 441 | global Anki_Collections 442 | out = Path("out") 443 | out.mkdir(parents=True, exist_ok=True) 444 | 445 | with tag('SuperMemoCollection'): 446 | with tag('Count'): 447 | text(str(totalCardCount)) 448 | SuperMemoCollection(Anki_Collections) 449 | 450 | with open(f"{out.as_posix()}/" + os.path.split(file)[-1].split(".")[0] + ".xml", "w", encoding="utf-8") as f: 451 | f.write(doc.getvalue()) 452 | 453 | 454 | def start_import(file: str) -> int: 455 | p = unzip_file(Path(file)) 456 | if p is not None and type(p) is WindowsPath: 457 | media = unpack_media(p) 458 | out = Path("out") 459 | out.mkdir(parents=True, exist_ok=True) 460 | elements = 
Path(f"{out.as_posix()}/out_files/elements") 461 | try: 462 | os.makedirs(elements.as_posix()) 463 | except: 464 | pass 465 | for k in media: 466 | try: 467 | shutil.move(p.joinpath(k).as_posix(), elements.joinpath(media[k]).as_posix()) 468 | except: 469 | pass 470 | unpack_db(p) 471 | return 0 472 | else: 473 | ep("Error: Cannot convert %s" % os.path.basename(file)) 474 | return -1 475 | 476 | 477 | # =============================================SuperMemo Xml Output Functions ============================================= 478 | 479 | def SuperMemoCollection(d: dict, indent=0): 480 | global doc, tag, text 481 | for key, value in d.items(): 482 | if key == SUB_DECK_MARKER: 483 | if value: 484 | for col in value: 485 | if not isSubDeck(Anki_Collections, col.name): 486 | SuperMemoTopic(col, col.name) 487 | else: 488 | if isinstance(value, dict): 489 | with tag("SuperMemoElement"): 490 | with tag('ID'): 491 | text(get_id()) 492 | with tag('Title'): 493 | text(str(key)) 494 | with tag('Type'): 495 | text('Topic') 496 | SuperMemoCollection(value, indent=indent + 1) 497 | subdk = getSubDeck(Anki_Collections, key) 498 | if subdk: 499 | if subdk.cards is not None: 500 | for c in subdk.cards: 501 | SuperMemoElement(c) 502 | 503 | 504 | def cardHasData(card: Card) -> bool: 505 | if card != None: 506 | return card.ufactor and card.afactor and \ 507 | card.interval and card.lapses and \ 508 | card.last_rep and card.repetitions 509 | else: 510 | return False 511 | 512 | 513 | def SuperMemoElement(card: Card) -> None: 514 | global doc, tag, text, get_id, IMAGES_TEMP, DEFAULT_SIDE, SIDES 515 | IMAGES_TEMP = () 516 | 517 | QContent_Sounds = () 518 | QContent_Videos = () 519 | 520 | AContent_Sounds = () 521 | AContent_Videos = () 522 | 523 | if "[sound:" in str(card.q): 524 | g = re.search(r"(?:\[sound:)([^]]+)(?:\])", str(card.q)) 525 | if g is not None: 526 | for p in g.groups(): 527 | m = Path("{}/{}".format("out/out_files/elements", p)) 528 | if m.exists(): 529 | if any([ext in 
m.suffix for ext in ["mp3", "ogg", "wav"]]) \ 530 | or "audio" in magic.from_file(m.as_posix(), mime=True): 531 | QContent_Sounds = QContent_Sounds + (p,) 532 | if any([ext in m.suffix for ext in ["mp4", "wmv", "mkv"]]) \ 533 | or "video" in magic.from_file(m.as_posix(), mime=True): 534 | QContent_Videos = QContent_Videos + (p,) 535 | 536 | if "[sound:" in str(card.a): 537 | g = re.search(r"(?:\[sound:)([^]]+)(?:\])", str(card.a)) 538 | if g is not None: 539 | for p in g.groups(): 540 | m = Path("{}/{}".format("out/out_files/elements", p)) 541 | if m.exists(): 542 | if any([ext in m.suffix for ext in ["mp3", "ogg", "wav"]]) \ 543 | or "audio" in magic.from_file(m.as_posix(), mime=True): 544 | AContent_Sounds = AContent_Sounds + (p,) 545 | if any([ext in m.suffix for ext in ["mp4", "wmv", "mkv"]]) \ 546 | or "video" in magic.from_file(m.as_posix(), mime=True): 547 | AContent_Videos = AContent_Videos + (p,) 548 | 549 | card.q = Formatters.reSound.sub("", card.q) 550 | card.a = Formatters.reSound.sub("", card.a) 551 | 552 | enforceSectionJS = """""" 553 | liftIERestriction = """""" 554 | forcedCss = """""" 555 | with tag('SuperMemoElement'): 556 | with tag('ID'): 557 | text(get_id()) 558 | with tag('Type'): 559 | text('Item') 560 | with tag('Content'): 561 | with tag('Question'): 562 | a = wrapHtmlIn(card.q, 'head', 'body') 563 | res = cleanHtml(a, imgcmp=IMAGES_AS_COMPONENT) 564 | if IMAGES_AS_COMPONENT: 565 | IMAGES_TEMP = IMAGES_TEMP + res["imgs"] 566 | if MAINTAIN_STYLING: 567 | a = insertHtmlAt(res["soup"], enforceSectionJS, 'head', 0) 568 | a = insertHtmlAt(a, liftIERestriction, 'head', 0) 569 | else: 570 | a = res["soup"] 571 | if not IMAGES_AS_COMPONENT and len(IMAGES_TEMP) != 0: 572 | a = insertHtmlAt(a, forcedCss, 'head', 0) 573 | a = strip_control_characters(a) 574 | a = a.encode("ascii", "xmlcharrefreplace").decode("utf-8") 575 | text(a) 576 | 577 | for s in QContent_Videos: 578 | with tag('Video'): 579 | with tag('URL'): 580 | 
text(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\{}".format(s)) 581 | with tag('Name'): 582 | text(s) 583 | if DEFAULT_SIDE != SIDES[2] and \ 584 | DEFAULT_SIDE != SIDES[0]: 585 | with tag("Question"): 586 | text("F") 587 | with tag("Answer"): 588 | text("T") 589 | else: 590 | with tag("Question"): 591 | text("T") 592 | with tag("Answer"): 593 | text("F") 594 | 595 | for s in QContent_Sounds: 596 | with tag('Sound'): 597 | with tag('URL'): 598 | text(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\{}".format(s)) 599 | with tag('Name'): 600 | text(s) 601 | with tag('Text'): 602 | text("") 603 | if DEFAULT_SIDE != SIDES[2] and \ 604 | DEFAULT_SIDE != SIDES[0]: 605 | with tag("Question"): 606 | text("F") 607 | with tag("Answer"): 608 | text("T") 609 | else: 610 | with tag("Question"): 611 | text("T") 612 | with tag("Answer"): 613 | text("F") 614 | 615 | # html = Soup(a,'html.parser') 616 | # m=[p['href'] for p in html.find_all('a') ] 617 | # urls.append(m[0]) if len(m) else "" 618 | 619 | with tag('Answer'): 620 | res = cleanHtml(card.a, imgcmp=IMAGES_AS_COMPONENT) 621 | if IMAGES_AS_COMPONENT: 622 | IMAGES_TEMP = IMAGES_TEMP + res["imgs"] 623 | if MAINTAIN_STYLING: 624 | a = insertHtmlAt(res["soup"], enforceSectionJS, 'head', 0) 625 | a = insertHtmlAt(a, liftIERestriction, 'head', 0) 626 | else: 627 | a = res["soup"] 628 | if not IMAGES_AS_COMPONENT and len(IMAGES_TEMP) != 0: 629 | a = insertHtmlAt(a, forcedCss, 'head', 0) 630 | a = strip_control_characters(a) 631 | a = a.encode("ascii", "xmlcharrefreplace").decode("utf-8") 632 | text(a) 633 | 634 | for s in AContent_Videos: 635 | with tag('Video'): 636 | with tag('URL'): 637 | text(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\{}".format(s)) 638 | with tag('Name'): 639 | text(s) 640 | if DEFAULT_SIDE != SIDES[2] and \ 641 | DEFAULT_SIDE != SIDES[1]: 642 | with tag("Question"): 643 | text("T") 644 | with tag("Answer"): 645 | text("F") 646 | else: 647 | with tag("Question"): 648 | 
text("F") 649 | with tag("Answer"): 650 | text("T") 651 | 652 | for s in AContent_Sounds: 653 | with tag('Sound'): 654 | with tag('URL'): 655 | text(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\{}".format(s)) 656 | with tag('Name'): 657 | text(s) 658 | with tag('Text'): 659 | text("") 660 | if DEFAULT_SIDE != SIDES[2] and \ 661 | DEFAULT_SIDE != SIDES[1]: 662 | with tag("Question"): 663 | text("T") 664 | with tag("Answer"): 665 | text("F") 666 | else: 667 | with tag("Question"): 668 | text("F") 669 | with tag("Answer"): 670 | text("T") 671 | 672 | for img in IMAGES_TEMP: 673 | with tag('Image'): 674 | with tag('URL'): 675 | text(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\{}".format(img)) 676 | with tag('Name'): 677 | text(img) 678 | if DEFAULT_SIDE == SIDES[1]: 679 | with tag("Question"): 680 | text("F") 681 | with tag("Answer"): 682 | text("T") 683 | elif DEFAULT_SIDE == SIDES[0]: 684 | with tag("Question"): 685 | text("T") 686 | with tag("Answer"): 687 | text("F") 688 | 689 | if False and cardHasData(card): 690 | with tag("LearningData"): 691 | with tag("Interval"): 692 | text("1") 693 | with tag("Repetitions"): 694 | text("1") 695 | with tag("Lapses"): 696 | text("0") 697 | with tag("LastRepetition"): 698 | text(datetime.date("").strftime("%d.%m.%Y")) 699 | with tag("AFactor"): 700 | text("3.92") 701 | with tag("UFactor"): 702 | text("3") 703 | 704 | 705 | def SuperMemoTopic(col, ttl) -> None: 706 | global doc, tag, text, get_id 707 | with tag("SuperMemoElement"): 708 | with tag('ID'): 709 | text(get_id()) 710 | with tag('Title'): 711 | text(str(ttl)) 712 | # print(str(ttl)) 713 | with tag('Type'): 714 | text('Topic') 715 | if col.cards != None: 716 | for c in col.cards: 717 | SuperMemoElement(c) 718 | 719 | 720 | # ============================================= Configuration ============================================= 721 | def loadConfig(): 722 | global IMAGES_AS_COMPONENT, DEFAULT_SIDE, IMPORT_LEARNING_DATA, SIDES 723 | f = 
open('anki2smConfig.cfg') 724 | cfg = Config(f) 725 | try: 726 | tempIMAGES_AS_COMPONENT = cfg.get("img_as_component", False) 727 | tempDEFAULT_SIDE = cfg["default_side"] if cfg["default_side"] in SIDES else "anki" 728 | tempIMPORT_LEARNING_DATA = cfg.get("import_learning_data", False) 729 | 730 | IMAGES_AS_COMPONENT = tempIMAGES_AS_COMPONENT 731 | DEFAULT_SIDE = tempDEFAULT_SIDE 732 | IMPORT_LEARNING_DATA = tempIMPORT_LEARNING_DATA 733 | except: 734 | ep("Error: Corrupt Configuration file!") 735 | return -1 736 | finally: 737 | f.close() 738 | return 0 739 | 740 | 741 | def saveConfig(): 742 | global IMAGES_AS_COMPONENT, DEFAULT_SIDE, IMPORT_LEARNING_DATA 743 | with open('anki2smConfig.cfg', 'w+') as f: 744 | f.write(f'{"img_as_component"}:{IMAGES_AS_COMPONENT}\n') 745 | f.write(f'{"default_side"}:\"{DEFAULT_SIDE}\"\n') 746 | f.write(f'{"import_learning_data"}:{IMPORT_LEARNING_DATA}\n') 747 | 748 | 749 | def prompt_for_config(): 750 | global IMAGES_AS_COMPONENT, DEFAULT_SIDE 751 | # Asking the user how they want the images to be displayed 752 | print("Do You want images as:") 753 | print("\tY - A separate component ") 754 | print("\tN - Embedded within the Html - experimental") 755 | tempInp: str = str(input("")) 756 | if tempInp.casefold() in "Y".casefold(): 757 | IMAGES_AS_COMPONENT = True 758 | elif tempInp.casefold() != "N".casefold(): 759 | print("Wrong input provided, proceeding as embedded") 760 | # Asking the user where they want the components to end up 761 | print("Where do you want the components to end up:") 762 | print("\t 1 = Front") 763 | print("\t 2 = Back ") 764 | print("\t 3 = Leave them as is") 765 | tempInp: int = int(input("")) 766 | if 0 >= tempInp > 3: 767 | print("Wrong input provided, proceeding as it is in anki") 768 | else: 769 | DEFAULT_SIDE = SIDES[tempInp - 1] 770 | # Asking the user if they want to save the options as a configuration file 771 | print("Do you want to save options for later? 
(Y/N)") 772 | tempInp: str = str(input("")) 773 | if tempInp.casefold() in "Y".casefold(): 774 | saveConfig() 775 | 776 | 777 | # ============================================= Main Function ============================================= 778 | 779 | def main(): 780 | global AnkiNotes, totalCardCount, IMAGES_AS_COMPONENT, DEFAULT_SIDE, SIDES, MAINTAIN_STYLING 781 | mypath = str(os.getcwd() + "\\apkgs\\") 782 | apkgfiles = [f for f in listdir(mypath) if isfile(join(mypath, f)) and (f.endswith(".apkg") or f.endswith(".zip"))] 783 | 784 | if len(apkgfiles) == 0: 785 | ep("Error: No apkg in apkgs folder.") 786 | exit(0) 787 | 788 | if os.path.isfile('./anki2smConfig.cfg'): 789 | if 0 > loadConfig(): 790 | prompt_for_config() 791 | else: 792 | prompt_for_config() 793 | 794 | for i in range(len(apkgfiles)): 795 | pp(f'Processing {apkgfiles[i]} : {i + 1}/{len(apkgfiles)}') 796 | 797 | print("Do you want to enforce the deck's attributes and lift IE restrictions: ") 798 | wp("Please be aware that selecting No is going to:\n \t1. Rendering maybe broken\n\t2. Disable the deck's css and fonts\n\n") 799 | print("\tY - Yes I want to lift the restrictions.") 800 | print("\tN - No I choose not to lift the restrictions.") 801 | tempInp: str = str(input("")) 802 | if tempInp.casefold() in "N".casefold(): 803 | MAINTAIN_STYLING = False 804 | elif tempInp.casefold() != "Y".casefold(): 805 | print("Wrong input provided, preserving deck's attributes") 806 | 807 | start_import(mypath + apkgfiles[i]) 808 | resetGlobals() 809 | try: 810 | shutil.rmtree(os.path.splitext(apkgfiles[i])[0]) 811 | except OSError as e: 812 | ep("Error: %s - %s." 
% (e.filename, e.strerror)) 813 | 814 | # creating smmedia if it doesnot exist 815 | if not os.path.exists(str(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\")): 816 | try: 817 | os.makedirs(str(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\")) 818 | except OSError as e: 819 | if e.errno != errno.EEXIST: 820 | raise 821 | 822 | # moving media files to smmedia 823 | files = os.listdir(os.getcwd() + "\\out\\out_files\\elements") 824 | fonts= [x for x in files if x.endswith(".ttf")] 825 | for font in fonts: 826 | try: 827 | font_path = os.getcwd() + "\\out\\out_files\\elements\\"+font 828 | install_font(font_path.replace("\\","/")) 829 | except: 830 | ep("Error: Failed to install the font {}. \n\tRe-run script in admin mode if it is not or manually install it Path[{}].\n".format(font,font_path)) 831 | 832 | with IncrementalBar("Moving Media Files DON'T CLOSE!", max=len(files)) as bar: 833 | for f in files: 834 | if f not in os.listdir(str(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\")): 835 | try: 836 | shutil.move(os.getcwd() + "\\out\\out_files\\elements\\" + f, 837 | str(os.path.expandvars(r'%LocalAppData%') + "\\temp\\smmedia\\")) 838 | except: 839 | pass 840 | bar.next() 841 | bar.finish() 842 | 843 | # deleting temp media files 844 | try: 845 | shutil.rmtree(os.getcwd() + "\\out\\out_files\\elements") 846 | shutil.rmtree(os.getcwd() + "\\out\\out_files") 847 | except OSError as e: 848 | ep("Error: %s - %s." 
% (e.filename, e.strerror)) 849 | 850 | 851 | if __name__ == '__main__': 852 | main() 853 | if len(FAILED_DECKS) > 0: 854 | wp("An Error occured while processing the following decks:") 855 | for i in FAILED_DECKS: 856 | print(i) 857 | wp( 858 | "Please send an email to anki2sm.dev@protonmail.com with the attached deck(s) and the failed deck ids above.") 859 | -------------------------------------------------------------------------------- /init.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | pip3 install -r requirements.txt 3 | pip uninstall python-magic 4 | pip install python-magic-bin~=0.4.14 5 | pause 6 | -------------------------------------------------------------------------------- /mustache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import copy 5 | import inspect 6 | 7 | # A FORK FROM https://github.com/lotabout/pymustache/blob/master/pymustache/mustache.py 8 | # Was adapted to work with anki's card template mustache formatting 9 | # 10 | # Normally mustache Filters work like 11 | # {{ message | Filter A | Filter B }} 12 | # 1-----> 2-----> 13 | # but anki's works like 14 | # {{ Filter B : Filter A : Message }} 15 | # <-----2 <----1 16 | 17 | # Anki has a slight Variation on Sections: 18 | # 19 | # Normally a section with the local context(.) 
# Anki's take on section context (continued): a standard mustache section
# exposes the iterated value as the implicit '.' variable,
#
#   {{#Section}} {{.}} {{/Section}}
#
# whereas Anki's templates reuse the section's own name inside the section:
#
#   {{#Section}} {{Section}} {{/Section}}

try:
    from html import escape as html_escape
except ImportError:
    # python 2 fallback (cgi.escape was removed in python 3.8)
    import cgi

    def html_escape(text):
        return cgi.escape(text, quote=True)

DEFAULT_DELIMITERS = ('{{', '}}')
EMPTYSTRING = ""
spaces_not_newline = ' \t\r\b\f'
re_space = re.compile(r'[' + spaces_not_newline + r']*(\n|$)')
re_insert_indent = re.compile(r'(^|\n)(?=.|\n)', re.DOTALL)

# default filters
filters = {}


# ==============================================================================
# Context lookup.
# Mustache uses javascript's prototype-like lookup for variables.
#
# A context is just a dict, and we use a list of contexts to represent the
# stack; lookups walk the stack in reverse order (innermost context wins):
#
#   lookup('x', ({'y': 30, 'z': 40}, {'x': 10, 'y': 20})) => 10
#   lookup('y', ({'y': 30, 'z': 40}, {'x': 10, 'y': 20})) => 20
#   lookup('z', ({'y': 30, 'z': 40}, {'x': 10, 'y': 20})) => 40
#
# A context may also carry the special variables
# {'.': normal_context, '@': special_vars}.
def lookup(var_name, contexts=(), start=0):
    """Look up *var_name* on the stack of *contexts*, innermost first.

    :param var_name: variable name to resolve
    :param contexts: tuple/list of dict-like scopes, outermost first
    :param start: only consider contexts[:start]; any non-negative value
        means "the whole stack"
    :returns: the first value found, or None when unresolved
    """
    start = len(contexts) if start >= 0 else start
    for context in reversed(contexts[:start]):
        try:
            if var_name in context:
                return context[var_name]
        except TypeError:
            # a non-container was placed on the stack - skip it
            continue
    return None


def get_parent(contexts):
    """Return the innermost (top-of-stack) context, or None when empty."""
    try:
        return contexts[-1]
    except (IndexError, TypeError):
        return None
parse_int(string): 88 | try: 89 | return int(string) 90 | except: 91 | return None 92 | 93 | 94 | # ============================================================================== 95 | # Compilation 96 | # To compile a template into a tree of tokens, using the given delimiters. 97 | re_delimiters = {} 98 | 99 | 100 | def delimiters_to_re(delimiters): 101 | """convert delimiters to corresponding regular expressions""" 102 | 103 | # caching 104 | delimiters = tuple(delimiters) 105 | if delimiters in re_delimiters: 106 | re_tag = re_delimiters[delimiters] 107 | else: 108 | open_tag, close_tag = delimiters 109 | 110 | # escape 111 | open_tag = ''.join([c if c.isalnum() else '\\' + c for c in open_tag]) 112 | close_tag = ''.join([c if c.isalnum() else '\\' + c for c in close_tag]) 113 | 114 | re_tag = re.compile(open_tag + r'([#^>&{/!=]?)\s*(.*?)\s*([}=]?)' + close_tag, re.DOTALL) 115 | re_delimiters[delimiters] = re_tag 116 | 117 | return re_tag 118 | 119 | 120 | class SyntaxError(Exception): 121 | pass 122 | 123 | 124 | def is_standalone(text, start, end): 125 | """check if the string text[start:end] is standalone by checking forwards 126 | and backwards for blankspaces 127 | :text: TODO 128 | :(start, end): TODO 129 | :returns: the start of next index after text[start:end] 130 | 131 | """ 132 | left = False 133 | start -= 1 134 | while start >= 0 and text[start] in spaces_not_newline: 135 | start -= 1 136 | 137 | if start < 0 or text[start] == '\n': 138 | left = True 139 | 140 | right = re_space.match(text, end) 141 | return (start + 1, right.end()) if left and right else None 142 | 143 | 144 | # compiles the mustache template into an AST Like Structure 145 | def compiled(template, delimiters=DEFAULT_DELIMITERS): 146 | """Compile a template into token tree 147 | 148 | :template: TODO 149 | :delimiters: TODO 150 | :returns: the root token 151 | 152 | """ 153 | re_tag = delimiters_to_re(delimiters) 154 | 155 | # variable to save states 156 | tokens = [] 157 | index = 0 
158 | sections = [] 159 | tokens_stack = [] 160 | 161 | # root token 162 | root = Root('root') 163 | root.filters = copy.copy(filters) 164 | 165 | m = re_tag.search(template, index) 166 | while m is not None: 167 | token = None 168 | last_literal = None 169 | strip_space = False 170 | 171 | if m.start() > index: 172 | last_literal = Literal('str', template[index:m.start()], root=root) 173 | tokens.append(last_literal) 174 | 175 | # parse token 176 | prefix, name, suffix = m.groups() 177 | 178 | if prefix == '=' and suffix == '=': 179 | # {{=| |=}} to change delimiters 180 | delimiters = re.split(r'\s+', name) 181 | if len(delimiters) != 2: 182 | raise SyntaxError('Invalid new delimiter definition: ' + m.group()) 183 | re_tag = delimiters_to_re(delimiters) 184 | strip_space = True 185 | 186 | elif prefix == '{' and suffix == '}': 187 | # {{{ variable }}} 188 | token = Variable(name, name, root=root) 189 | 190 | elif prefix == '' and suffix == '': 191 | # {{ name }} 192 | token = Variable(name, name, root=root) 193 | token.escape = True 194 | 195 | elif suffix != '' and suffix != None: 196 | raise SyntaxError('Invalid token: ' + m.group()) 197 | 198 | elif prefix == '&': 199 | # {{& escaped variable }} 200 | token = Variable(name, name, root=root) 201 | 202 | elif prefix == '!': 203 | # {{! 
comment }} 204 | token = Comment(name, root=root) 205 | if len(sections) <= 0: 206 | # considered as standalone only outside sections 207 | strip_space = True 208 | 209 | elif prefix == '>': 210 | # {{> partial}} 211 | token = Partial(name, name, root=root) 212 | strip_space = True 213 | 214 | pos = is_standalone(template, m.start(), m.end()) 215 | if pos: 216 | token.indent = len(template[pos[0]:m.start()]) 217 | 218 | elif prefix == '#' or prefix == '^': 219 | # {{# section }} or # {{^ inverted }} 220 | 221 | # strip filter 222 | sec_name = name.split(':')[0].strip() 223 | token = Section(sec_name, name, root=root) if prefix == '#' else Inverted(name, name, root=root) 224 | token.delimiter = delimiters 225 | tokens.append(token) 226 | 227 | # save the tokens onto stack 228 | token = None 229 | tokens_stack.append(tokens) 230 | tokens = [] 231 | 232 | sections.append((sec_name, prefix, m.end())) 233 | strip_space = True 234 | # closing of the section {/section} 235 | elif prefix == '/': 236 | tag_name, sec_type, text_end = sections.pop() 237 | if tag_name != name: 238 | raise SyntaxError("unclosed tag: '" + tag_name + "' Got:" + m.group()) 239 | 240 | children = tokens 241 | tokens = tokens_stack.pop() 242 | 243 | tokens[-1].text = template[text_end:m.start()] 244 | tokens[-1].children = children 245 | strip_space = True 246 | 247 | else: 248 | raise SyntaxError('Unknown tag: ' + m.group()) 249 | 250 | if token is not None: 251 | tokens.append(token) 252 | 253 | index = m.end() 254 | if strip_space: 255 | pos = is_standalone(template, m.start(), m.end()) 256 | if pos: 257 | index = pos[1] 258 | if last_literal: last_literal.value = last_literal.value.rstrip(spaces_not_newline) 259 | 260 | m = re_tag.search(template, index) 261 | 262 | tokens.append(Literal('str', template[index:])) 263 | root.children = tokens 264 | return root 265 | 266 | 267 | def render(template, context, partials={}, delimiters=None): 268 | contexts = [context] 269 | 270 | if not 
isinstance(partials, dict): 271 | raise TypeError('partials should be dict, but got ' + type(partials)) 272 | 273 | return inner_render(template, contexts, partials, delimiters) 274 | 275 | 276 | def inner_render(template, contexts, partials={}, delimiters=None): 277 | delimiters = DEFAULT_DELIMITERS if delimiters is None else delimiters 278 | parent_token = compiled(template, delimiters) 279 | return parent_token._render(contexts, partials) 280 | 281 | 282 | # ============================================================================== 283 | # Token 284 | # We'll parse the template into a tree of tokens, so a Token is actually a 285 | # node of the tree. 286 | # We'll save the all the information about the node here. 287 | 288 | class Token(): 289 | """The node of a parse tree""" 290 | 291 | def __init__(self, name, value=None, text='', children=None, root=None): 292 | self.name = name 293 | self.value = value 294 | self.text = text 295 | self.children = children 296 | self.escape = False 297 | self.delimiter = None # used for section 298 | self.indent = 0 # used for partial 299 | self.root = root 300 | self.filters = {} 301 | self.Path = None 302 | self.type_string = None 303 | 304 | def _escape(self, text): 305 | """Escape text according to self.escape""" 306 | ret = EMPTYSTRING if text is None else str(text) 307 | if self.escape: 308 | return html_escape(ret) 309 | else: 310 | return ret 311 | 312 | def _lookup(self, dot_name, contexts): 313 | """lookup value for names like 'a.b.c' and handle filters as well""" 314 | # process filters 315 | 316 | filters = [x for x in map(lambda x: x.strip(), dot_name.split(':'))] 317 | dot_name = filters[-1] 318 | filters = filters[0:-1] 319 | filters.reverse() 320 | 321 | # should support paths like '../../a.b.c/../d', etc. 
322 | if not dot_name.startswith('.'): 323 | dot_name = './' + dot_name 324 | 325 | paths = dot_name.split('/') 326 | last_path = paths[-1] 327 | 328 | if (self.type_string == 'V'): 329 | if (len(contexts) >= 2 and last_path in contexts[-2]): 330 | last_path = '.' 331 | paths = '.' 332 | 333 | # path like '../..' or ./../. etc. 334 | refer_context = last_path == '' or last_path == '.' or last_path == '..' 335 | paths = paths if refer_context else paths[:-1] 336 | 337 | # count path level 338 | level = 0 339 | for path in paths: 340 | if path == '..': 341 | level -= 1 342 | elif path != '.': 343 | # ../a.b.c/.. in the middle 344 | level += len(path.strip('.').split('.')) 345 | 346 | names = last_path.split('.') 347 | # fetch the correct context 348 | if refer_context or names[0] == '': 349 | try: 350 | value = contexts[level - 1] 351 | except: 352 | value = None 353 | else: 354 | # support {{a.b.c.d.e}} like lookup 355 | value = lookup(names[0], contexts, level) 356 | 357 | # lookup for variables 358 | if not refer_context: 359 | for name in names[1:]: 360 | try: 361 | # a.num (a.1, a.2) to access list 362 | index = parse_int(name) 363 | name = parse_int(name) if isinstance(value, (list, tuple)) else name 364 | value = value[name] 365 | except: 366 | # not found 367 | value = None 368 | break; 369 | 370 | # apply filters 371 | for f in filters: 372 | try: 373 | func = self.root.filters[f] 374 | args = inspect.getfullargspec(func)[0] 375 | argDict = {} 376 | for argument in args: 377 | if('txt' == argument or 'text' == argument): 378 | argDict['txt'] = value 379 | if ('args' == argument): 380 | argDict['args']= "ags" 381 | if ('context' == argument): 382 | argDict['context']= contexts[-1] 383 | if ('tag' == argument): 384 | argDict['tag']= dot_name.split("/")[-1] 385 | if ('fullname' == argument): 386 | argDict['fullname']= "Fullname" 387 | value = func(*argDict.values()) 388 | except Exception as e: 389 | continue 390 | 391 | return value 392 | 393 | def 
_render_children(self, contexts, partials): 394 | """Render the children tokens""" 395 | ret = [] 396 | for child in self.children: 397 | ret.append(child._render(contexts, partials)) 398 | return EMPTYSTRING.join(ret) 399 | 400 | def _get_str(self, indent): 401 | ret = [] 402 | ret.append(' ' * indent + '[(') 403 | ret.append(self.type_string) 404 | ret.append(',') 405 | ret.append(self.name) 406 | if self.value: 407 | ret.append(',') 408 | ret.append(repr(self.value)) 409 | ret.append(')') 410 | if self.children: 411 | for c in self.children: 412 | ret.append('\n') 413 | ret.append(c._get_str(indent + 4)) 414 | ret.append(']') 415 | return ''.join(ret) 416 | 417 | def __str__(self): 418 | return self._get_str(0) 419 | 420 | def render(self, contexts, partials={}): 421 | # interface for compiled object, corresponds to render() 422 | contexts = [contexts] 423 | return self._render(contexts, partials) 424 | 425 | 426 | class Root(Token): 427 | def __init__(self, *arg, **kw): 428 | Token.__init__(self, *arg, **kw) 429 | self.type_string = 'R' 430 | 431 | def _render(self, contexts, partials): 432 | return self._render_children(contexts, partials) 433 | 434 | 435 | class Literal(Token): 436 | def __init__(self, *arg, **kw): 437 | Token.__init__(self, *arg, **kw) 438 | self.type_string = 'L' 439 | 440 | def _render(self, contexts, partials): 441 | """render simple literals""" 442 | return self._escape(self.value) 443 | 444 | 445 | class Variable(Token): 446 | def __init__(self, *arg, **kw): 447 | Token.__init__(self, *arg, **kw) 448 | self.type_string = 'V' 449 | 450 | def _render(self, contexts, partials): 451 | """render variable""" 452 | value = self._lookup(self.value, contexts) 453 | 454 | # lambda 455 | if callable(value): 456 | value = inner_render(str(value()), contexts, partials) 457 | 458 | return self._escape(value) 459 | 460 | 461 | class Section(Token): 462 | def __init__(self, *arg, **kw): 463 | Token.__init__(self, *arg, **kw) 464 | self.type_string = 
'S' 465 | 466 | def _render(self, contexts, partials): 467 | """render section""" 468 | val = self._lookup(self.value, contexts) 469 | if not val: 470 | # false value 471 | return EMPTYSTRING 472 | 473 | # normally json has types: number/string/list/map 474 | # but python has more, so we decide that map and string should not iterate 475 | # by default, other do. 476 | if hasattr(val, "__iter__") and not isinstance(val, (str, dict)): 477 | # non-empty lists 478 | ret = [] 479 | for item in val: 480 | contexts.append(item) 481 | ret.append(self._render_children(contexts, partials)) 482 | contexts.pop() 483 | 484 | if len(ret) <= 0: 485 | # empty lists 486 | return EMPTYSTRING 487 | 488 | return self._escape(''.join(ret)) 489 | elif callable(val): 490 | # lambdas 491 | new_template = val(self.text) 492 | value = inner_render(new_template, contexts, partials, self.delimiter) 493 | else: 494 | # context 495 | contexts.append(val) 496 | value = self._render_children(contexts, partials) 497 | contexts.pop() 498 | 499 | return self._escape(value) 500 | 501 | 502 | class Inverted(Token): 503 | def __init__(self, *arg, **kw): 504 | Token.__init__(self, *arg, **kw) 505 | self.type_string = 'I' 506 | 507 | def _render(self, contexts, partials): 508 | """render inverted section""" 509 | val = self._lookup(self.value, contexts) 510 | if val: 511 | return EMPTYSTRING 512 | return self._render_children(contexts, partials) 513 | 514 | 515 | class Comment(Token): 516 | def __init__(self, *arg, **kw): 517 | Token.__init__(self, *arg, **kw) 518 | self.type_string = 'C' 519 | 520 | def _render(self, contexts, partials): 521 | """render comments, just skip it""" 522 | return EMPTYSTRING 523 | 524 | 525 | class Partial(Token): 526 | def __init__(self, *arg, **kw): 527 | Token.__init__(self, *arg, **kw) 528 | self.type_string = 'P' 529 | 530 | def _render(self, contexts, partials): 531 | """render partials""" 532 | try: 533 | partial = partials[self.value] 534 | except KeyError as e: 535 
| return self._escape(EMPTYSTRING) 536 | 537 | partial = re_insert_indent.sub(r'\1' + ' ' * self.indent, partial) 538 | 539 | return inner_render(partial, contexts, partials, self.delimiter) 540 | 541 | 542 | # ============================================================================== 543 | # Default Filters 544 | filters['items'] = lambda dict: dict.items() 545 | filters['enum'] = lambda list: enumerate(list) 546 | filters['lower'] = lambda txt: txt.lower() 547 | filters['upper'] = lambda txt: txt.upper() 548 | -------------------------------------------------------------------------------- /qrun.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import mustache 4 | import Formatters 5 | import Utils.Fonts as fonts 6 | 7 | #qtext = """What is the key risk factor for Cervical Carcinoma?

{{c1::High-risk HPV (16, 18, 31, 33)}}

HPV 16 and 18 account for more than 70% of all Cervical Carcinoma

 8 | # 9 | #Other:
- smoking
- starting sexual intercourse at a young age
- immunodeficiency (eg. HIV infection)
""" 10 | #q = qtext.split(r"") 11 | #fonts.install_font("C:/Users/polit/AppData/Local/Temp/smmedia/_YUMIN.TTF") 12 | 13 | #import glob 14 | #print(glob.glob("C:\\Users\\polit\\AppData\\Local\\Temp\\smmedia\\*.ttf")) 15 | 16 | # 17 | # mustache.filters["cloze"] = lambda txt: Formatters.cloze_q_filter(txt, str(int(0) + 1)) 18 | # 19 | # mytemplate = "{{#Text}}{{cloze:Text}}{{/Text}}" 20 | # 21 | # print(mustache.render(mytemplate,{"Text": q[0]})) 22 | # from MediaConverter import MediaConverter 23 | # 24 | # mc = MediaConverter() 25 | # mc.convertImage("C:\\Users\\polit\\Desktop\\anki2sm\\out\\out_files\\elements\\Freesample.svg") 26 | 27 | 28 | # def lastStoneWeightII( stones: List[int]) -> int: 29 | # total = sum(stones) 30 | # 31 | # Max_weight = int(total / 2) 32 | # print("Max Weight",Max_weight) 33 | # current = (Max_weight + 1) * [0] 34 | # 35 | # for stone in stones: 36 | # for wgt in range(Max_weight, -1, -1): 37 | # if wgt - stone >= 0: 38 | # current[wgt] = max(stone + current[wgt - stone], current[wgt]) 39 | # print(stone, wgt, current) 40 | # #print("Matrix value:\n",current) 41 | # return total - 2 * current[-1] 42 | # 43 | # lastStoneWeightII([2,7,4,1,8,1]) 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyquery==1.4.1 2 | Click==7.0 3 | yattag==1.13.2 4 | beautifulsoup4==4.8.2 5 | pathlib~=1.0.1 6 | premailer~=3.7.0 7 | cssutils~=1.0.2 8 | Pillow~=8.0.0 9 | svglib~=1.0.1 10 | reportlab~=3.5.34 11 | config~=0.5.0.post0 12 | progress~=1.5 13 | future~=0.18.2 14 | typing~=3.7.4.1 15 | pdf2image~=1.14.0 16 | -------------------------------------------------------------------------------- /run.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | python anki2smV2.py 3 | pause --------------------------------------------------------------------------------