├── .gitignore ├── screenshot.png ├── mg ├── __main__.py ├── webisu │ ├── __init__.py │ ├── README.md │ ├── pmath.py │ ├── ebisu.py │ └── webisu.py ├── main │ ├── checkup.py │ ├── learn.py │ ├── drill.py │ ├── info.py │ ├── status.py │ └── __init__.py ├── color.py ├── media.py ├── ansi.py ├── data.py ├── topk.py ├── node.py ├── mgio.py ├── plot.py ├── graph.py └── options.py ├── tutorial ├── de.digits.mg │ └── graph.py └── README.md ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | cards 3 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matomatical/memograph/HEAD/screenshot.png -------------------------------------------------------------------------------- /mg/__main__.py: -------------------------------------------------------------------------------- 1 | from mg.main import main 2 | 3 | if __name__ == "__main__": 4 | # setup readline (disable history) 5 | import readline 6 | readline.set_auto_history(False) 7 | 8 | # launch app 9 | main() 10 | -------------------------------------------------------------------------------- /mg/webisu/__init__.py: -------------------------------------------------------------------------------- 1 | from mg.webisu.webisu import p_recall_t_lnpdf 2 | from mg.webisu.webisu import p_recall_t_pdf 3 | from mg.webisu.webisu import p_recall_t_lnmean 4 | from mg.webisu.webisu import p_recall_t_mean 5 | from mg.webisu.webisu import update_model_bernoulli 6 | from mg.webisu.webisu import init_model 7 | -------------------------------------------------------------------------------- /tutorial/de.digits.mg/graph.py: -------------------------------------------------------------------------------- 1 | from mg.graph import Node 2 | 3 | D = ['null','eins','zwei','drei','vier','fünf','sechs','sieben','acht','neun'] 4 | def graph(): 5 | for i, n in enumerate(D): 6 | yield ( 7 | Node(i, speak_str=i, speak_voice="en"), 8 | Node(n, speak_str=n, speak_voice="de"), 9 | ) 10 | -------------------------------------------------------------------------------- /mg/main/checkup.py: -------------------------------------------------------------------------------- 1 | from mg.mgio import print, input 2 | 3 | def run_checkup(graph, db, log, options): 4 | loaded_keys = set(graph.keys) 5 | 6 | # check database 7 | stored_keys = set(db.keys()) 8 | print("orphaned keys in database:") 9 | for key in stored_keys - loaded_keys: 10 | print('*', key) 11 | 12 | # check log 13 | logged_keys = set(l['id'] for l in log.old_lines) 14 | print("orphaned keys in log file:") 15 | for key in logged_keys - loaded_keys: 16 | print('*', key) 17 | 18 | -------------------------------------------------------------------------------- /mg/color.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | class RGB(collections.namedtuple("RGB", "r g b")): 4 | """ 5 | Wrap a red, green, blue triple. 
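    For example (an illustrative round trip using the helper functions
    defined below in this module):

        >>> to_hex(RGB(255, 0, 0))
        '#ff0000'
        >>> to_rgb('#ff0000')
        RGB(r=255, g=0, b=0)
        >>> colormap_red_green(1.0)
        RGB(r=0, g=255, b=0)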
6 | """ 7 | 8 | HEXDIGITS = set("0123456789abcdefABCDEF") 9 | 10 | def is_hex(s): 11 | return len(s) == 7 and s[0] == "#" and all(d in HEXDIGITS for d in s[1:]) 12 | 13 | def to_hex(rgb): 14 | return f"#{rgb.r:02x}{rgb.g:02x}{rgb.b:02x}" 15 | 16 | def to_rgb(hex): 17 | r = int(hex[1:3], base=16) 18 | g = int(hex[3:5], base=16) 19 | b = int(hex[5:7], base=16) 20 | return RGB(r, g, b) 21 | 22 | def colormap_red_green(p): 23 | return RGB(int(255*(1-p)), int(255*p), 0) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Matthew Farrugia-Roberts 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mg/main/learn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from mg.mgio import print, input 4 | 5 | 6 | def run_learn(graph, options): 7 | # decide which links to introduce 8 | print("introduce some new links...") 9 | hand = graph.query( 10 | number=options.num_cards, 11 | topics=options.topics, 12 | new=True 13 | ) 14 | n = len(hand) 15 | if n == 0: 16 | print("no new links! try drilling some old ones.") 17 | return 18 | random.shuffle(hand) 19 | 20 | # introduce the links 21 | for i, link in enumerate(hand, 1): 22 | print(f"** learn {i}/{n} **") 23 | face, back = link.u, link.v 24 | if link.t: print("topics:", link.t) 25 | print("prompt:", face.label()) 26 | face.media() 27 | input("return:") 28 | print("answer:", back.label()) 29 | back.media() 30 | instructions = "easy (g+↵) | medium (↵) | hard (h+↵)" 31 | rating = input("rating:", r=instructions) 32 | if rating == "g": 33 | link.m.init([1, 1, 2*24*60*60]) 34 | elif rating == "h": 35 | link.m.init([1, 1, 1*60]) 36 | else: 37 | link.m.init([1, 1, 1*60*60]) 38 | -------------------------------------------------------------------------------- /mg/webisu/README.md: -------------------------------------------------------------------------------- 1 | # *webisu* 2 | 3 | This module is a replication of fasiha's ebisu algorithm for a Bayesian 4 | spaced repetition system, implemented in pure Python. 5 | 6 | I built this app as a personal project and to help remove dependencies 7 | from my flashcard app. 
I imposed some of my own design choices on the 8 | implementation, and variations compared to the original ebisu range in 9 | scope from cosmetic to major. 10 | 11 | The key differences are as follows: 12 | 13 | * The API is changed, according to my own subjective preferences. 14 | However, the module `webisu.ebisu` provides a rough translation, 15 | where functionality is equivalent. 16 | * There is an additional function returning the probability density 17 | for a given recall probability, which may be useful for visualising 18 | the deck's status. 19 | * Binomial updates are not implemented for n > 1 (only Bernoulli trial 20 | updates are supported), as my flashcard app uses Bernoulli updates 21 | and Binomial updates required a more sophisticated safe logsumexp 22 | function than I have so far implemented in pure Python. 23 | * Right now, no cache is used for beta function calls (I might later 24 | follow fasiha on this one). 25 | * The update method currently does not implement any rebalancing, but 26 | this is a work-in-progress. 27 | * I plan for the update method to eventually use a different approach 28 | to rebalancing, but this is still a work-in-progress. 29 | -------------------------------------------------------------------------------- /mg/webisu/pmath.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some pure Python math functions for working with the beta and gamma 3 | functions and the beta distribution in logarithmic space. 4 | """ 5 | 6 | # # # 7 | # Working in log space 8 | # 9 | 10 | from math import log as ln, log1p as ln1p, exp 11 | 12 | 13 | def lnaddexp(x, y): 14 | """ 15 | Safely compute ln(exp(x) + exp(y)), to add numbers within their 16 | log-space representations. 17 | """ 18 | if x == y: 19 | return x + ln(2) 20 | elif x < y: 21 | return y + ln1p(exp(x - y)) 22 | else: 23 | return x + ln1p(exp(y - x)) 24 | 25 | 26 | def lnsubexp(x, y): 27 | """ 28 | Safely compute ln(exp(x) - exp(y)), to subtract numbers within 29 | their log-space representations. 30 | 31 | Assumes x > y so that exp(x) - exp(y) > 0 and ln(exp(x) - exp(y)) 32 | is defined; otherwise MathError. 33 | """ 34 | return x + ln1p(-exp(y-x)) 35 | 36 | 37 | 38 | # # # 39 | # Gamma and Beta functions 40 | # 41 | 42 | from math import lgamma as ln_gammafn 43 | 44 | 45 | def ln_betafn(α, β): 46 | """ 47 | Compute the logarithm of the beta function for parameters α and β. 48 | """ 49 | return ln_gammafn(α) + ln_gammafn(β) - ln_gammafn(α+β) 50 | 51 | 52 | 53 | # # # 54 | # Beta distribution 55 | # 56 | 57 | def beta_match_moments(μ, Σ): 58 | """ 59 | Match a Beta distribution given mean μ and variance Σ. 60 | """ 61 | factor = μ * (1-μ) / Σ - 1 62 | α = factor * μ 63 | β = factor * (1-μ) 64 | return α, β 65 | 66 | -------------------------------------------------------------------------------- /mg/main/drill.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from mg.mgio import print, input 4 | 5 | def run_drill(graph, options, review=False): 6 | # decide which cards to drill 7 | print("drill some old cards...") 8 | hand = graph.query( 9 | number=options.num_cards, 10 | topics=options.topics, 11 | review=review, 12 | ) 13 | n = len(hand) 14 | if n == 0: 15 | print("no old cards! 
try learning some new ones.") 16 | return 17 | random.shuffle(hand) 18 | 19 | # drill the cards 20 | for i, link in enumerate(hand, 1): 21 | print(f"** drill {i}/{n} **") 22 | if options.reverse: 23 | face, back = link.v, link.u 24 | else: 25 | face, back = link.u, link.v 26 | if link.t: print("topics:", link.t) 27 | print("prompt:", face.label()) 28 | face.media() 29 | guess = input("recall:") 30 | if back.match(guess): 31 | print(f"answer: {back.label()}") 32 | back.media() 33 | link.m.update(True) 34 | else: 35 | print(f"answer: {back.label()}") 36 | back.media() 37 | instructions = "forgot (↵) | got it (g+↵) | skip (s+↵)" 38 | commit = input("commit:", r=instructions) 39 | if commit == "g": 40 | print("got it!") 41 | link.m.update(True) 42 | elif commit == "s": 43 | link.m.review() 44 | else: 45 | link.m.update(False) 46 | 47 | -------------------------------------------------------------------------------- /mg/media.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import queue 3 | import threading 4 | from subprocess import run, DEVNULL 5 | 6 | class MediaDaemon: 7 | def __init__(self): 8 | self.queue = queue.SimpleQueue() 9 | self.thread = threading.Thread(target=self.loop, daemon=True) 10 | self.thread.start() 11 | def loop(self): 12 | try: 13 | while True: 14 | args = self.queue.get() 15 | result = run(args, capture_output=True, text=True) 16 | if result.returncode != 0: 17 | raise Exception( 18 | f"Exit with non-zero return code " 19 | f"{result.returncode} and stderr: " 20 | f"{result.stderr.strip()}" 21 | ) 22 | except Exception as e: 23 | print( 24 | "\nERROR Media command failed:\n", 25 | e, 26 | "\n(did you install media engines to your path?)." 27 | "\nDisabling media for this session, but you can continue.", 28 | file=sys.stderr, 29 | ) 30 | def schedule(self, *args): 31 | self.queue.put(args) 32 | 33 | md = MediaDaemon() 34 | 35 | def speak(text, voice="english"): 36 | """ 37 | Speak some text with a given voice. Uses `espeak` library. 38 | The default voice is "english", but you can use any VoiceName 39 | (see `espeak --voices`). 40 | 41 | TODO: Allow other kwargs, passed to espeak command. 42 | TODO: Allow configuration of the TTS engine (e.g. allow 'say' on macos). 43 | """ 44 | md.schedule("espeak", text, "-v", voice) 45 | 46 | -------------------------------------------------------------------------------- /mg/ansi.py: -------------------------------------------------------------------------------- 1 | import re 2 | import collections 3 | 4 | from mg.color import is_hex, to_rgb 5 | 6 | ANSI_CODE = re.compile("\033\\[[^m]+m") 7 | 8 | def ansi_len(s): 9 | return len(ANSI_CODE.sub("", s)) 10 | 11 | 12 | def to_ansi_code(a): 13 | """ 14 | Convert a keyword to an ansi escape code, e.g. 'red' to '\\033[31m' 15 | or 'reset' to '\\033[0m' or '#ffffff' to '\\033[38;5;231m'. 
16 | """ 17 | if a in BASIC_CODES: 18 | return esc(BASIC_CODES[a]) 19 | if is_hex(a): 20 | return esc(f"38;5;{to_216(to_rgb(a))}") 21 | # TODO: Add bg colors 22 | raise UnknownANSIKeywordException(a) 23 | 24 | BASIC_CODES = { 25 | 'reset': '0', 26 | 'bold': '1', 27 | 'faint': '2', 28 | 'italic': '3', 29 | 'black': '30', 30 | 'red': '31', 31 | 'green': '32', 32 | 'yellow': '33', 33 | 'blue': '34', 34 | 'magenta': '35', 35 | 'cyan': '36', 36 | 'white': '37', 37 | 'b-black': '90', 38 | 'b-red': '91', 39 | 'b-green': '92', 40 | 'b-yellow': '93', 41 | 'b-blue': '94', 42 | 'b-magenta': '95', 43 | 'b-cyan': '96', 44 | 'b-white': '97', 45 | # TODO: Add bg colors 46 | } 47 | 48 | def esc(code): 49 | return f"\033[{code}m" 50 | 51 | class UnknownANSIKeywordException(Exception): 52 | """Unknown tag or similar formatted string error""" 53 | 54 | def to_216(rgb): 55 | # https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit 56 | r = rgb.r // 43 57 | g = rgb.g // 43 58 | b = rgb.b // 43 59 | return 16 + 36*r + 6*g + b 60 | -------------------------------------------------------------------------------- /mg/main/info.py: -------------------------------------------------------------------------------- 1 | from mg.mgio import print, input 2 | from mg.plot import print_bars 3 | from mg.color import colormap_red_green as color, to_hex 4 | 5 | def run_info(graph, options): 6 | keys = list(filter_topics(options.topics, graph)) 7 | # to many cards 8 | if len(keys) > 1: 9 | print(f"multiple ({len(keys)}) matches:") 10 | for i, key in enumerate(keys, 1): 11 | print(f"{i:4d}. {key}") 12 | 13 | # not enough cards 14 | elif len(keys) < 1: 15 | print("no matches! try again") 16 | 17 | # just right! 18 | else: 19 | key = keys[0] 20 | link = list(graph.links[key])[0] 21 | print(f"match! 
{key}") 22 | print("topics:", link.t) 23 | print("node 1:", link.u.label()) 24 | print("node 2:", link.v.label()) 25 | if link.m.is_new(): 26 | print("status: not yet learned") 27 | else: 28 | print("status: last reviewed", link.m.elapsed(), "seconds ago") 29 | print("params:", link.m) 30 | print("recall:") 31 | # pdf histogram # TODO: use CDF instead duh 32 | support = [(p+0.5)/20 for p in range(20)] 33 | pdf = [link.m.density(p)/20 for p in support] 34 | print_bars( 35 | values=pdf, 36 | labels=support, 37 | labelformat=".1%", 38 | valueformat=".3f", 39 | colors=[to_hex(color(p)) for p in support], 40 | ) 41 | 42 | 43 | def filter_topics(topics, graph): 44 | for key in graph.keys: 45 | if all(t in key for t in topics): 46 | yield key 47 | -------------------------------------------------------------------------------- /mg/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import runpy 4 | 5 | 6 | def load_graph(source_path): 7 | if os.path.lexists(source_path): 8 | return runpy.run_path(source_path)['graph']() 9 | else: 10 | raise Exception(f"No such graph file {source_path!r}") 11 | 12 | 13 | class Database(dict): 14 | def __init__(self, path): 15 | super().__init__() 16 | self.path = path 17 | if os.path.lexists(self.path): 18 | with open(self.path, 'r') as f: 19 | self.update(json.load(f)) 20 | def save(self): 21 | _ensure(self.path) 22 | with open(self.path, 'w') as f: 23 | json.dump(self, f, indent=2) 24 | def __missing__(self, key): 25 | empty = {} 26 | self[key] = empty 27 | return empty 28 | 29 | 30 | class Log: 31 | def __init__(self, path, load=False): 32 | self.path = path 33 | self.load = load 34 | self.new_lines = [] 35 | if self.load: 36 | with open(self.path, 'r') as file: 37 | self.old_lines = [json.loads(line) for line in file] 38 | else: 39 | self.old_lines = [] 40 | def lines(self): 41 | return self.old_lines + self.new_lines 42 | def log(self, id, time, event, data): 43 | self.new_lines.append({ 44 | 'id': id, 45 | 'time': time, 46 | 'event': event, 47 | 'data': data, 48 | }) 49 | def save(self): 50 | _ensure(self.path) 51 | with open(self.path, 'a') as file: 52 | for line in self.new_lines: 53 | print(json.dumps(line), file=file) 54 | 55 | 56 | def _ensure(path): 57 | os.makedirs(os.path.dirname(path), exist_ok=True) 58 | 59 | -------------------------------------------------------------------------------- /mg/main/status.py: -------------------------------------------------------------------------------- 1 | from mg.mgio import print, input 2 | from mg.plot import print_hist 3 | from mg.color import colormap_red_green as color, to_hex 4 | 5 | def run_status(graph, options): 6 | if options.histogram: 7 | plot_histogram(graph, options.topics) 8 | if options.posterior: 9 | plot_posterior(graph, options.topics) 10 | if options.scatter: 11 | plot_scatter(graph, options.topics) 12 | if options.list: 13 | plot_list(graph, options.topics) 14 | 15 | 16 | def plot_histogram(graph, topics): 17 | n_seen = graph.count(topics=topics, new=False) 18 | n_new = graph.count(topics=topics, new=True) 19 | n_total = n_seen + n_new 20 | if n_total == 0: 21 | print("no cards! 
try adding some or changing the topic.") 22 | return 23 | if n_seen > 0: 24 | print("probability of recall histogram:") 25 | probs = [l.m.predict(exact=True) for l in graph.query(topics=topics)] 26 | print_hist(probs, lo=0, hi=1, bins=20, height=56, labelformat="4.0%") 27 | print( 28 | f"{n_seen} cards seen ({n_seen/n_total:.0%}),", 29 | f"{n_new} cards unseen ({n_new/n_total:.0%})" 30 | ) 31 | 32 | 33 | # TODO 34 | def plot_posterior(graph, topics): 35 | print("posterior plot: not yet implemented.") 36 | 37 | 38 | # TODO 39 | def plot_scatter(graph, topics): 40 | print("scatter plot: not yet implemented.") 41 | 42 | 43 | def plot_list(graph, topics): 44 | print("cards (probability of recall):") 45 | i = 1 46 | for link in graph.query(topics=topics, new=False): 47 | p = link.m.predict(exact=True) 48 | c = to_hex(color(p)) 49 | print(f"{i:>4d}.", link, r=f"(<{c}>{p:>6.1%})") 50 | i += 1 51 | for link in graph.query(topics=topics, new=True): 52 | print(f"{i:>4d}.", link, r="(unseen)") 53 | i += 1 54 | 55 | -------------------------------------------------------------------------------- /mg/main/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from mg.mgio import print 4 | from mg.options import get_options 5 | from mg.graph import KnowledgeGraph 6 | from mg.data import Database, Log, load_graph 7 | 8 | from mg.main.status import run_status 9 | from mg.main.drill import run_drill 10 | from mg.main.learn import run_learn 11 | from mg.main.checkup import run_checkup 12 | from mg.main.info import run_info 13 | 14 | def main(): 15 | # parse command-line input 16 | options = get_options() 17 | print("** welcome **") 18 | 19 | # load graph and memory model data 20 | try: 21 | db = Database(options.db_path) 22 | log = Log(options.log_path, load=options.load_log) 23 | graph = KnowledgeGraph(load_graph(options.graph_path), db, log) 24 | except Exception as e: 25 | print(f"data error ({e.__class__.__name__}):", e) 26 | sys.exit(1) 27 | 28 | # run program 29 | saving = False 30 | try: 31 | if options.subcommand == "status": 32 | saving = False 33 | run_status(graph, options) 34 | elif options.subcommand == "drill": 35 | saving = True 36 | run_drill(graph, options) 37 | elif options.subcommand == "review": 38 | saving = True 39 | run_drill(graph, options, review=True) 40 | elif options.subcommand == "learn": 41 | saving = True 42 | run_learn(graph, options) 43 | elif options.subcommand == "info": 44 | saving = False 45 | run_info(graph, options) 46 | elif options.subcommand == "checkup": 47 | saving = False 48 | run_checkup(graph, db, log, options) 49 | else: 50 | saving = False 51 | print(subcommand, "not implemented") 52 | except KeyboardInterrupt: 53 | print("\nbye!") 54 | except EOFError: 55 | print("\nbye! 
(not saving)") 56 | saving = False 57 | if saving: 58 | print("saving...", flush=True, end=" ") 59 | db.save() 60 | log.save() 61 | print("done!") 62 | 63 | -------------------------------------------------------------------------------- /mg/topk.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import functools 3 | import itertools 4 | 5 | def topk(items, k, key=id, reverse=False): 6 | items = iter(items) 7 | heap = Heap(itertools.islice(items, k), key=key, minheap=not reverse) 8 | for item in items: 9 | heap.replace_if_gt(item) 10 | return list(heap) 11 | 12 | class Heap: 13 | def __init__(self, items=None, key=id, minheap=True): 14 | self.minheap = minheap 15 | self.key = key 16 | if items is None: 17 | self.heap = [] 18 | else: 19 | self.heap = [self._itemise(i) for i in items] 20 | heapq.heapify(self.heap) 21 | def _itemise(self, item): 22 | if self.minheap: 23 | return MinHeapItem(item, self.key(item)) 24 | else: 25 | return MaxHeapItem(item, self.key(item)) 26 | def push(self, item): 27 | heapq.heappush(self.heap, self._itemise(item)) 28 | def pop(self): 29 | return heapq.heappop(self.heap).item 30 | def replace(self, item): 31 | heapq.heapreplace(self.heap, self._itemise(item)) 32 | def replace_if_gt(self, item): 33 | i = self._itemise(item) 34 | if i > self.heap[0]: 35 | heapq.heapreplace(self.heap, i) 36 | def peek(self): 37 | return self.heap[0].item 38 | def __iter__(self): 39 | return (i.item for i in self.heap) 40 | def __len__(self): 41 | return len(self.heap) 42 | def __str__(self): 43 | return "Heap({}, key={}, minheap={})".format( 44 | [i.item for i in self.heap], 45 | self.key, 46 | self.minheap, 47 | ) 48 | 49 | class HeapItem: 50 | def __init__(self, item, key): 51 | self.item = item 52 | self.key = key 53 | def __eq__(self, other): 54 | return self.key == other.key 55 | 56 | @functools.total_ordering 57 | class MinHeapItem(HeapItem): 58 | def __lt__(self, other): 59 | return self.key < other.key 60 | 61 | @functools.total_ordering 62 | class MaxHeapItem(HeapItem): 63 | def __lt__(self, other): 64 | # REVERSE THE COMPARISON 65 | return self.key > other.key 66 | 67 | -------------------------------------------------------------------------------- /mg/node.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node and link classes for defining simple or rich knowledge graphs. 3 | """ 4 | from mg.media import speak 5 | 6 | 7 | # # # 8 | # Flexible representation of nodes 9 | # 10 | 11 | class Node: 12 | """ 13 | A custom node of a knowledge graph, with flexible/independent 14 | string content for indexing, display, comparison, and (optional) 15 | vocalisation. 
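    For example, a German vocabulary node might be built like this
    (an illustrative sketch; the "de" voice name assumes an espeak
    German voice is installed, as in the tutorial deck):

        >>> n = Node("der Hund", match_str="Hund",
        ...          speak_str="der Hund", speak_voice="de")
        >>> n.label()
        'der Hund'
        >>> n.match("Hund")
        True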
16 | """ 17 | def __init__( 18 | self, 19 | index_str, 20 | match_str=None, 21 | print_str=None, 22 | speak_str=None, 23 | speak_voice=None, 24 | ): 25 | index_str = str(index_str) 26 | self.index_str = index_str 27 | if match_str is None: 28 | self.match_str = index_str 29 | else: 30 | self.match_str = str(match_str) 31 | if print_str is None: 32 | self.print_str = index_str 33 | else: 34 | self.print_str = str(print_str) 35 | if speak_str is None: 36 | self.speak_str = None 37 | else: 38 | self.speak_str = str(speak_str) 39 | self.speak_voice = speak_voice 40 | self.num = None 41 | def index(self): 42 | return self.index_str 43 | def label(self): 44 | if self.num is not None: 45 | return f"{self.print_str} ({self.num})" 46 | else: 47 | return self.print_str 48 | def match(self, other): 49 | return self.match_str == other 50 | def media(self): 51 | if self.speak_str is not None: 52 | speak(self.speak_str, voice=self.speak_voice) 53 | def setnum(self, num): 54 | self.num = num 55 | def __hash__(self): 56 | return hash(self.index_str) 57 | def __eq__(self, other): 58 | return self.index_str == other.index_str 59 | 60 | 61 | # These types are allowed in links (they will be cast as Nodes) 62 | PRIMITIVE = (str, int, float, bool) 63 | 64 | 65 | def load_node(n): 66 | if isinstance(n, PRIMITIVE): 67 | return Node(str(n)) 68 | elif isinstance(n, Node): 69 | return n 70 | raise ValueError(f"link node must be mg.graph.Node or {PRIMITIVE}") 71 | 72 | -------------------------------------------------------------------------------- /mg/webisu/ebisu.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides a rough translation between the webisu API 3 | and the original ebisu API, where functionality is equivalent. 4 | Note that there may still be some differences, as noted in each 5 | function, and in the README for this module. 6 | """ 7 | 8 | from mg.webisu.webisu import p_recall_t_lnpdf as _p_recall_t_lnpdf 9 | from mg.webisu.webisu import p_recall_t_pdf as _p_recall_t_pdf 10 | from mg.webisu.webisu import p_recall_t_lnmean as _p_recall_t_lnmean 11 | from mg.webisu.webisu import p_recall_t_mean as _p_recall_t_mean 12 | from mg.webisu.webisu import update_model_bernoulli as _update_model_bernoulli 13 | from mg.webisu.webisu import init_model as _init_model 14 | 15 | 16 | def _convert_prior_to_params(prior): 17 | """ 18 | Luckily, we both use (alpha, beta, half-life) tuples as parameters. 19 | If that ever changes, I will update this code. 20 | """ 21 | return prior 22 | 23 | 24 | def predictRecall(prior, tnow, exact=False): 25 | """ 26 | See ebisu's predictRecall function documentation. 27 | 28 | Note: No _cachedBetaln (yet) 29 | """ 30 | if exact: 31 | return _p_recall_t_mean(tnow, _convert_prior_to_params(prior)) 32 | else: 33 | return _p_recall_t_lnmean(tnow, _convert_prior_to_params(prior)) 34 | 35 | 36 | def updateRecall(prior, successes, total, tnow, rebalance=True, tback=None): 37 | """ 38 | See ebisu's updateRecall function documentation. 39 | 40 | Notes: 41 | * half-life chosen differently (as yet), which means the tback 42 | argument is ignored. 43 | * No _rebalace (yet), which means the rebalance argument is 44 | ignored. 45 | * No Binomial trials with total > 1, If a total argument 46 | greater than 1 is provided, this method will raise a 47 | NotImplementedError. 
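    An illustrative sketch of the supported calls (arbitrary parameter
    values; `defaultModel` is defined later in this module):

        model = defaultModel(24.0)          # -> (3.0, 3.0, 24.0)
        updateRecall(model, 1, 1, 10.0)     # ok: a single Bernoulli trial
        updateRecall(model, 2, 3, 10.0)     # raises NotImplementedError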
48 | """ 49 | if total > 1: 50 | raise NotImplementedError("Sorry, total > 1 not implemented.") 51 | r = bool(successes) # true iff successes > 0 (assume non-negative) 52 | return _update_model_bernoulli(r, tnow, _convert_prior_to_params(prior)) 53 | 54 | 55 | def modelToPercentileDecay(model, percentile=0.5, coarse=False): 56 | """ 57 | See ebisu's modelToPercentileDecay function documentation. 58 | 59 | Note: Not implemented. All arguments are ignores, and an 60 | exception is raised. 61 | """ 62 | raise NotImplementedError("Sorry, modelToPercentilDecay not implemented.") 63 | 64 | 65 | def defaultModel(t, alpha=3.0, beta=None): 66 | """ 67 | See ebisu's defaultModel function documentation. 68 | 69 | Note: This app's init_model function provides default alpha=2 70 | instead of alpha=3.0, but it's a small difference, and this 71 | function goes with default alpha=3.0. 72 | """ 73 | if beta is None: 74 | beta = alpha 75 | return (alpha, beta, t) 76 | 77 | -------------------------------------------------------------------------------- /mg/mgio.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | 5 | 6 | std_print = print 7 | std_input = input 8 | 9 | 10 | def print(*args, r=None, sep=" ", **kwargs): 11 | if r is not None: 12 | l = sep.join(to_ansi(arg) for arg in args) 13 | std_print(justify(l=l, r=to_ansi(r)), sep=sep, **kwargs) 14 | else: 15 | std_print(*[to_ansi(arg) for arg in args], sep=sep, **kwargs) 16 | 17 | 18 | def input(prompt, r=None, expected_width=1): 19 | p = to_ansi(prompt) 20 | if r is not None: 21 | p = justify(l=p, r=to_ansi(r), padding=1+expected_width) 22 | return std_input(p + " ") 23 | # TODO: Backspacing wipes rprompt, which is not redrawn 24 | 25 | 26 | def justify(l="", r="", padding=0): 27 | llen = ansi_len(l) 28 | rlen = ansi_len(r) 29 | if GLOBAL_OUTPUT_ENABLED: 30 | cols = os.get_terminal_size().columns 31 | # TODO: WTF am i doing here with \r when I can just print them both!? 32 | # this is only necessary for input? 33 | if cols >= llen + rlen + padding: 34 | return "\r" + " "*(cols-rlen) + r + "\r" + l 35 | else: 36 | return "\r" + " "*(cols-rlen) + r + "\n" + l 37 | return r + "\n" + l 38 | 39 | # # # # # # # # 40 | # CONFIGURATION 41 | # 42 | 43 | def config( 44 | output_enabled=sys.stdout.isatty(), 45 | # color_support_bits=8, # other not yet implemented 46 | error_on_tags=False, 47 | ): 48 | # declare globals 49 | global GLOBAL_OUTPUT_ENABLED 50 | # global GLOBAL_COLOR_SUPPORT_BITS 51 | global GLOBAL_ERROR_ON_TAGS 52 | 53 | # update globals 54 | GLOBAL_OUTPUT_ENABLED = output_enabled 55 | # GLOBAL_COLOR_SUPPORT_BITS = color_support_bits 56 | GLOBAL_ERROR_ON_TAGS = error_on_tags 57 | config() 58 | 59 | 60 | # # # # # # # # 61 | # STRING MARKUP 62 | # 63 | # The following functions help convert text with markup tags to text with (or 64 | # without) ANSI escape codes. 65 | # 66 | # For example: 67 | # 68 | # >>> print(to_ansi("Hello, world!")) 69 | # \033[31mHello, world!\033[0m 70 | # >>> config(output_enabled=False) 71 | # >>> print(no_ansi("Hello, world!")) 72 | # Hello, world! 
73 | # 74 | 75 | 76 | from mg.ansi import to_ansi_code, UnknownANSIKeywordException, ansi_len 77 | 78 | TAG = re.compile(r"<([^><]+)>") 79 | 80 | def to_ansi(s): 81 | return TAG.sub(to_ansi_tag_match, str(s)) 82 | 83 | 84 | def to_ansi_tag_match(match): 85 | a = match[1].lower() 86 | # escaped <, >: 87 | if a.lower() == 'lt': 88 | return "<" 89 | if a == "gt": 90 | return ">" 91 | # otherwise convert to ANSI if possible: 92 | try: 93 | ansicode = to_ansi_code(a) 94 | if GLOBAL_OUTPUT_ENABLED: 95 | return ansicode 96 | else: 97 | return "" 98 | except UnknownANSIKeywordException: 99 | if GLOBAL_ERROR_ON_TAGS: 100 | # raise an exception about the tag 101 | raise StringMarkupException(match[0]) 102 | else: 103 | # silently pass over the tag 104 | return match[0] 105 | 106 | 107 | class StringMarkupException(Exception): 108 | """Unknown tag or similar formatted string error""" 109 | -------------------------------------------------------------------------------- /mg/plot.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from mg.mgio import print 4 | from mg.color import colormap_red_green, to_hex 5 | 6 | 7 | def print_hist(data, lo=None, hi=None, bins=30, height=22, 8 | labelformat="4.2f", countformat="", color=colormap_red_green): 9 | """ 10 | Print a histogram plot of the sequence of samples in `data`, binned 11 | into boundaries `bins` (if `bins` is an int, then the data are 12 | separated into `bins` even width bins between `lo` (default: min(data) 13 | and `hi` (default: max(data)). 14 | 15 | The bin boundaries are shown below each bin using `labelformat` and 16 | the counts are shown with `countformat`. The bars are colored using 17 | the colormap `color`. TODO: Allow single color. 18 | """ 19 | data = list(data) 20 | # decide boundaries and bins 21 | if isinstance(bins, int): 22 | nbins = bins 23 | if lo is None: lo = min(data) 24 | if hi is None: hi = max(data) 25 | bins = [lo + i/nbins * (hi - lo) for i in range(nbins+1)] 26 | else: 27 | nbins = len(bins) - 1 28 | lo = bins[0] 29 | hi = bins[-1] 30 | # build their labels 31 | labels = [] 32 | for i, (b1, b2) in enumerate(zip(bins, bins[1:])): 33 | l1 = format(b1, labelformat) 34 | l2 = format(b2, labelformat) 35 | lb = "(" if i else "[" 36 | labels.append(f"{lb}{l1}, {l2}]") 37 | # count data points in each bin 38 | cumuls = [sum(d <= b for d in data) for b in bins] 39 | cumuls[0] = 0 # shift anything on low boundary into low bucket 40 | counts = [cumuls[i] - cumuls[i-1] for i in range(1, nbins+1)] 41 | # decide the colours 42 | colors = [to_hex(color((b2 - lo) / (hi - lo))) for b2 in bins[1:]] 43 | # plot the graph (a bar chart) 44 | print_bars( 45 | values=counts, 46 | labels=labels, 47 | colors=colors, 48 | height=height, 49 | valueformat=countformat, 50 | labelformat="", 51 | ) 52 | 53 | 54 | def print_bars(values, labels=None, colors=None, height=22, 55 | valueformat="", labelformat=""): 56 | """ 57 | Print a bar chart with `values` and options below `labels`. `colors` 58 | is an optional list of `colors` of the bars. The values are printed 59 | below each bar with format `valueformat` and the labels are formatted 60 | with `labelformat`. 
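    An illustrative call (colors, when given, are hex strings handled by
    mg's markup layer; here they are left at their default):

        print_bars(
            values=[3, 10, 6],
            labels=["mon", "tue", "wed"],
            valueformat="d",
            height=10,
        )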
61 | """ 62 | values = list(values) 63 | if colors is None: colors = itertools.repeat("#22dd22") 64 | # compute the bar heights 65 | vmax = max(values) 66 | heights = [height * v / vmax for v in values] 67 | # balance the labels and valuelabels 68 | if labels is not None: 69 | labels = [format(l, labelformat) for l in labels] 70 | lmax = max(len(l) for l in labels) 71 | labels = [l.rjust(lmax)+" " for l in labels] 72 | else: 73 | labels = itertools.repeat("") 74 | valuelabels = [format(v, valueformat) for v in values] 75 | vlmax = max(len(vl) for vl in valuelabels) 76 | valuelabels = [f"({vl.rjust(vlmax)}) " for vl in valuelabels] 77 | # print the bars 78 | for lab, vlab, ht, col in zip(labels, valuelabels, heights, colors): 79 | bar = int(ht) * "█" + _part(ht - int(ht)) 80 | print(lab, vlab, f"<{col}>{bar}", sep="") 81 | 82 | 83 | def _part(f): 84 | """ 85 | Return a character representing a partly-filled cell with proportion 86 | `f` (rounded down to width of nearest available character). 87 | """ 88 | return [" ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█"][int(9*f)] 89 | -------------------------------------------------------------------------------- /mg/graph.py: -------------------------------------------------------------------------------- 1 | import time 2 | import typing 3 | import itertools 4 | import collections 5 | 6 | import mg.webisu as webisu 7 | import mg.topk as topk 8 | 9 | from mg.node import Node, load_node 10 | 11 | 12 | # # # 13 | # Bayesian Memory Model 14 | # 15 | 16 | class MemoryModel: 17 | def __init__(self, key, database, log): 18 | self.key = key 19 | self.data = database[key] # modifiable reference 20 | self.log = log 21 | 22 | def is_new(self): 23 | """ 24 | bool: the memory model is yet to be initialised 25 | """ 26 | return self.data == {} 27 | 28 | def is_recalled(self): 29 | """ 30 | bool: the last trial with this link passed 31 | (false if failed *or* if never tried) 32 | """ 33 | if 'lastResult' in self.data: 34 | return self.data['lastResult'] 35 | else: 36 | return False 37 | 38 | def init(self, prior_params=[1, 1, 1*60*60]): 39 | """ 40 | set up the memory model for the first time 41 | """ 42 | self.data['priorParams'] = prior_params 43 | self.data['numDrills'] = 0 44 | self.data['lastTime'] = self._current_time() 45 | self._log("LEARN", prior=prior_params) 46 | 47 | def predict(self, exact=False): 48 | """ 49 | compute the expected (log) probability of recalling the link 50 | (link must be initialised) 51 | """ 52 | elapsed_time = self._current_time() - self.data['lastTime'] 53 | prior_params = self.data['priorParams'] 54 | if exact: 55 | return webisu.p_recall_t_mean(t=elapsed_time, θ=prior_params) 56 | else: 57 | return webisu.p_recall_t_lnmean(t=elapsed_time, θ=prior_params) 58 | 59 | def density(self, prob): 60 | """ 61 | compute the density of the probability of recalling the 62 | link (link must be initialised) 63 | """ 64 | elapsed_time = self._current_time() - self.data['lastTime'] 65 | prior_params = self.data['priorParams'] 66 | return webisu.p_recall_t_pdf(t=elapsed_time, θ=prior_params, p=prob) 67 | 68 | def review(self): 69 | """update time without updating memory model""" 70 | self.data['lastTime'] = self._current_time() 71 | self._log("REVIEW") 72 | 73 | def update(self, got): 74 | """ 75 | update the memory model based on the result of a drill 76 | note: must be initialised 77 | """ 78 | self.data['numDrills'] += 1 79 | self.data['lastResult'] = got 80 | now = self._current_time() 81 | prior_params = self.data['priorParams'] 82 | 
elapsed_time = now - self.data['lastTime'] 83 | postr_params = webisu.update_model_bernoulli( 84 | r=got, 85 | t=elapsed_time, 86 | θ=prior_params, 87 | ) 88 | self.data['priorParams'] = postr_params 89 | self.data['lastTime'] = now 90 | self._log("DRILL", got=got) 91 | 92 | def elapsed(self): 93 | return self._current_time() - self.data['lastTime'] 94 | 95 | def _current_time(_self): 96 | return int(time.time()) 97 | 98 | def _log(self, event, **data): 99 | self.log.log( 100 | id=self.key, 101 | time=self._current_time(), 102 | event=event, 103 | data=data, 104 | ) 105 | 106 | def __str__(self): 107 | return "(α={:.3f}, β={:.3f}, λ={:.1f}s)".format( 108 | *self.data['priorParams'], 109 | self._current_time() - self.data['lastTime'] 110 | ) 111 | 112 | 113 | 114 | 115 | # # # 116 | # Knowledge Graph Link 117 | # 118 | 119 | class Link(typing.NamedTuple): 120 | u: Node 121 | v: Node 122 | t: str 123 | m: MemoryModel 124 | i: int 125 | # w: int # weight TODO 126 | 127 | def __str__(self): 128 | s = f"[{self.t}] {self.u.label()}" 129 | if self.m.is_new(): 130 | return s 131 | else: 132 | elapsed_time = self.m._current_time() - self.m.data['lastTime'] 133 | return f"{s} [{elapsed_time}s ago]" 134 | 135 | 136 | # # # 137 | # Knowledge Graph 138 | # 139 | 140 | class KnowledgeGraph: 141 | def __init__(self, items, database, log): 142 | # generate and load all nodes and links from this script 143 | unodes = collections.defaultdict(list) 144 | vnodes = collections.defaultdict(list) 145 | links = collections.defaultdict(set) 146 | allkeys = set() 147 | for i, (u, v, *t) in enumerate(items): 148 | # topic is optional 149 | t = t[0] if t else "" 150 | # cast from primitive types 151 | u = load_node(u) 152 | v = load_node(v) 153 | # load memory model 154 | lindex = f"{u.index()}-[{t}]-{v.index()}" 155 | model = MemoryModel(lindex, database, log) 156 | # filter out duplicate links and number duplicate nodes 157 | if lindex in links: 158 | # we have already processed an identical link 159 | continue 160 | # and number duplicate u or v nodes with distinct connections 161 | # TODO: Do this more efficiently of course 162 | if u in unodes: 163 | u.setnum(len(unodes[u])+1) 164 | unodes[u][0].setnum(1) 165 | unodes[u].append(u) 166 | if v in vnodes: 167 | v.setnum(len(vnodes[v])+1) 168 | vnodes[v][0].setnum(1) 169 | vnodes[v].append(v) 170 | # load and index link 171 | link = Link(u, v, t, model, i) 172 | for topic in t.split("."): 173 | links[topic].add(link) 174 | if link.m.is_new(): 175 | links[".new"].add(link) 176 | else: 177 | links[".old"].add(link) 178 | if link.m.is_recalled(): 179 | links[".got"].add(link) 180 | else: 181 | links[".forgot"].add(link) 182 | links[".all"].add(link) 183 | links[lindex].add(link) 184 | allkeys.add(lindex) 185 | self.links = links 186 | self.keys = list(allkeys) 187 | 188 | def _query(self, topics=None, new=False, review=False): 189 | if not topics: 190 | topics = [".all"] 191 | if new: 192 | topics = [".new", *topics] 193 | else: 194 | topics = [".old", *topics] 195 | if review: 196 | topics = [".forgot", *topics] 197 | links = set.intersection(*(self.links[t] for t in topics)) 198 | return links 199 | 200 | def count(self, topics=None, new=False, review=False): 201 | links = self._query(topics, new, review) 202 | return len(links) 203 | 204 | def query(self, number=None, topics=None, new=False, review=False): 205 | links = self._query(topics, new, review) 206 | if new: 207 | # sort by lowest rank in load order 208 | key = lambda l: l.i 209 | else: 210 | # sort by 
lowest recall probability 211 | key = lambda l: l.m.predict() 212 | if number is None: 213 | # full sort 214 | return sorted(links, key=key) 215 | else: 216 | # just efficiently find the lowest k please 217 | return topk.topk(links, number, key=key, reverse=True) 218 | 219 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # memograph 2 | 3 | A utility for drilling flashcards based on an online Bayesian 4 | spaced-repetition memory model 5 | (the algorithm of [ebisu](https://github.com/fasiha/ebisu), but my own 6 | [independent implementation](mg/webisu/)). 7 | 8 | For example decks and my memory models, see 9 | [memograph-decks](https://github.com/matomatical/memograph-decks) repo. 10 | 11 | For a tutorial on using this app and creating your own decks, 12 | see [tutorial](tutorial/). 13 | 14 | ![Example screenshot](screenshot.png) 15 | 16 | Example screenshot: Delay-based probability-of-recall histogram (left). 17 | Practicing some nouns and genders (right). 18 | (Terminal is iterm2 under yabai under macos. 19 | [Background](https://commons.wikimedia.org/wiki/File:%D0%9C%D0%BE%D0%B6%D0%B6%D0%B5%D0%B2%D0%B5%D0%BB%D0%BE%D0%B2%D0%B0%D1%8F_%D1%80%D0%BE%D1%89%D0%B0_%D0%B2_%D0%9D%D0%BE%D0%B2%D0%BE%D0%BC_%D0%A1%D0%B2%D0%B5%D1%82%D0%B5_%D0%BC%D1%8B%D1%81_%D0%9A%D0%B0%D0%BF%D1%87%D0%B8%D0%BA.jpg) 20 | is a finalist photo from wiki loves earth 2019.) 21 | 22 | ## Installation 23 | 24 | * Install Python 3.7 or higher. 25 | * Clone this repository. 26 | * There are no mandatory Python dependencies right now. 27 | * If using TTS, install [`espeak`](https://github.com/espeak-ng/espeak-ng/). 28 | * Create some flashcard decks (.mg directories). 29 | See also the [tutorial](tutorial/) or my repository of decks 30 | [memograph-decks](https://github.com/matomatical/memograph-decks)) 31 | for examples. 32 | 33 | Should work on macOS, Android (Termux), Arch Linux, and probably elsewhere. 34 | 35 | ## Usage 36 | 37 | Call the program with `python3 -m mg ` (or use an alias `mg`). 38 | 39 | (Apologies to `mg(1)`, the 'emacs-like text editor' on unix, but I like the 40 | name too much, and I don't see why you deserve it more than I.) 41 | 42 | From there, see the help: 43 | 44 | ``` 45 | usage: mg [-h] [-v] 46 | {drill,learn,status,history,commit,sync,recompute,checkup} ... 47 | 48 | memograph: memorise a knowledge graph with Bayesian scheduling 49 | 50 | optional arguments: 51 | -h, --help show this help message and exit 52 | -v, --version show program's version number and exit 53 | 54 | subcommands: 55 | {drill,learn,status,history,commit,sync,recompute,checkup} 56 | run subcommand --help for detailed usage 57 | drill drill existing cards this session 58 | learn introduce new cards for this session 59 | status summarise model predictions 60 | history coming soon... 61 | commit coming soon... 62 | sync coming soon... 63 | recompute coming soon... 64 | checkup coming soon... 65 | ``` 66 | 67 | ### Making an alias 68 | 69 | I put the folder containing mg on my Python path and created an alias by 70 | putting the following lines into my `.zshrc`: 71 | 72 | ``` 73 | export PYTHONPATH="/path/to/repo/memograph:$PYTHONPATH" 74 | alias mg="python3 -m mg" 75 | ``` 76 | 77 | ### Deck format 78 | 79 | You can create your own flashcard decks by creating a directory in the 80 | `.mg` format. 
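
For instance, a deck's `graph.py` can be as small as the following sketch
(modelled on the tutorial's digits deck; the word pairs and the topic string
here are purely illustrative, and the topic element is optional):

```
from mg.graph import Node

def graph():
    # each yielded edge becomes one flashcard
    pairs = [("hello", "hallo"), ("thank you", "danke"), ("please", "bitte")]
    for en, de in pairs:
        yield (
            Node(en, speak_str=en, speak_voice="en"),
            Node(de, speak_str=de, speak_voice="de"),
            "de.basics",
        )
```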
81 | 82 | From the help: 83 | 84 | ``` 85 | knowledge graph specification format: Knowledge graph edges (a.k.a. 'cards') 86 | are taken from .mg decks in the current directory. Each .mg deck is a script 87 | defining a generator function `graph()` yielding (node 1, node 2) pairs or 88 | (node 1, node 2, topic) triples. Nodes can be primitives (str, int, float, 89 | bool) or of type `mg.graph.Node`. 90 | ``` 91 | 92 | For more guidance and deck options, see the [tutorial](tutorial/), 93 | or the example repository of decks 94 | [memograph-decks](https://github.com/matomatical/memograph-decks). 95 | 96 | 97 | 98 | ## The Future 99 | 100 | ### TODO list 101 | 102 | * Improve keyboard controls when an option is needed 103 | * Add multimedia extensions: 104 | * Sound effects 105 | * Mathematics equations (in the terminal?!) 106 | * Support image-based flashcards. 107 | * Find a nice way to allow custom assessment (autocomplete?) 108 | 109 | #### Done: 110 | 111 | * There is a noticable delay when printing using `prompt_toolkit`. Since my 112 | use case is very simple, it should be possible to replace with pure python 113 | using readline and ANSI codes. 114 | * Switch to standard readline for the input (forgo rprompt... for now!) 115 | * Switch to simpler, home-built formatted printing functionality 116 | * Reimplement right-aligned printing using '\r' and terminal width 117 | * There is a noticable delay to import ebisu, which pulls numpy. 118 | Reimplement the Bayesian scheduling algorithm in pure Python. 119 | * Add multimedia extensions: 120 | * Text-to-speech e.g. for language cards 121 | * Add a [tutorial](tutorial/). 122 | 123 | 124 | ### Longer-term ideas 125 | 126 | * Perhaps separate `ptdb` (the plain-text database) into another project. 127 | * Perhaps separate `topk` (the efficient heap-based top-k algorithm) into 128 | another project. 129 | * Perhaps switch to a domain-specific language for specifying the graphs. 130 | * It might be worth pulling in numpy for very large decks due to savings from 131 | vectorisation. Reimplement ebisu for opt-in use with more vectorisation? 132 | 133 | #### Towards modelling memory of the latent structure of decks 134 | 135 | In [Step 1 of the Tutorial](tutorial/), we discuss the use of latent 136 | deck structure in compressing deck specification through Python's 137 | expressive power. 138 | When such latent structure exists, it's another question as to whether 139 | drilling a large number of independent flashcards is appropriate. 140 | I actually think that in this case we want to be drilling *the components 141 | of the latent structure itself*. 142 | 143 | Take German numbers again. There are 10 independent concepts to learn 144 | when learning the first 10 digits. But there are maybe only 30 or so 145 | when learning the first 100. 146 | (the numbers 0--19, the multiples of ten 20, 30, 40, ..., 90, and the 147 | rules for combining the tens place and ones place into a final number 148 | with 'und') 149 | Furthermore, with the first 1,000,000 numbers, there are only a couple 150 | more concepts to learn, yet there would be 1,000,000 independent 151 | flashcards! There would never be enough time to drill each of these, 152 | let along enough times to accurately estimate memory parameters for them. 
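
To make that concrete, here is a sketch of a generator for the first 100
numbers whose length tracks the number of *concepts* rather than the number
of cards (illustrative only, with a couple of irregular forms special-cased;
it is not part of the tutorial decks):

```
from mg.graph import Node

EINER = ['null', 'eins', 'zwei', 'drei', 'vier',
         'fünf', 'sechs', 'sieben', 'acht', 'neun']
ZEHNER = {20: 'zwanzig', 30: 'dreißig', 40: 'vierzig', 50: 'fünfzig',
          60: 'sechzig', 70: 'siebzig', 80: 'achtzig', 90: 'neunzig'}
SPECIAL = {10: 'zehn', 11: 'elf', 12: 'zwölf', 16: 'sechzehn', 17: 'siebzehn'}

def say(n):
    # compose a number name from the handful of underlying concepts
    if n < 10:
        return EINER[n]
    if n in SPECIAL:
        return SPECIAL[n]
    if n < 20:
        return EINER[n % 10] + 'zehn'
    tens, ones = ZEHNER[n - n % 10], n % 10
    if ones == 0:
        return tens
    einer = 'ein' if ones == 1 else EINER[ones]
    return einer + 'und' + tens        # e.g. 42 -> 'zweiundvierzig'

def graph():
    for n in range(100):
        yield (
            Node(n, speak_str=n, speak_voice='en'),
            Node(say(n), speak_str=say(n), speak_voice='de'),
            'de.numbers',
        )
```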
153 | 154 | Correspondingly, a flashcard-generating Python script which generates 155 | cards for the first 1,000,000 German numbers will only be a couple of 156 | times longer than our scripts from the tutorial for the first 10 numbers, 157 | as long as the structure of the script follows the compositional 158 | structure discussed above. 159 | 160 | A key point here is that *the simplest program that generates the 161 | deck probably mirrors the latent structure itself*. 162 | One day, I'd like to see if it's possible, using some analysis akin to 163 | automatic differentiation from deep learning toolkits, to introspect the 164 | generating code (which may need to be written in a new DSL) and build 165 | a memory model for the couple-of-dozen latent concepts rather than the 166 | 1,000,000 output tuples, and to perform inference on this model through 167 | results on the concrete flashcards. 168 | 169 | For example, you just got '77' right, and '186' right, so the model's 170 | update would know that it's pointless to also ask you about '177', 171 | '87', '76', etc., which do not use any different concepts from the 172 | latent structure. The next prompt should be something that uses wholly 173 | different concepts, such as '15', or '1,042'. And you don't need to ever 174 | even see all of the numbers beyond '1,000' for the model to realise that 175 | *you could translate them if you did*, because you have grasped the 176 | latent structure. 177 | 178 | The result might even be able to be pushed to practically unlimited 179 | decks with a finite latent structure/generating program 180 | (such as for sentence generation based on a context-free grammar). 181 | -------------------------------------------------------------------------------- /mg/options.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | # Program information: 5 | PROGRAM = "mg" 6 | VERSION = "0.0.4" 7 | DESCRIP = "memograph: memorise a knowledge graph with Bayesian scheduling" 8 | 9 | # XXX_DEFUALT default values (to use if flag is not provided) 10 | # XXX_NOVALUE missing values (to use if flag is provided, but with no value) 11 | NUMBER_DEFAULT = 6 12 | GRAPH_PATH_DEFAULT = "graph.py" 13 | 14 | # TODO: CHANGE THIS TO SOME KIND OF TOPIC LIST 15 | # TODO: MAYBE MAKE THE SCRIPTS HAVE A .mg EXTENSION? 16 | GRAPH_SPEC_HELP = """ 17 | knowledge graph specification format: 18 | Knowledge graph edges (a.k.a. 'flashcards') are constructed by a user-defined 19 | generator function called `graph()` in a local python script `graph.py` (see 20 | `--graph_path` flag to configure). 21 | The generator function should yield (node 1, node 2) pairs or (node 1, 22 | node 2, topic) triples. Nodes can be primitives (str, int, float, bool) or 23 | of type `mg.node.Node`. 24 | """ 25 | 26 | def get_options(): 27 | """Parse and return command-line arguments.""" 28 | parser = argparse.ArgumentParser( 29 | prog=PROGRAM, 30 | description=DESCRIP, 31 | epilog=GRAPH_SPEC_HELP, 32 | ) 33 | parser.add_argument( 34 | '-v', 35 | '--version', 36 | action='version', 37 | version=f"{PROGRAM} {VERSION}" 38 | ) 39 | 40 | # most of the action happens within one of the various subcommands: 41 | subparsers = parser.add_subparsers( 42 | dest="subcommand", 43 | title="subcommands", 44 | help="run subcommand --help for detailed usage", 45 | required=True, 46 | ) 47 | # most subcommands allow for topics to be specified, as a set, and 48 | # should allow some (... 
unfinished comment from years ago?) 49 | superparser = argparse.ArgumentParser(add_help=False) 50 | superparser.add_argument( 51 | 'topics', 52 | metavar='TOPIC', 53 | help="topic filter (restrict to cards with this topic)", 54 | nargs="*", 55 | ) 56 | superparser.add_argument( 57 | '-g', 58 | '--graph_path', 59 | metavar="GRAPH PATH", 60 | help="python script containing graph generator " 61 | f"(default: {GRAPH_PATH_DEFAULT})", 62 | default=GRAPH_PATH_DEFAULT, 63 | ) 64 | superparser.add_argument( 65 | '-d', 66 | '--data_path', 67 | metavar="DATA PATH", 68 | help="directory for storing memory model parameters and update log " 69 | "(default: GRAPH PATH but with .mg extension)", 70 | default=None, 71 | ) 72 | 73 | 74 | # # # 75 | # drill subcommand 76 | # 77 | drillparser = subparsers.add_parser( 78 | "drill", 79 | parents=[superparser], 80 | description="mg drill: practice the cards most in need of review", 81 | help="drill existing cards this session", 82 | epilog=GRAPH_SPEC_HELP, 83 | ) 84 | drillparser.add_argument( 85 | '-n', 86 | '--num_cards', 87 | metavar="N", 88 | type=int, 89 | default=NUMBER_DEFAULT, # if the flag is not present 90 | help=f"number of cards in session (default {NUMBER_DEFAULT})", 91 | ) 92 | drillparser.add_argument( 93 | '-r', 94 | '--reverse', 95 | action="store_true", 96 | help="reverse card sides for session", 97 | ) 98 | 99 | # # # 100 | # review subcommand 101 | # 102 | reviewparser = subparsers.add_parser( 103 | "review", 104 | parents=[superparser], 105 | description="mg review: practice fresh and recently-failed cards", 106 | help="review fresh and recently-failed cards", 107 | epilog=GRAPH_SPEC_HELP, 108 | ) 109 | reviewparser.add_argument( 110 | '-n', 111 | '--num_cards', 112 | metavar="N", 113 | type=int, 114 | default=NUMBER_DEFAULT, # if the flag is not present 115 | help=f"number of cards in session (default {NUMBER_DEFAULT})", 116 | ) 117 | reviewparser.add_argument( 118 | '-r', 119 | '--reverse', 120 | action="store_true", 121 | help="reverse card sides for session", 122 | ) 123 | 124 | # # # 125 | # learn subcommand 126 | # 127 | learnparser = subparsers.add_parser( 128 | "learn", 129 | parents=[superparser], 130 | description="mg learn: introduce new cards for the first time", 131 | help="introduce new cards for this session", 132 | epilog=GRAPH_SPEC_HELP, 133 | ) 134 | learnparser.add_argument( 135 | '-n', 136 | '--num_cards', 137 | metavar="N", 138 | type=int, 139 | default=NUMBER_DEFAULT, # if the flag is not present 140 | help=f"number of cards in session (default: {NUMBER_DEFAULT})", 141 | ) 142 | 143 | # # # 144 | # status subcommand 145 | # 146 | statusparser = subparsers.add_parser( 147 | "status", 148 | parents=[superparser], 149 | description="mg status: summarise model statistics / predictions", 150 | help="summarise model predictions", 151 | ) 152 | statusparser.add_argument( 153 | '-H', 154 | '--histogram', 155 | action="store_true", 156 | help="histogram the expected recall probabilities", 157 | ) 158 | statusparser.add_argument( 159 | '-P', 160 | '--posterior', 161 | action="store_true", 162 | help="histogram the full posterior over recall probabilities", 163 | ) 164 | statusparser.add_argument( 165 | '-S', 166 | '--scatter', 167 | action="store_true", 168 | help="scatter expected recall probability against elapsed time", 169 | ) 170 | statusparser.add_argument( 171 | '-L', 172 | '--list', 173 | action="store_true", 174 | help="print every card with elapsed time and expected recall", 175 | ) 176 | 177 | # # # 178 | # info command 
179 | # 180 | infoparser = subparsers.add_parser( 181 | "info", 182 | parents=[superparser], 183 | description="mg info: inspect memory model for certain cards", 184 | help="inspect individual memory models", 185 | ) 186 | 187 | # # # 188 | # checkup subcommand 189 | # 190 | checkupparser = subparsers.add_parser( 191 | "checkup", 192 | parents=[superparser], 193 | description="mg checkup: fix broken references in logs and data", 194 | help="fix internal broken references", 195 | ) 196 | 197 | # # # 198 | # future commands 199 | # 200 | subparsers.add_parser("history", help="todo someday...") 201 | subparsers.add_parser("recompute", help="todo someday...") 202 | subparsers.add_parser("commit", help="todo someday...") 203 | subparsers.add_parser("sync", help="todo someday...") 204 | subparsers.add_parser("missed", help="todo someday...") 205 | 206 | # # # 207 | # parsing and post-processing 208 | # 209 | options = parser.parse_args() 210 | if options.data_path is None: 211 | options.data_path = os.path.splitext(options.graph_path)[0] + ".mg" 212 | options.db_path = os.path.join(options.data_path, "data.json") 213 | options.log_path = os.path.join(options.data_path, "log.jsonl") 214 | if options.subcommand == "status": 215 | if not any([ 216 | options.histogram, 217 | options.posterior, 218 | options.scatter, 219 | options.list, 220 | ]): 221 | options.histogram = True 222 | if options.subcommand == "checkup": 223 | options.load_log = True 224 | else: 225 | options.load_log = False 226 | return options 227 | 228 | -------------------------------------------------------------------------------- /mg/webisu/webisu.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pure python implementation of a variant of the ebisu memory model. 3 | See https://fasiha.github.io/ebisu/ for the original (implemented 4 | in Python/NumPy, as well as other languages). 5 | 6 | Matthew Farrugia-Roberts (m@far.in.net), 2021 7 | 8 | Functions: 9 | 10 | * p_recall_t_lnpdf(p, t, θ): 11 | logarithm of probability density at time t for recall probability p. 12 | * p_recall_t_pdf(p, t, θ) 13 | exponentiated version of the above. 14 | * p_recall_t_lnmean(t, θ): 15 | logarithm of mean/expected recall probability at time t. 16 | * p_recall_t_mean(t, θ): 17 | exponentiated version of the above. 18 | * update_model_bernoulli(r, t, θ): 19 | return an updated model based on the result of a bernoulli trial 20 | with result r at time t. 21 | * init_model(λ, α=2, β=α): 22 | return an initial model with some optional default values, you 23 | just need to provide a half-life in your preferred unit of time. 24 | 25 | Future functions: 26 | 27 | * p_recall_t_lncdf(p, t, θ): 28 | logarithm of cumulative density at time t for recall probability p. 29 | NOT YET IMPLEMENTED. 30 | * p_recall_t_cdf(p, t, θ) 31 | exponentiated version of the above. 32 | NOT YET IMPLEMENTED. 33 | 34 | 35 | On parameters θ: 36 | 37 | The above-listed functions take a parameter θ which refers to the 38 | (α, β, λ) triple of parameters of a memory model. The meaning of 39 | the components of this parameter triple is as follows: 40 | 41 | α and β are the parameters of a Bayesian Beta belief distribution 42 | about a fact's recall probability after time λ (also known as the 43 | 'half-life', because the Beta prior is often symmetric about 0.5). 
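For illustration (arbitrary numbers; the time unit is whatever you choose,
as long as it is used consistently, here hours):

    θ = init_model(λ=24)                            # new fact, 24-hour half-life
    p = p_recall_t_mean(t=6, θ=θ)                   # expected recall 6 hours later
    θ = update_model_bernoulli(r=True, t=6, θ=θ)    # passed a review at t=6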
44 | 45 | TODO: 46 | 47 | * Consider putting a cap/floor on δ during updates, since this might help 48 | solve numerical issues and ensure stability in the learning app itself, 49 | at the small cost of a reasonable limit on how drastically the model 50 | can update in a particular step. 51 | """ 52 | 53 | 54 | from math import exp, pow as mpow 55 | 56 | from mg.webisu.pmath import ln_gammafn, ln_betafn 57 | from mg.webisu.pmath import ln, ln1p, lnsubexp 58 | from mg.webisu.pmath import beta_match_moments 59 | 60 | # TODO: Also move GB1 pdf and expectation to a stats module? Then this 61 | # module could just be about piping the parameters into the distribns...! 62 | 63 | 64 | def p_recall_t_lnpdf(p, t, θ): 65 | """ 66 | Compute log probability density of recall prob p after t units 67 | of elapsed time since last review. 68 | 69 | θ is an (α, β, λ) triple--see module documentation. 70 | 71 | TODO: Special case for p in {0, 1} (currently gives error) 72 | """ 73 | α, β, λ = θ 74 | δ = t / λ 75 | 76 | # P_δ(p) = GB1(p; 1/δ, 1, α, β) 77 | # = p^((α-δ)/δ)(1-p^(1/δ))^(β-1) / (δ Β(α, β)) 78 | lnpdf = ( 79 | + (α - δ) / δ * ln(p) 80 | + (β - 1) * ln1p(-mpow(p, 1/δ)) 81 | - ln(δ) 82 | - ln_betafn(α, β) 83 | ) 84 | return lnpdf 85 | 86 | 87 | def p_recall_t_pdf(p, t, θ): 88 | """ 89 | Compute probability density of recall prob p after t units 90 | of elapsed time since last review. 91 | 92 | θ is an (α, β, λ) triple--see module documentation. 93 | """ 94 | return exp(p_recall_t_lnpdf(p, t, θ)) 95 | 96 | 97 | def p_recall_t_lncdf(p, t, θ): 98 | """ 99 | Compute log cumulative density of recall prob p after t units 100 | of elapsed time since last review. 101 | 102 | θ is an (α, β, λ) triple--see module documentation. 103 | 104 | TODO: Implement (!) incomplete beta function to help with the 105 | computation 106 | """ 107 | α, β, λ = θ 108 | δ = t / λ 109 | 110 | # F_GB1(p; 1/δ, 1, α, β) = F_B(p^(1/δ); α, β) 111 | # according to: 112 | # "Butler and McDonald (1989) and Kleiber and Kotz (2003)", 113 | # as summarised in 114 | # Sarabia, Guillen, Chulia, and Prieto, "Tail risk measures using flex- 115 | # ible parametric distributions", SORT 2019; DOI 10.2436/20.8080.02.86 116 | raise NotImplementedError("TODO: Needs the incomplete beta function") 117 | 118 | 119 | def p_recall_t_cdf(p, t, θ): 120 | """ 121 | Compute cumulative density of recall prob p after t units 122 | of elapsed time since last review. 123 | 124 | θ is an (α, β, λ) triple--see module documentation. 125 | """ 126 | return exp(p_recall_t_lncdf(p, t, θ)) 127 | 128 | 129 | def p_recall_t_lnmean(t, θ): 130 | """ 131 | Compute log expected recall probability after t units of time 132 | since last review. 133 | 134 | θ is an (α, β, λ) triple---see module documentation. 135 | """ 136 | α, β, λ = θ 137 | δ = t / λ 138 | 139 | # E_GB1[P] = (Γ(α+β) * Γ(α+δ)) / (Γ(α) * Γ(α+β+δ)) 140 | lnmean = ( 141 | + ln_gammafn(α+β) 142 | + ln_gammafn(α+δ) 143 | - ln_gammafn(α) 144 | - ln_gammafn(α+β+δ) 145 | ) 146 | return lnmean 147 | 148 | 149 | def p_recall_t_mean(t, θ): 150 | """ 151 | Compute expected recall probability after t units of time 152 | since last review. 153 | 154 | θ is an (α, β, λ) triple---see module documentation. 155 | """ 156 | return exp(p_recall_t_lnmean(t, θ)) 157 | 158 | 159 | def update_model_bernoulli(r, t, θ): 160 | """ 161 | Compute the approximate Beta posterior model parameters after a 162 | Bayesian update based on a single review. 163 | 164 | The hypothetical process is as follows: 165 | 166 | 1.
prior P_recall@λ_old ~ Beta(α_old, β_old) 167 | | 168 | | move through time (the quiz is at elapsed time t) 169 | V 170 | 2. prior P_recall@t ~ GeneralisedBeta1(...) 171 | | 172 | | quiz result r (True for pass, False for fail) 173 | V 174 | 3. postr P_recall@t ~ (some other analytical expression) 175 | | 176 | | approximate λ_new by solving the equation: 177 | | E[postr P_recall@λ_old] approx. = 2^-λ_old/λ_new 178 | | and time transform the posterior to this time 179 | V 180 | 4. postr P_recall@λ_new ~ (some other analytical expression) 181 | | 182 | | moment-match a Beta distribution to approximate 183 | V 184 | 5. postr P_recall@λ_new approx. ~ Beta(α_new, β_new) 185 | 186 | This function takes parameters assuming you have completed steps 1 187 | and 2, and reasons through steps 3, 4, and 5, returning the approx. 188 | posterior's parameters θ_new = (α_new, β_new, λ_new). 189 | """ 190 | _, _, λ_old = θ 191 | 192 | # calculate the posterior after update at time t, shifted 193 | # back to time λ_old 194 | postr_λ_old = _analytic_posterior_bernoulli(r, t, λ_old, θ) 195 | 196 | # use this posterior mean to approximate the new half-life 197 | ln_μ_λ_old = postr_λ_old.ln_moment(1) 198 | λ_new = - λ_old * ln(2) / ln_μ_λ_old 199 | 200 | # compute again the posterior, this time shifted to λ_new 201 | postr_λ_new = _analytic_posterior_bernoulli(r, t, λ_new, θ) 202 | 203 | # match the posterior's moments with a beta distribution 204 | # to fit a new model 205 | ln_m1 = postr_λ_new.ln_moment(1) 206 | ln_m2 = postr_λ_new.ln_moment(2) 207 | mean = exp(ln_m1) 208 | var = exp(ln_m2) - exp(2*ln_m1) 209 | 210 | α_new, β_new = beta_match_moments(mean, var) 211 | return (α_new, β_new, λ_new) 212 | 213 | 214 | class _analytic_posterior_bernoulli: 215 | def __init__(self, result, t_update, t_new, prior): 216 | self.r = result 217 | α, β, λ = prior 218 | self.α = α 219 | self.β = β 220 | self.δ = t_update / λ 221 | # ε * δ = (t_new / t_update) * (t_update / λ) 222 | self.δε = t_new / λ 223 | # precompute the denominator: 224 | self._ln_moment_denom = self._ln_moment_numer(0) 225 | def moment(self, n): 226 | return exp(self.ln_moment(n)) 227 | def ln_moment(self, n): 228 | return self._ln_moment_numer(n) - self._ln_moment_denom 229 | def _ln_moment_numer(self, n): 230 | if self.r: 231 | return ln_betafn(self.α + n*self.δε + self.δ, self.β) 232 | else: 233 | return lnsubexp( 234 | ln_betafn(self.α + n*self.δε, self.β), 235 | ln_betafn(self.α + n*self.δε + self.δ, self.β) 236 | ) 237 | 238 | 239 | def init_model(λ, α=2, β=None): 240 | """ 241 | Construct a default/initial model with parameters α, β, and λ, 242 | see module documentation. 243 | 244 | Parameters 245 | ---------- 246 | * λ--half-life. Required. It's the initial half-life of the 247 | memory model in arbitrary time units (the same should be 248 | used for the t parameter for other functions from this 249 | module). 250 | * α--first beta distribution parameter. Optional (default: 2). 251 | * β--second beta distribution parameter. Optional (default: 252 | same value as α, for a symmetric beta distribution). 253 | """ 254 | if β is None: 255 | β = α 256 | return (α, β, λ) 257 | 258 | -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # memograph tutorial 2 | 3 | A brief guide on creating decks of flashcards and using them with 4 | the `mg` utility. 
5 | 6 | Throughout, we will work with an example flashcard deck consisting 7 | of the German names of the numbers 0 through 9. 8 | 9 | The final version of the deck we build in this tutorial is included, 10 | see [de.digits.mg](de.digits.mg/). 11 | 12 | ## Contents 13 | 14 | Steps: 15 | 16 | * [Step 1: Create a deck](#step-1-create-a-deck) 17 | * [Step 2: Learn a deck](#step-2-learn-a-deck) 18 | * [Step 3: Inspect deck status](#step-3-inspect-deck-status) 19 | * [Step 4: Drill a deck](#step-4-drill-a-deck) 20 | * [Step 5: Customising nodes](#step-5-customising-nodes) 21 | * [Terminology: Nodes, Links, Knowledge Graph](#terminology-nodes-links-knowledge-graph) 22 | * [Topics](#topics) 23 | * [Custom display and comparison strings](#custom-display-and-comparison-strings) 24 | * [Text-to-Speech](#text-to-speech) 25 | * [Duplicate nodes](#duplicate-nodes) 26 | * [Step 6: Modifying a deck after learning](#step-6-modifying-a-deck-after-learning) 27 | 28 | ## Step 1: Create a deck 29 | 30 | > #### Warning: 31 | > 32 | > This deck format is not considered stable; it might change with future 33 | > commits to this repository as this application matures. 34 | 35 | From any directory you like, make a new `.mg` file. This will constitute 36 | your flashcard deck on the filesystem. 37 | 38 | ``` 39 | $ touch de.digits.mg 40 | ``` 41 | 42 | Despite its extension, this file is really a Python script. 43 | The script's job is to *generate flashcards*. 44 | Flashcards are to be represented as two-tuples containing the 'front' of 45 | the card followed by the 'back' of the card. Each 'side' of the card can 46 | be a string, integer, float, or boolean value, and these values will be the 47 | ones shown when you are later learning and practising the flashcards. 48 | 49 | The entry point to the script is a Python generator function called 50 | `graph`. `graph` should yield all of the flashcard tuples in the deck. 51 | Thus, a very simple script for our German digits could read: 52 | 53 | ```python 54 | def graph(): 55 | yield 0, "null" 56 | yield 1, "eins" 57 | yield 2, "zwei" 58 | yield 3, "drei" 59 | yield 4, "vier" 60 | yield 5, "fünf" 61 | yield 6, "sechs" 62 | yield 7, "sieben" 63 | yield 8, "acht" 64 | yield 9, "neun" 65 | ``` 66 | 67 | Using Python to generate flashcards gives us the full power of the 68 | language in specifying our list of flashcards. 69 | In some cases, we might want to leverage this additional expressive 70 | power to avoid some typing. 71 | Here's an example equivalent to the above script: 72 | 73 | ```python 74 | D = ['null','eins','zwei','drei','vier','fünf','sechs','sieben','acht','neun'] 75 | def graph(): 76 | for i, n in enumerate(D): 77 | yield i, n 78 | ``` 79 | 80 | For such a simple deck, we haven't improved maintainability or readability 81 | by a lot: The original script was already pretty clear and simple. 82 | But for larger decks, or for decks where the content itself follows 83 | some latent structure, we might want to take advantage of Python's power. 84 | For example: 85 | 86 | * The first *100* German numbers have a simple compositional structure 87 | which could be captured with a few loops and some string construction. 88 | * Mathematical flashcards (e.g. for drilling multiplication tables, 89 | square/cubic numbers, factorials, combinations, binary/decimal 90 | conversions) can be specified very compactly with these expressions 91 | evaluated at run-time, rather than flashcard-creation time.
92 | * Flashcards for cyphers such as ROT-13 can be similarly computed at 93 | runtime and very compactly using Python's string/char manipulation 94 | utilities. 95 | 96 | Anyway, it's up to you how to make your `graph` function, `mg` just 97 | needs it to yield a bunch of flashcard tuples. 98 | 99 | In [Step 5](#step-5-customising-nodes), we'll learn how to enhance 100 | these flashcard tuples with custom strings for prompting, comparison, 101 | and even *text-to-speech*! 102 | 103 | ## Step 2: Learn a deck 104 | 105 | Now that we have a deck, it's time to learn its cards! We'll use the `mg` 106 | in *learn mode* for this. 107 | 108 | Inside the same directory as our deck (Python script) `de.digits.mg`, and 109 | with an alias for the memograph script called `mg` 110 | (see the installation and usage sections of the main README file), 111 | run the following command to begin the learning session: 112 | 113 | ``` 114 | $ mg learn 115 | ``` 116 | 117 | `mg` will look for decks (`.mg` files) in the current directory and load 118 | the cards from the generator function. 119 | 120 | Then `mg` will begin the learning session. It will randomly order the 121 | first six (by default, see `--num_cards` flag) cards from the generator 122 | and introduce them one by one. For example, you might see: 123 | 124 | ``` 125 | ** welcome ** 126 | introduce some new cards... 127 | ** learn 1/6 ** 128 | prompt: 4 129 | return: 130 | ``` 131 | 132 | The front of this card is '4', and the 'return:' instruction tells you to 133 | press the return key to see the answer. Then you will see: 134 | 135 | ``` 136 | answer: vier 137 | rating: easy (g+↵) | medium (↵) | hard (h+↵) 138 | ``` 139 | 140 | `mg` has told you that the back of this card is 'vier'. Then it asks you for 141 | an estimate of how difficult this card will initially be for you to remember. 142 | For example, you might have already known this one, or maybe this is the 143 | first time you are seeing it. `mg` will use your rating to set the initial 144 | memory model parameters for the card. 145 | 146 | Type `g` (easy), nothing (medium), or `h` (hard) and press return. 147 | 148 | Repeat this for all of the cards in the learning session. 149 | Note that progress is saved at the end of each session. 150 | 151 | Run further learning sessions if you want to learn more cards right 152 | now---for the rest of this tutorial, I will continue with just these 153 | first six cards learned, and the rest unseen. 154 | 155 | ## Step 3: Inspect deck status 156 | 157 | After learning these flashcards, they will likely be fresh in your mind. 158 | `mg` uses an exponential-decay-based Bayesian memory model where the 159 | chance of forgetting a card increases over time, and your successes and 160 | fails with the card help to refine an estimate of the speed of that decay 161 | (see [ebisu](https://fasiha.github.io/post/ebisu/)). 162 | 163 | At any time, you can inspect the 'Bayesian status' of your deck with the 164 | command `mg status`. You will see a histogram of the expected probability of 165 | recall at that moment according to the Bayesian memory model's estimates 166 | of the half-life of your memory for each card. 
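Here, a card's 'half-life' is the elapsed time after which the model expects your chance of recalling it to have dropped to about 50%. If you are curious, you can also query the underlying model directly from Python using the bundled `mg.webisu` module; the sketch below is purely illustrative (the one-hour half-life is arbitrary) and is not part of the normal tutorial workflow:

```python
from mg.webisu import init_model, p_recall_t_mean

model = init_model(60*60)              # a card believed to have a one-hour half-life (in seconds)
print(p_recall_t_mean(0, model))       # 1.0: full expected recall immediately after review
print(p_recall_t_mean(60*60, model))   # 0.5: expected recall one half-life later
```

Back to the tutorial: run right after the learning session, `mg status` shows something like this: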
167 | 168 | ``` 169 | $ mg status 170 | ** welcome ** 171 | probability of recall histogram: 172 | [ 0%, 5%] (0) 173 | ( 5%, 10%] (0) 174 | ( 10%, 15%] (0) 175 | ( 15%, 20%] (0) 176 | ( 20%, 25%] (0) 177 | ( 25%, 30%] (0) 178 | ( 30%, 35%] (0) 179 | ( 35%, 40%] (0) 180 | ( 40%, 45%] (0) 181 | ( 45%, 50%] (0) 182 | ( 50%, 55%] (0) 183 | ( 55%, 60%] (0) 184 | ( 60%, 65%] (0) 185 | ( 65%, 70%] (0) 186 | ( 70%, 75%] (0) 187 | ( 75%, 80%] (0) 188 | ( 80%, 85%] (0) 189 | ( 85%, 90%] (0) 190 | ( 90%, 95%] (0) 191 | ( 95%, 100%] (6) ████████████████████████████████████████████████████████ 192 | 6 cards seen (60%), 4 cards unseen (40%) 193 | ``` 194 | 195 | Initially, of course, all of the cards will have been recently reviewed, 196 | and so with a high probability of recall (low probability of forgetting). 197 | But if you try again after some time, some of the memories will be 198 | predicted to have decayed, at a rate depending on how you rated them in 199 | the learning session: 200 | 201 | ``` 202 | $ mg status 203 | ** welcome ** 204 | probability of recall histogram: 205 | [ 0%, 5%] (0) 206 | ( 5%, 10%] (0) 207 | ( 10%, 15%] (0) 208 | ( 15%, 20%] (0) 209 | ( 20%, 25%] (2) █████████████████████████████████████▍ 210 | ( 25%, 30%] (0) 211 | ( 30%, 35%] (0) 212 | ( 35%, 40%] (0) 213 | ( 40%, 45%] (0) 214 | ( 45%, 50%] (0) 215 | ( 50%, 55%] (0) 216 | ( 55%, 60%] (0) 217 | ( 60%, 65%] (0) 218 | ( 65%, 70%] (0) 219 | ( 70%, 75%] (0) 220 | ( 75%, 80%] (0) 221 | ( 80%, 85%] (0) 222 | ( 85%, 90%] (0) 223 | ( 90%, 95%] (1) ██████████████████▊ 224 | ( 95%, 100%] (3) ████████████████████████████████████████████████████████ 225 | 6 cards seen (60%), 4 cards unseen (40%) 226 | ``` 227 | 228 | Finally, you can get a more detailed breakdown of the per-card probabilities 229 | with some of the other options from the status mode, such as the `--list` 230 | option: 231 | 232 | ``` 233 | $ mg status --list 234 | ** welcome ** 235 | cards (probability of recall): 236 | 1. 4--vier [191s ago] ( 23.9%) 237 | 2. 5--fünf [180s ago] ( 25.0%) 238 | 3. 1--eins [193s ago] ( 94.9%) 239 | 4. 3--drei [183s ago] ( 95.2%) 240 | 5. 2--zwei [195s ago] ( 99.9%) 241 | 6. 0--null [186s ago] ( 99.9%) 242 | 7. 6--sechs (unseen) 243 | 8. 7--sieben (unseen) 244 | 9. 8--acht (unseen) 245 | 10. 9--neun (unseen) 246 | ``` 247 | 248 | Watch out for spoilers! Reading the answers in this list will not be 249 | taken into account in the time-since-last-review when you next drill these 250 | cards. 251 | 252 | ## Step 4: Drill a deck 253 | 254 | Now for the main event: To practice our flashcards with `mg`! 255 | For this, we use 'drill' mode, `mg drill`. 256 | Simply run: 257 | 258 | ``` 259 | $ mg drill 260 | ``` 261 | 262 | `mg` will use the memory model to find the six (see `--num_cards` flag to 263 | change the length of the session) most-likely-to-be-forgotten flashcards 264 | and shuffle these into a drill session for you. 265 | We only have six cards learned so far, so they'll all appear in this first 266 | session. You'll see something like this: 267 | 268 | ``` 269 | ** welcome ** 270 | drill some old cards... 271 | ** drill 1/6 ** 272 | prompt: 5 273 | recall: 274 | ``` 275 | 276 | The first flashcard's prompt is '5', and 'recall:' is instructing you to 277 | type your guess as to the back of the card. Let's go ahead and put in the 278 | correct answer. Can you recall it? 279 | 280 | ``` 281 | recall: fünf 282 | answer: fünf 283 | ``` 284 | 285 | We entered the right answer, as confirmed by the 'answer' response.
286 | This happened to be one of the cards which the memory model thought 287 | we would likely get wrong, so it will update that belief and adjust 288 | the estimate of the half-life, causing the memory to decay slower 289 | next time. 290 | 291 | What's next? 292 | 293 | ``` 294 | ** drill 2/6 ** 295 | prompt: 3 296 | recall: 297 | ``` 298 | 299 | The next card is '3'. Just to see what happens, let's forget that the 300 | answer is 'drei' and enter something incorrect: 301 | 302 | ``` 303 | recall: zwei 304 | answer: drei 305 | commit: forgot (↵) | got it (g+↵) | skip (s+↵) 306 | ``` 307 | 308 | `mg` informs us that this is not correct, and asks us for confirmation. 309 | Did we really mean to type 'zwei' ('forgot', enter), or was this some kind 310 | of typo and we actually knew it ('got it', g+enter)? Or was there some other 311 | reason why we should invalidate this question and forgo updating the memory 312 | model ('skip', s+enter). 313 | 314 | We meant to type 'zwei', so admit that we forgot the answer by pressing 315 | enter. 316 | 317 | Next? 318 | 319 | ``` 320 | ** drill 3/6 ** 321 | prompt: 2 322 | recall: 323 | ``` 324 | 325 | Okay. This one is definitely 'zwei'! 326 | 327 | ``` 328 | recall: zwei' 329 | answer: zwei 330 | commit: g forgot (↵) | got it (g+↵) | skip (s+↵) 331 | got it! 332 | ``` 333 | 334 | Oops! I accidentally hit the ' key before pressing enter, and so the strings 335 | didn't match. But I didn't forget the answer, so this time I overrode the 336 | comparison by pressing 'g' and enter. 337 | 338 | 339 | Anyway, that's the basic mechanics of drilling flashcards with `mg`. 340 | Some final notes: 341 | 342 | * Continue until the end of the session for the model to be saved to disk. 343 | * If you need to or want to abort the session early, you can send 344 | '^C' (control+C) (the session so far will be saved) or 345 | '^D' (control+D) (the session so far will not be saved). 346 | * You can also drill cards *backwards*, that is, to type the front of the 347 | card after being prompted with the back of the card. Use the `--reverse` 348 | flag for this. 349 | 350 | ## Step 5: Customising nodes 351 | 352 | There are several additional optional features which can enhance the 353 | flexibility of deck creation. 354 | 355 | ### Terminology: Nodes, Links, Knowledge Graph 356 | 357 | First a note on terminology. The deck generation script and its main 358 | function are called 'graph' because the tuples are termed the 'links' 359 | of a 'knowledge graph'. Likewise, each of their two components is a 360 | 'node'. 361 | This is a more general notion than 'flashcard deck', 'flashcard', 362 | 'side', and it's the terminology we'll continue with when discussing 363 | further customisation options below. 364 | 365 | ### Topics 366 | 367 | The most basic enhancement is to add 'topics' to each link. This can later 368 | be used to filter for certain subgroups of cards for when the collection of 369 | decks and cards in the current directory is much larger. 370 | (see `mg drill --help` for information on how to specify topics). 371 | Specifying topics for a knowledge link requires adding a dot-separated string 372 | of topics as a third component to the link tuple in the generator function. 
373 | For example, we could add the topics `de` and `digits` to all ten cards in 374 | our deck as follows: 375 | 376 | ```python 377 | D = ['null','eins','zwei','drei','vier','fünf','sechs','sieben','acht','neun'] 378 | def graph(): 379 | for i, n in enumerate(D): 380 | yield i, n, "de.digits" 381 | # + ^^^^^^^^^^^^^ 382 | ``` 383 | 384 | ### Custom display and comparison strings 385 | 386 | We might want to separate the strings we use for prompting from the 387 | strings we use for checking typed answers against, and we might want 388 | both of these to be different from the strings we use for indexing 389 | the memory model (see [Step 6](#step-6-modifying-a-deck-after-learning) 390 | below for more on indexing). 391 | 392 | I don't see any of these options as super useful in the case of our 393 | example, but in some other cases (such as when drilling German nouns 394 | separately from their genders, but still wanting to note the gender 395 | when displaying the answer) it can be very useful. 396 | 397 | To do it, replace the first two components of the tuples in the graph 398 | generator function with objects of type `mg.graph.Node`, which 399 | optionally takes these three separate types of string as constructor 400 | arguments: 401 | 402 | ```python 403 | from mg.graph import Node 404 | # ^^^^^^^^^^^^^^^^^^^^^^^ 405 | 406 | D = ['null','eins','zwei','drei','vier','fünf','sechs','sieben','acht','neun'] 407 | def graph(): 408 | for i, n in enumerate(D): 409 | yield ( 410 | Node(i, print_str=format(i, "06d"), match_str=i), 411 | Node(n, print_str=format(n, ">6s"), match_str=n), 412 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 413 | ) 414 | ``` 415 | 416 | For no particular reason, this example demonstrates formatting the digits 417 | and their translations as six characters wide in prompts and displayed 418 | answers (`print_str=`). The strings used for indexing (the positional 419 | argument of the constructor) and for answer comparison (`match_str=`) are 420 | unchanged. The result: 421 | 422 | ``` 423 | prompt: 000003 424 | recall: drei 425 | answer: drei 426 | ``` 427 | 428 | In the future, it will be possible to specify custom answer-comparison 429 | functions, for example so as to allow case-insensitive matching or to 430 | perform other normalisation where appropriate. 431 | 432 | ### Text-to-Speech 433 | 434 | By installing the optional utility 435 | [`espeak`](https://github.com/espeak-ng/espeak-ng/) 436 | on your system, 437 | you will enable Text-to-Speech functionality in `mg`---to read out the 438 | prompts and correct answers to you as you practice! 439 | However, even with `espeak` installed, you have to tell `mg` that your 440 | nodes are to be spoken, and in which language. The same `mg.graph.Node` 441 | class is used for this, with the constructor arguments `speak_str` and 442 | `speak_voice`. 443 | 444 | For example, the following script has number prompts spoken in English 445 | (`espeak` voice `en`) and answers spoken in German (voice `de`). See 446 | `espeak`'s documentation for a list of voices.
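On most systems you can also list the installed voices straight from the command line, for example with `espeak --voices` (or `espeak --voices=de` to restrict the list to German).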
447 | 448 | ```python 449 | from mg.graph import Node 450 | # ^^^^^^^^^^^^^^^^^^^^^^^ 451 | 452 | D = ['null','eins','zwei','drei','vier','fünf','sechs','sieben','acht','neun'] 453 | def graph(): 454 | for i, n in enumerate(D): 455 | yield ( 456 | Node(i, speak_str=i, speak_voice="en"), 457 | Node(n, speak_str=n, speak_voice="de"), 458 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 459 | ) 460 | ``` 461 | 462 | ### Duplicate nodes 463 | 464 | Sometimes it will be necessary to have multiple nodes (front or back of 465 | cards) in the same graph (deck) with the same value (specifically, the 466 | same index string). 467 | `mg` can handle this fine, so you don't need to go out of your way to 468 | uniquely index your cards. 469 | If multiple cards would have the same index string 470 | (TODO: should it be *same print string*?), 471 | `mg` will add a unique number (1, 2, ...) to the print string, so that 472 | you can learn which answer to give along with which version of the prompt. 473 | 474 | ## Step 6: Modifying a deck after learning 475 | 476 | To store the memory model parameters, `mg` creates a subdirectory of the 477 | current directory called `mgdata/`. The memory model parameters from cards 478 | in a `.mg` script are stored here inside a file with the same name plus a 479 | `.json` extension (in our example, that's `mgdata/de.digits.mg.json`). 480 | Within are the parameters for each card, keyed by a string made up of the 481 | card's front and back 'index strings' 482 | (see [above](#custom-display-and-comparison-strings)) 483 | and, if present, the optional topics (see [above](#topics)). 484 | 485 | The advantage of this is that you can reorder/insert into the generator 486 | script's output as much as you like and the stored parameters will still 487 | be found. 488 | 489 | But the disadvantage is that if you change the value or even the spelling 490 | of a node, its index into the data file will change, and the memory 491 | parameters already there will be 'orphaned'. 492 | 493 | Sometimes, for example when you are simply correcting a card, 494 | you want to keep the same parameters. 495 | In this case, unfortunately, the only way to achieve this is to go into the 496 | data file, find the entry with the old key, and update the key (ensuring 497 | no current `mg` sessions later save over your modifications, such as at 498 | the end of a learning or drill session). 499 | 500 | In other cases, you want to reset the parameters, as if you were deleting 501 | the old card and creating a new one. Then you can either leave the 502 | orphaned entry in the data file (it does no harm other than taking up 503 | space) or you can find and delete it manually. 504 | 505 | > #### Future: 506 | > 507 | > Unfortunately this step is quite clunky. 508 | > I'm planning to incorporate a `checkup` mode into the `mg` script which 509 | > will help automatically detect and correct these broken references 510 | > to save users from dealing with the data files. --------------------------------------------------------------------------------