├── .gitignore ├── README.md ├── __init__.py ├── das3h.py ├── data └── dummy │ ├── afm.csv │ ├── das3h.csv │ ├── dash.csv │ ├── irt.csv │ ├── pfa.csv │ ├── preprocessed_data.csv │ └── q_mat.npz ├── dataio.py ├── encode.py ├── encode_parallel.py ├── prepare_data.py ├── requirements.txt ├── slides ├── Makefile ├── biblio.bib ├── figures │ ├── LogoCS1.png │ ├── aip.png │ ├── anki.png │ ├── comp_dim_das3h.pdf │ ├── comp_dim_irt.pdf │ ├── example_das3h_curve_1.pdf │ ├── example_das3h_curve_2.pdf │ ├── fm.pdf │ ├── fm2.pdf │ ├── item_skills_relations.pdf │ ├── leitner.png │ ├── logo_UP_saclay_final.png │ ├── logo_dataia.png │ ├── logo_hadamard.png │ ├── logo_inria.png │ ├── logo_labex_digicosme.png │ ├── logo_lri.jpeg │ ├── logo_paris_saclay_ds.png │ ├── logo_ups.png │ ├── pairwise_comp_all_datasets.pdf │ └── time_windows.pdf ├── jdse.md ├── jdse.pdf ├── lak2020.md ├── lak2020.pdf ├── limsi.md ├── limsi.pdf ├── lip6.md ├── lip6.pdf ├── presentation.md ├── presentation.pdf └── tables │ ├── comp_DAS3H_multiparams.tex │ ├── datasets_caracs.tex │ ├── datasets_caracs_jdse.tex │ ├── dummy_qmat.tex │ └── exp_results.tex ├── split_data.py ├── tests ├── __init__.py ├── test_encode_data.py └── test_this_queue.py └── utils └── this_queue.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__/ 3 | *.py[cod] 4 | .ipynb_checkpoints 5 | *.pptx 6 | *.aux 7 | *.bbl 8 | *.bcf 9 | *.blg 10 | *.log 11 | *.nav 12 | *.out 13 | *.run.xml 14 | *.snm 15 | *.tex.bbl 16 | *.tex.blg 17 | *.toc 18 | *.vrb 19 | *.docx 20 | *.ipynb 21 | 22 | libfm 23 | venv 24 | presentation.tex 25 | limsi.tex 26 | lip6.tex 27 | jdse.tex 28 | lak2020.tex 29 | TODO.md 30 | online_models.py 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## DAS3H 2 | 3 | This repository contains the Python code used for the experiments from our EDM 2019 paper: [_DAS3H: Modeling Student Learning and Forgetting for 4 | Optimally Scheduling Distributed Practice of Skills_](https://arxiv.org/abs/1905.06873). Authors: [Benoît Choffin](https://github.com/BenoitChoffin), [Fabrice Popineau](https://github.com/fpopineau), Yolaine Bourda, and [Jill-Jênn Vie](https://github.com/jilljenn). 5 | 6 | Code for this repository is partly borrowed from [jilljenn](http://jill-jenn.net/)'s [ktm repository](https://github.com/jilljenn/ktm). 7 | 8 | It is recommended to use a virtual environment for running our experiments. In order to use embedding dimensions *d* > 0, [libfm](https://github.com/srendle/libfm) needs to be installed as well: 9 | 10 | ``` 11 | git clone https://github.com/srendle/libfm 12 | cd libfm && git reset --hard 91f8504a15120ef6815d6e10cc7dee42eebaab0f && make all 13 | ``` 14 | 15 | #### Preparing data 16 | 17 | Three open access datasets were used for our experiments: 18 | * [ASSISTments 2012-2013](https://sites.google.com/site/assistmentsdata/home/2012-13-school-data-with-affect) (assistments12) 19 | * Bridge to Algebra 2006-2007 (bridge_algebra06) 20 | * Algebra I 2005-2006 (algebra05) 21 | 22 | The two last datasets come from the [KDD Cup 2010 EDM Challenge](http://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp). Datasets need to be downloaded and put inside each corresponding data folder in data. The main dataset (`train` for KDD Cup) should each time be renamed "data" + corresponding extension name. 
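For instance (given the paths that `prepare_data.py` expects), the `data` folder should look as follows before preprocessing — the ASSISTments file keeps its `.csv` extension, while the KDD Cup `train` files keep their `.txt` extension:

```
data/
├── assistments12/
│   └── data.csv
├── algebra05/
│   └── data.txt
└── bridge_algebra06/
    └── data.txt
```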
23 | 24 | To preprocess each of the datasets: 25 | 26 | ``` 27 | python prepare_data.py --dataset --min_interactions 10 --remove_nan_skills 28 | ``` 29 | 30 | #### Encoding sparse features 31 | 32 | To encode sparse features on which the ML models will train, `encode.py` is used. The preprocessed dataset is automatically selected. For instance, DAS3H is "users, items, skills, wins, attempts, tw_kc": 33 | 34 | ``` 35 | python encode.py --dataset --users --items --skills --wins --attempts --tw_kc 36 | ``` 37 | 38 | | | users | items | skills | wins | fails | attempts | tw_kc | tw_items | 39 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 40 | | DAS3H | x | x | x | x | | x | x | | 41 | | DASH | x | x | | x | | x | | x | 42 | | IRT/MIRT | x | x | | | | | | | 43 | | PFA | | | x | x | x | | | | 44 | | AFM | | | x | | | x | | | 45 | 46 | A faster script for encoding DAS3H time windows is available [here](https://github.com/jilljenn/ktm/blob/master/encode_tw.py#L115). 47 | 48 | #### Running the models 49 | 50 | Code for running the experiments is in `das3h.py`. For instance, for performing cross-validation for DAS3H with embedding dimension *d*=5, on ASSISTments12: 51 | 52 | ``` 53 | python das3h.py data/assistments12/X-uiswat1.npz --dataset assistments12 --d 5 --users --items --skills --wins --attempts --tw_kc 54 | ``` 55 | 56 | ## Appendix: complete metrics tables 57 | Algebra 2005-2006 (PSLC DataShop) dataset: 58 | 59 | | model | dim | AUC | ACC | NLL | 60 | |:-:|:-----:|:-----:|:------:|:----:| 61 | | DAS3H | 0 | **0.826** ± 0.003 | **0.815** ± 0.007 | **0.414** ± 0.011 | 62 | | DAS3H | 5 | **0.818** ± 0.004 | **0.812** ± 0.007 | **0.421** ± 0.011 | 63 | | DAS3H | 20 | **0.817** ± 0.005 | **0.811** ± 0.004 | **0.422** ± 0.007 | 64 | | DASH | 5 | 0.775 ± 0.005 | 0.802 ± 0.010 | 0.458 ± 0.012 | 65 | | DASH | 20 | 0.774 ± 0.005 | 0.803 ± 0.014 | 0.456 ± 0.017 | 66 | | DASH | 0 | 0.773 ± 0.002 | 0.801 ± 0.004 | 0.454 ± 0.006 | 67 | | IRT | 0 | 0.771 ± 0.007 | 0.800 ± 0.009 | 0.456 ± 0.015 | 68 | | MIRTb | 20 | 0.770 ± 0.007 | 0.800 ± 0.006 | 0.460 ± 0.007 | 69 | | MIRTb | 5 | 0.770 ± 0.004 | 0.800 ± 0.008 | 0.459 ± 0.011| 70 | | PFA | 0 | 0.744 ± 0.004 | 0.782 ± 0.003 | 0.481 ± 0.004 | 71 | | AFM | 0 | 0.707 ± 0.005 | 0.774 ± 0.004 | 0.499 ± 0.006 | 72 | | PFA | 20 | 0.670 ± 0.010 | 0.748 ± 0.005 | 1.008 ± 0.047 | 73 | | PFA | 5 | 0.664 ± 0.010 | 0.735 ± 0.013 | 1.107 ± 0.079 | 74 | | AFM | 20 | 0.644 ± 0.005 | 0.750 ± 0.005 | 0.817 ± 0.076 | 75 | | AFM | 5 | 0.640 ± 0.007 | 0.742 ± 0.009 | 0.941 ± 0.056 | 76 | 77 | ASSISTments 2012-2013 dataset: 78 | 79 | | model | dim | AUC | ACC | NLL | 80 | |:-:|:-----:|:-----:|:------:|:----:| 81 | | DAS3H | 5 | **0.744** ± 0.002 | **0.737** ± 0.001 | **0.531** ± 0.001 | 82 | | DAS3H | 20 | **0.740** ± 0.001 | **0.736** ± 0.002 | **0.533** ± 0.003 | 83 | | DAS3H | 0 | **0.739** ± 0.001 | **0.736** ± 0.001 | **0.534** ± 0.002 | 84 | | DASH | 0 | 0.703 ± 0.002 | 0.719 ± 0.003 | 0.557 ± 0.004 | 85 | | DASH | 5 | 0.703 ± 0.001 | 0.720 ± 0.001 | 0.557 ± 0.001 | 86 | | DASH | 20 | 0.703 ± 0.002 | 0.720 ± 0.002 | 0.557 ± 0.002 | 87 | | IRT | 0 | 0.702 ± 0.001 | 0.719 ± 0.001 | 0.558 ± 0.001 | 88 | | MIRTb | 20 | 0.701 ± 0.001 | 0.720 ± 0.001 | 0.558 ± 0.001 | 89 | | MIRTb | 5 | 0.701 ± 0.002 | 0.719 ± 0.001 | 0.558 ± 0.001 | 90 | | PFA | 5 | 0.669 ± 0.002 | 0.709 ± 0.002 | 0.577 ± 0.002 | 91 | | PFA | 20 | 0.668 ± 0.002 | 0.709 ± 0.003 | 0.578 ± 0.003| 92 | | PFA | 0 | 0.668 ± 0.002 | 0.708 ± 0.001 | 0.579 ± 0.002 | 93 | | AFM | 5 | 
0.610 ± 0.001 | 0.699 ± 0.002 | 0.597 ± 0.001 | 94 | | AFM | 20 | 0.609 ± 0.001 | 0.699 ± 0.003 | 0.597 ± 0.003 | 95 | | AFM | 0 | 0.608 ± 0.002 | 0.697 ± 0.002 | 0.598 ± 0.002 | 96 | 97 | Bridge to Algebra 2006-2007 (PSLC DataShop): 98 | 99 | | model | dim | AUC | ACC | NLL | 100 | |:-:|:-----:|:-----:|:------:|:----:| 101 | | DAS3H | 5 | **0.791** ± 0.005 | **0.848** ± 0.002 | **0.369** ± 0.005 | 102 | | DAS3H | 0 | **0.790** ± 0.004 | **0.846** ± 0.002 | **0.371** ± 0.004 | 103 | | DAS3H | 20 | 0.776 ± 0.023 | 0.838 ± 0.019 | 0.387 ± 0.027 | 104 | | DASH | 0 | 0.749 ± 0.002 | 0.840 ± 0.005 | 0.393 ± 0.007 | 105 | | DASH | 20 | 0.747 ± 0.003 | 0.840 ± 0.001 | 0.399 ± 0.002 | 106 | | IRT | 0 | 0.747 ± 0.002 | 0.839 ± 0.004 | 0.393 ± 0.007 | 107 | | DASH | 5 | 0.747 ± 0.003 | 0.840 ± 0.002 | 0.399 ± 0.002 | 108 | | MIRTb | 5 | 0.746 ± 0.002 | 0.840 ± 0.004 | 0.398 ± 0.006 | 109 | | MIRTb | 20 | 0.746 ± 0.004 | 0.839 ± 0.005 | 0.399 ± 0.007 | 110 | | PFA | 20 | 0.746 ± 0.003 | 0.839 ± 0.002 | 0.397 ± 0.004 | 111 | | PFA | 5 | 0.744 ± 0.007 | 0.838 ± 0.003 | 0.402 ± 0.007 | 112 | | PFA | 0 | 0.739 ± 0.003 | 0.835 ± 0.005 | 0.406 ± 0.008 | 113 | | AFM | 5 | 0.706 ± 0.002 | 0.836 ± 0.003 | 0.411 ± 0.004 | 114 | | AFM | 20 | 0.706 ± 0.002 | 0.836 ± 0.003 | 0.412 ± 0.004 | 115 | | AFM | 0 | 0.692 ± 0.002 | 0.833 ± 0.004 | 0.423 ± 0.006 | 116 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/__init__.py -------------------------------------------------------------------------------- /das3h.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import KFold 2 | from sklearn.metrics import roc_auc_score, accuracy_score, log_loss 3 | from sklearn.linear_model import LogisticRegression 4 | from scipy.sparse import load_npz, hstack, csr_matrix 5 | from collections import defaultdict 6 | import pandas as pd 7 | import pywFM 8 | import argparse 9 | import numpy as np 10 | import os 11 | import sys 12 | import dataio 13 | import json 14 | 15 | # Location of libFM's compiled binary file 16 | os.environ['LIBFM_PATH'] = os.path.join(os.path.dirname(__file__), 17 | 'libfm/bin/') 18 | 19 | parser = argparse.ArgumentParser(description='Run DAS3H and other student models.') 20 | parser.add_argument('X_file', type=str, nargs='?') 21 | parser.add_argument('--dataset', type=str, nargs='?', default='assistments12') 22 | parser.add_argument('--generalization', type=str, nargs='?', default='strongest') 23 | parser.add_argument('--d', type=int, nargs='?') 24 | parser.add_argument('--C', type=float, nargs='?', default=1.) 
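# The code below also uses datetime, glob, time, MaxAbsScaler, Pipeline and
# mean_squared_error; these imports are missing from the import block at the top
# of this file and are supplied here so the script can run as shown:
import datetime
import glob
import time
from sklearn.preprocessing import MaxAbsScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error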
25 | parser.add_argument('--grid_search', type=bool, nargs='?', const=True, default=False) 26 | parser.add_argument('--feature_grouping', type=bool, nargs='?', const=True, default=False) 27 | parser.add_argument('--iter', type=int, nargs='?', default=300) 28 | parser.add_argument('--users', type=bool, nargs='?', const=True, default=False) 29 | parser.add_argument('--items', type=bool, nargs='?', const=True, default=False) 30 | parser.add_argument('--skills', type=bool, nargs='?', const=True, default=False) 31 | parser.add_argument('--wins', type=bool, nargs='?', const=True, default=False) 32 | parser.add_argument('--fails', type=bool, nargs='?', const=True, default=False) 33 | parser.add_argument('--attempts', type=bool, nargs='?', const=True, default=False) 34 | parser.add_argument('--tw_kc', type=bool, nargs='?', const=True, default=False) 35 | parser.add_argument('--tw_items', type=bool, nargs='?', const=True, default=False) 36 | options = parser.parse_args() 37 | 38 | experiment_args = vars(options) 39 | today = datetime.datetime.now() # save date of experiment 40 | DATASET_NAME = options.dataset 41 | CSV_FOLDER = dataio.build_new_paths(DATASET_NAME) 42 | 43 | # Build legend 44 | short_legend, full_legend, latex_legend, active_agents = dataio.get_legend(experiment_args) 45 | 46 | EXPERIMENT_FOLDER = os.path.join(CSV_FOLDER, options.generalization, "results", short_legend) 47 | dataio.prepare_folder(EXPERIMENT_FOLDER) 48 | 49 | # Load sparsely encoded datasets 50 | X = csr_matrix(load_npz(options.X_file)) 51 | y = X[:,0].toarray().flatten() 52 | qmat = load_npz(os.path.join(CSV_FOLDER, "q_mat.npz")) 53 | 54 | # FM parameters 55 | params = { 56 | 'task': 'classification', 57 | 'num_iter': options.iter, 58 | 'rlog': True, 59 | 'learning_method': 'mcmc', 60 | 'k2': options.d 61 | } 62 | 63 | if options.grid_search: 64 | dict_of_auc = defaultdict(lambda: []) 65 | dict_of_rmse = defaultdict(lambda: []) 66 | dict_of_nll = defaultdict(lambda: []) 67 | dict_of_acc = defaultdict(lambda: []) 68 | list_of_elapsed_times = [] 69 | 70 | # Define array of grouping variables 71 | if options.feature_grouping: 72 | with open(CSV_FOLDER+'/config.json') as json_file: 73 | config = json.load(json_file) 74 | arr_of_grouping = [] 75 | group_id = 0 76 | if options.users: 77 | arr_of_grouping.extend([group_id for i in range(config["n_users"])]) 78 | group_id += 1 79 | if options.items: 80 | arr_of_grouping.extend([group_id for i in range(config["n_items"])]) 81 | group_id += 1 82 | if options.skills: 83 | arr_of_grouping.extend([group_id for i in range(config["n_skills"])]) 84 | group_id += 1 85 | if options.wins: 86 | if options.tw_kc: # we group all win features together, regardless of the tw 87 | arr_of_grouping.extend([group_id for i in range(5*config["n_skills"])]) 88 | group_id += 1 89 | else: 90 | arr_of_grouping.extend([group_id for i in range(config["n_skills"])]) 91 | group_id += 1 92 | if options.fails: # to change if we allow for fails + tw 93 | arr_of_grouping.extend([group_id for i in range(config["n_skills"])]) 94 | group_id += 1 95 | if options.attempts: 96 | if options.tw_kc: # we group all attempt features together, regardless of the tw 97 | arr_of_grouping.extend([group_id for i in range(5*config["n_skills"])]) 98 | group_id += 1 99 | else: 100 | arr_of_grouping.extend([group_id for i in range(config["n_skills"])]) 101 | group_id += 1 102 | arr_of_grouping = np.array(arr_of_grouping) 103 | 104 | for i, folds_file in enumerate(sorted(glob.glob(os.path.join(CSV_FOLDER, options.generalization, 
"folds/test_fold*.npy")))): 105 | dataio.prepare_folder(os.path.join(EXPERIMENT_FOLDER, str(i))) 106 | dt = time.time() 107 | test_ids = np.load(folds_file) 108 | train_ids = list(set(range(X.shape[0])) - set(test_ids)) 109 | 110 | X_train = X[train_ids,1:] 111 | y_train = y[train_ids] 112 | X_test = X[test_ids,1:] 113 | y_test = y[test_ids] 114 | 115 | if options.grid_search: 116 | if options.d == 0: 117 | for c in [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]: 118 | print('fitting for c=...'.format(c)) 119 | estimators = [ 120 | ('maxabs', MaxAbsScaler()), 121 | ('lr', LogisticRegression(solver="saga", max_iter=options.iter, C=c)) 122 | ] 123 | pipe = Pipeline(estimators) 124 | pipe.fit(X_train, y_train) 125 | y_pred_test = pipe.predict_proba(X_test)[:, 1] 126 | dict_of_auc[c].append(roc_auc_score(y_test, y_pred_test)) 127 | dict_of_rmse[c].append(np.sqrt(mean_squared_error(y_test, y_pred_test))) 128 | dict_of_nll[c].append(log_loss(y_test, y_pred_test)) 129 | dict_of_acc[c].append(accuracy_score(y_test, np.round(y_pred_test))) 130 | list_of_elapsed_times.append(np.around(time.time() - dt,3)) 131 | else: 132 | for meta_value in ["no grouping","feature grouping"]: 133 | if meta_value == "no grouping": 134 | grouping = None 135 | else: 136 | grouping = arr_of_grouping 137 | print('fitting with {}...'.format(meta_value)) 138 | transformer = MaxAbsScaler().fit(X_train) 139 | fm = pywFM.FM(**params) 140 | model = fm.run(transformer.transform(X_train), y_train, 141 | transformer.transform(X_test), y_test, meta=grouping) 142 | y_pred_test = np.array(model.predictions) 143 | dict_of_auc[meta_value].append(roc_auc_score(y_test, y_pred_test)) 144 | dict_of_rmse[meta_value].append(np.sqrt(mean_squared_error(y_test, y_pred_test))) 145 | dict_of_nll[meta_value].append(log_loss(y_test, y_pred_test)) 146 | dict_of_acc[meta_value].append(accuracy_score(y_test, np.round(y_pred_test))) 147 | list_of_elapsed_times.append(np.around(time.time() - dt,3)) 148 | else: 149 | if options.d == 0: 150 | print('fitting...') 151 | estimators = [ 152 | ('maxabs', MaxAbsScaler()), 153 | ('lr', LogisticRegression(solver="saga", max_iter=options.iter, C=options.C)) 154 | ] 155 | pipe = Pipeline(estimators) 156 | pipe.fit(X_train, y_train) 157 | y_pred_test = pipe.predict_proba(X_test)[:, 1] 158 | else: 159 | if options.feature_grouping: 160 | grouping = arr_of_grouping 161 | else: 162 | grouping = None 163 | transformer = MaxAbsScaler().fit(X_train) 164 | fm = pywFM.FM(**params) 165 | model = fm.run(transformer.transform(X_train), y_train, 166 | transformer.transform(X_test), y_test, meta=grouping) 167 | y_pred_test = np.array(model.predictions) 168 | model.rlog.to_csv(os.path.join(EXPERIMENT_FOLDER, str(i), 'rlog.csv')) 169 | 170 | print(y_test) 171 | print(y_pred_test) 172 | ACC = accuracy_score(y_test, np.round(y_pred_test)) 173 | AUC = roc_auc_score(y_test, y_pred_test) 174 | print('auc', AUC) 175 | NLL = log_loss(y_test, y_pred_test) 176 | print('nll', NLL) 177 | RMSE = np.sqrt(mean_squared_error(y_test,y_pred_test)) 178 | 179 | elapsed_time = np.around(time.time() - dt,3) 180 | # Save experimental results 181 | with open(os.path.join(EXPERIMENT_FOLDER, str(i), 'results.json'), 'w') as f: 182 | f.write(json.dumps({ 183 | 'date': str(today), 184 | 'args': experiment_args, 185 | 'legends': { 186 | 'short': short_legend, 187 | 'full': full_legend, 188 | 'latex': latex_legend 189 | }, 190 | 'metrics': { 191 | 'ACC': ACC, 192 | 'AUC': AUC, 193 | 'NLL': NLL, 194 | 'RMSE': RMSE 195 | }, 196 | 'elapsed_time': elapsed_time 197 | }, 
indent=4)) 198 | 199 | if options.grid_search: 200 | list_of_hp = [] 201 | list_of_mean_metrics = [] 202 | for hp in dict_of_auc.keys(): 203 | list_of_hp.append(hp) 204 | list_of_mean_metrics.append(np.mean(dict_of_auc[hp])) 205 | optimal_hp = list_of_hp[np.argmax(list_of_mean_metrics)] 206 | print("Optimal set of HP found: {}".format(optimal_hp)) 207 | print("Overall AUC : {}".format(np.around(np.mean(dict_of_auc[optimal_hp]),3))) 208 | print("Overall RMSE : {}".format(np.around(np.mean(dict_of_rmse[optimal_hp]),3))) 209 | print("Overall NLL : {}".format(np.around(np.mean(dict_of_nll[optimal_hp]),3))) 210 | 211 | for i in range(len(list_of_elapsed_times)): 212 | with open(os.path.join(EXPERIMENT_FOLDER, str(i), 'results.json'), 'w') as f: 213 | f.write(json.dumps({ 214 | 'date': str(today), 215 | 'args': experiment_args, 216 | 'legends': { 217 | 'short': short_legend, 218 | 'full': full_legend, 219 | 'latex': latex_legend 220 | }, 221 | 'metrics': { 222 | 'ACC': dict_of_acc[optimal_hp][i], 223 | 'AUC': dict_of_auc[optimal_hp][i], 224 | 'NLL': dict_of_nll[optimal_hp][i], 225 | 'RMSE': dict_of_rmse[optimal_hp][i] 226 | }, 227 | 'elapsed_time': list_of_elapsed_times[i], 228 | 'optimal_hp': optimal_hp 229 | }, indent=4)) 230 | -------------------------------------------------------------------------------- /data/dummy/afm.csv: -------------------------------------------------------------------------------- 1 | s0;s1;s2;a0;a1;a2 2 | 1;0;1;0;0;0 3 | 0;1;1;0;0;0 4 | 0;1;1;0;0;1 5 | 1;0;1;0;0;1 6 | 0;1;0;0;1;0 7 | 1;0;1;1;0;2 8 | 0;1;0;0;2;0 9 | 0;1;0;0;1;0 -------------------------------------------------------------------------------- /data/dummy/das3h.csv: -------------------------------------------------------------------------------- 1 | u0;u1;i0;i1;i2;s0;s1;s2;w0,inf;w0,30;w0,7;w0,1;w0,1/24;w1,inf;w1,30;w1,7;w1,1;w1,1/24;w2,inf;w2,30;w2,7;w2,1;w2,1/24;a0,inf;a0,30;a0,7;a0,1;a0,1/24;a1,inf;a1,30;a1,7;a1,1;a1,1/24;a2,inf;a2,30;a2,7;a2,1;a2,1/24 2 | 1;0;1;0;0;1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 3 | 0;1;0;0;1;0;1;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0 4 | 1;0;0;0;1;0;1;1;0;0;0;0;0;0;0;0;0;0;1;1;1;1;0;0;0;0;0;0;0;0;0;0;0;1;1;1;1;0 5 | 0;1;1;0;0;1;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0;0 6 | 1;0;0;1;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;1;0;0;0;0;0;0;0 7 | 0;1;1;0;0;1;0;1;1;1;1;0;0;0;0;0;0;0;1;1;1;0;0;1;1;1;0;0;0;0;0;0;0;2;2;1;0;0 8 | 1;0;0;1;0;0;1;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;2;0;0;0;0;0;0;0;0;0 9 | 0;1;0;1;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0;0;0;0;0;0;0 -------------------------------------------------------------------------------- /data/dummy/dash.csv: -------------------------------------------------------------------------------- 1 | u0;u1;i0;i1;i2;winf;w30;w7;w1;w1/24;ainf;a30;a7;a1;a1/24 2 | 1;0;1;0;0;0;0;0;0;0;0;0;0;0;0 3 | 0;1;0;0;1;0;0;0;0;0;0;0;0;0;0 4 | 1;0;0;0;1;0;0;0;0;0;0;0;0;0;0 5 | 0;1;1;0;0;0;0;0;0;0;0;0;0;0;0 6 | 1;0;0;1;0;0;0;0;0;0;0;0;0;0;0 7 | 0;1;1;0;0;1;1;1;0;0;1;1;1;0;0 8 | 1;0;0;1;0;1;0;0;0;0;1;0;0;0;0 9 | 0;1;0;1;0;0;0;0;0;0;0;0;0;0;0 -------------------------------------------------------------------------------- /data/dummy/irt.csv: -------------------------------------------------------------------------------- 1 | u0;u1;i0;i1;i2 2 | 1;0;1;0;0 3 | 0;1;0;0;1 4 | 1;0;0;0;1 5 | 0;1;1;0;0 6 | 1;0;0;1;0 7 | 0;1;1;0;0 8 | 1;0;0;1;0 9 | 0;1;0;1;0 -------------------------------------------------------------------------------- /data/dummy/pfa.csv: 
-------------------------------------------------------------------------------- 1 | s0;s1;s2;w0;w1;w2;f0;f1;f2 2 | 1;0;1;0;0;0;0;0;0 3 | 0;1;1;0;0;0;0;0;0 4 | 0;1;1;0;0;1;0;0;0 5 | 1;0;1;0;0;0;0;0;1 6 | 0;1;0;0;0;0;0;1;0 7 | 1;0;1;1;0;1;0;0;1 8 | 0;1;0;0;1;0;0;1;0 9 | 0;1;0;0;0;0;0;1;0 -------------------------------------------------------------------------------- /data/dummy/preprocessed_data.csv: -------------------------------------------------------------------------------- 1 | user_id,item_id,timestamp,correct,inter_id 2 | 0,0,0,1,0 3 | 1,2,172800,0,1 4 | 0,2,69120,0,2 5 | 1,0,864000,1,3 6 | 0,1,432000,1,4 7 | 1,0,1036800,0,5 8 | 0,1,3456000,0,6 9 | 1,1,1728000,1,7 10 | -------------------------------------------------------------------------------- /data/dummy/q_mat.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/data/dummy/q_mat.npz -------------------------------------------------------------------------------- /dataio.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | import numpy as np 4 | from sklearn.model_selection import KFold 5 | 6 | def prepare_folder(path): 7 | """Create folder from path.""" 8 | if not os.path.isdir(path): 9 | os.makedirs(path) 10 | 11 | def build_new_paths(DATASET_NAME): 12 | """Create dataset folder path name.""" 13 | DATA_FOLDER = './data' 14 | CSV_FOLDER = os.path.join(DATA_FOLDER, DATASET_NAME) 15 | return CSV_FOLDER 16 | 17 | def get_legend(experiment_args): 18 | """Generate legend for an experiment. 19 | 20 | Argument: 21 | experiment_args -- experiment arguments (from ArgumentParser) 22 | 23 | Outputs: 24 | short -- short legend (str) 25 | full -- full legend (str) 26 | latex -- latex legend (str) 27 | active -- list of active variables 28 | """ 29 | dim = experiment_args['d'] 30 | short = '' 31 | full = '' 32 | agents = ['users', 'items', 'skills', 'wins', 'fails', 'attempts'] 33 | active = [] 34 | for agent in agents: 35 | if experiment_args.get(agent): 36 | short += agent[0] 37 | active.append(agent) 38 | if experiment_args.get('tw_kc'): 39 | short += 't1' 40 | active.append("tw_kc") 41 | elif experiment_args.get('tw_items'): 42 | short += 't2' 43 | active.append("tw_items") 44 | short += '_' # add embedding dimension after underscore 45 | short += str(dim) 46 | prefix = '' 47 | if set(active) == {'users', 'items'} and dim == 0: 48 | prefix = 'IRT: ' 49 | elif set(active) == {'users', 'items'} and dim > 0: 50 | prefix = 'MIRTb: ' 51 | elif set(active) == {'skills', 'attempts'}: 52 | prefix = 'AFM: ' 53 | elif set(active) == {'skills', 'wins', 'fails'}: 54 | prefix = 'PFA: ' 55 | elif set(active) == {'users', 'items', 'skills', 'wins', 'attempts', 'tw_kc'}: 56 | prefix = 'DAS3H: ' 57 | elif set(active) == {'users', 'items', 'wins', 'attempts', 'tw_items'}: 58 | prefix = 'DASH: ' 59 | full = prefix + ', '.join(active) + ' d = {:d}'.format(dim) 60 | latex = prefix + ', '.join(active) 61 | return short, full, latex, active 62 | 63 | def save_strongest_folds(full, dataset_name, nb_folds=5): 64 | all_users = full["user_id"].unique() 65 | kfold = KFold(nb_folds, shuffle=True) 66 | for i, (train, test) in enumerate(kfold.split(all_users)): 67 | path = "data/"+dataset_name+"/strongest/folds" 68 | prepare_folder(path) 69 | list_of_test_ids = [] 70 | for user_id in test: 71 | list_of_test_ids += list(full.query('user_id == {}'.format(user_id)).index) 72 
| np.save(path+'/test_fold{}.npy'.format(i), np.array(list_of_test_ids)) 73 | 74 | def save_pseudostrong_folds(full, dataset_name, perc_initial=.2, nb_folds=5): 75 | all_users = full["user_id"].unique() 76 | kfold = KFold(nb_folds, shuffle=True) 77 | for i, (train, test) in enumerate(kfold.split(all_users)): 78 | path = "data/"+dataset_name+"/pseudostrong/folds" 79 | prepare_folder(path) 80 | list_of_test_ids = [] 81 | for user_id in test: 82 | fold = full.query('user_id == {}'.format(user_id)).sort_values('timestamp').index 83 | list_of_test_ids += list(fold[round(perc_initial*len(fold)):]) 84 | np.save(path+'/test_fold{}.npy'.format(i), np.array(list_of_test_ids)) 85 | 86 | -------------------------------------------------------------------------------- /encode.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.preprocessing import OneHotEncoder 4 | from utils.this_queue import OurQueue 5 | from collections import defaultdict, Counter 6 | from scipy import sparse 7 | import argparse 8 | import os 9 | from utils.this_queue import OurQueue 10 | 11 | parser = argparse.ArgumentParser(description='Encode datasets.') 12 | parser.add_argument('--dataset', type=str, nargs='?', default='assistments12') 13 | parser.add_argument('--users', type=bool, nargs='?', const=True, default=False) 14 | parser.add_argument('--items', type=bool, nargs='?', const=True, default=False) 15 | parser.add_argument('--skills', type=bool, nargs='?', const=True, default=False) 16 | parser.add_argument('--wins', type=bool, nargs='?', const=True, default=False) 17 | parser.add_argument('--fails', type=bool, nargs='?', const=True, default=False) 18 | parser.add_argument('--attempts', type=bool, nargs='?', const=True, default=False) 19 | parser.add_argument('--tw_kc', type=bool, nargs='?', const=True, default=False) 20 | parser.add_argument('--tw_items', type=bool, nargs='?', const=True, default=False) 21 | options = parser.parse_args() 22 | 23 | NB_OF_TIME_WINDOWS = 5 24 | 25 | def df_to_sparse(df, Q_mat, active_features, tw=None, verbose=True): 26 | """Build sparse features dataset from dense dataset and q-matrix. 27 | 28 | Arguments: 29 | df -- dense dataset, output from one function from prepare_data.py (pandas DataFrame) 30 | Q_mat -- q-matrix, output from one function from prepare_data.py (sparse array) 31 | active_features -- features used to build the dataset (list of strings) 32 | tw -- useful when script is *not* called from command line. 33 | verbose -- if True, print information on the encoding process (bool) 34 | 35 | Output: 36 | sparse_df -- sparse dataset. The 5 first columns of sparse_df are just the same columns as in df. 37 | 38 | Notes: 39 | * tw_kc and tw_items respectively encode time windows features instead of regular counter features 40 | at the skill and at the item level for wins and attempts, as decribed in our paper. As a consequence, 41 | these arguments can only be used along with the wins and/or attempts arguments. With tw_kc, one column 42 | per time window x skill is encoded, whereas with tw_items, one column per time window is encoded (it is 43 | assumed that items share the same time window biases). 
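    * NB_OF_TIME_WINDOWS is set to 5; judging from the commented-out LIST_OF_BOUNDARIES below and from
      data/dummy/das3h.csv, these windows span 1/24, 1, 7, 30 and +inf days (roughly: last hour, day,
      week, month and whole history). The counters returned by OurQueue.get_counters are encoded as
      log(1 + count).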
44 | """ 45 | 46 | # Transform q-matrix into dictionary 47 | dict_q_mat = {i:set() for i in range(Q_mat.shape[0])} 48 | for elt in np.argwhere(Q_mat == 1): 49 | dict_q_mat[elt[0]].add(elt[1]) 50 | 51 | X={} 52 | if 'skills' in active_features: 53 | X["skills"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 54 | if 'attempts' in active_features: 55 | if tw == "tw_kc": 56 | X["attempts"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]*NB_OF_TIME_WINDOWS))) 57 | elif tw == "tw_items": 58 | X["attempts"] = sparse.csr_matrix(np.empty((0, NB_OF_TIME_WINDOWS))) 59 | else: 60 | X["attempts"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 61 | if 'wins' in active_features: 62 | if tw == "tw_kc": 63 | X["wins"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]*NB_OF_TIME_WINDOWS))) 64 | elif tw == "tw_items": 65 | X["wins"] = sparse.csr_matrix(np.empty((0, NB_OF_TIME_WINDOWS))) 66 | else: 67 | X["wins"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 68 | if 'fails' in active_features: 69 | X["fails"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 70 | 71 | X['df'] = np.empty((0,5)) # Keep track of the original dataset 72 | 73 | q = defaultdict(lambda: OurQueue()) # Prepare counters for time windows 74 | for stud_id in df["user_id"].unique(): 75 | df_stud = df[df["user_id"]==stud_id][["user_id", "item_id", "timestamp", "correct", "inter_id"]].copy() 76 | df_stud.sort_values(by="timestamp", inplace=True) # Sort values 77 | df_stud = np.array(df_stud) 78 | X['df'] = np.vstack((X['df'], df_stud)) 79 | 80 | if 'skills' in active_features: 81 | skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 82 | X['skills'] = sparse.vstack([X["skills"],sparse.csr_matrix(skills_temp)]) 83 | if "attempts" in active_features: 84 | skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 85 | if tw == "tw_kc": 86 | attempts = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS*Q_mat.shape[1])) 87 | for l, (item_id, t) in enumerate(zip(df_stud[:,1], df_stud[:,2])): 88 | for skill_id in dict_q_mat[item_id]: 89 | attempts[l, skill_id*NB_OF_TIME_WINDOWS:(skill_id+1)*NB_OF_TIME_WINDOWS] = np.log(1 + \ 90 | np.array(q[stud_id, skill_id].get_counters(t))) 91 | q[stud_id, skill_id].push(t) 92 | #attempts = np.empty((df_stud.shape[0],0)) 93 | #for l in LIST_OF_BOUNDARIES: 94 | # attempts_temp = np.zeros((df_stud.shape[0],Q_mat.shape[1])) # a_sw array 95 | # for i in range(1,attempts_temp.shape[0]): # 1st line is always full of zeros 96 | # list_of_indices = np.where(df_stud[i,2] - df_stud[:i,2] < l) 97 | # skills_temp = Q_mat[df_stud[:i,1].astype(int)][list_of_indices] 98 | # attempts_temp[i] = np.sum(skills_temp,0) 99 | # skills = Q_mat[df_stud[:,1].astype(int)] 100 | # attempts_temp = np.log(1+np.multiply(attempts_temp,skills)) # only keep KCs involved 101 | # attempts = np.hstack((attempts,attempts_temp)) 102 | elif tw == "tw_items": 103 | attempts = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS)) 104 | for l, (item_id, t) in enumerate(zip(df_stud[:,1], df_stud[:,2])): 105 | attempts[l] = np.log(1 + np.array(q[stud_id, item_id].get_counters(t))) 106 | q[stud_id, item_id].push(t) 107 | #attempts = np.empty((df_stud.shape[0],0)) 108 | #for l in LIST_OF_BOUNDARIES: 109 | # attempts_temp = np.zeros(df_stud.shape[0]) # a_sw array 110 | # for i in range(1,attempts_temp.shape[0]): # 1st line is always full of zeros 111 | # list_of_indices = np.where((df_stud[i,2] - df_stud[:i,2] < l) & (df_stud[i,1] == df_stud[:i,1])) 112 | # attempts_temp[i] = len(list_of_indices[0]) 113 | # attempts_temp = np.log(1+attempts_temp) 114 | # 
attempts = np.hstack((attempts,attempts_temp.reshape(-1,1))) 115 | else: 116 | attempts = np.multiply(np.cumsum(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)),0)[:-1],skills_temp) 117 | X['attempts'] = sparse.vstack([X['attempts'],sparse.csr_matrix(attempts)]) 118 | if "wins" in active_features: 119 | skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 120 | if tw == "tw_kc": 121 | wins = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS*Q_mat.shape[1])) 122 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 123 | for skill_id in dict_q_mat[item_id]: 124 | wins[l, skill_id*NB_OF_TIME_WINDOWS:(skill_id+1)*NB_OF_TIME_WINDOWS] = np.log(1 + \ 125 | np.array(q[stud_id, skill_id, "correct"].get_counters(t))) 126 | if correct: 127 | q[stud_id, skill_id, "correct"].push(t) 128 | #wins = np.empty((df_stud.shape[0],0)) 129 | #for l in LIST_OF_BOUNDARIES: 130 | # wins_temp = np.zeros((df_stud.shape[0],Q_mat.shape[1])) # c_sw array 131 | # for i in range(1,wins_temp.shape[0]): # 1st line is always full of zeros 132 | # list_of_indices = np.where(df_stud[i,2] - df_stud[:i,2] < l) 133 | # skills_temp = Q_mat[df_stud[:i,1].astype(int)][list_of_indices] 134 | # wins_temp[i] = np.sum(np.multiply(skills_temp,df_stud[:i,3][list_of_indices].reshape(-1,1)),0) 135 | # skills = Q_mat[df_stud[:,1].astype(int)] 136 | # wins_temp = np.log(1+np.multiply(wins_temp,skills)) # only keep KCs involved 137 | # wins = np.hstack((wins,wins_temp)) 138 | elif tw == "tw_items": 139 | wins = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS)) 140 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 141 | wins[l] = np.log(1 + np.array(q[stud_id, item_id, "correct"].get_counters(t))) 142 | if correct: 143 | q[stud_id, item_id, "correct"].push(t) 144 | #wins = np.empty((df_stud.shape[0],0)) 145 | #for l in LIST_OF_BOUNDARIES: 146 | # wins_temp = np.zeros(df_stud.shape[0]) # c_sw array 147 | # for i in range(1,wins_temp.shape[0]): # 1st line is always full of zeros 148 | # list_of_indices = np.where((df_stud[i,2] - df_stud[:i,2] < l) & (df_stud[i,1] == df_stud[:i,1])) 149 | # wins_temp[i] = np.log(1+np.sum(df_stud[:i,3][list_of_indices])) 150 | # wins = np.hstack((wins,wins_temp.reshape(-1,1))) 151 | else: 152 | wins = np.multiply(np.cumsum(np.multiply(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)), 153 | np.hstack((np.array([0]),df_stud[:,3])).reshape(-1,1)),0)[:-1],skills_temp) 154 | X['wins'] = sparse.vstack([X['wins'],sparse.csr_matrix(wins)]) 155 | if "fails" in active_features: 156 | skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 157 | fails = np.multiply(np.cumsum(np.multiply(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)), 158 | np.hstack((np.array([0]),1-df_stud[:,3])).reshape(-1,1)),0)[:-1],skills_temp) 159 | X["fails"] = sparse.vstack([X["fails"],sparse.csr_matrix(fails)]) 160 | if verbose: 161 | print(X["df"].shape) 162 | 163 | onehot = OneHotEncoder() 164 | if 'users' in active_features: 165 | X['users'] = onehot.fit_transform(X["df"][:,0].reshape(-1,1)) 166 | if verbose: 167 | print("Users encoded.") 168 | if 'items' in active_features: 169 | X['items'] = onehot.fit_transform(X["df"][:,1].reshape(-1,1)) 170 | if verbose: 171 | print("Items encoded.") 172 | sparse_df = sparse.hstack([sparse.csr_matrix(X['df']),sparse.hstack([X[agent] for agent in active_features])]).tocsr() 173 | return sparse_df 174 | 175 | if __name__ == "__main__": 176 | os.chdir(os.path.join('data', options.dataset)) 177 | all_features = 
['users', 'items', 'skills', 'wins', 'fails', 'attempts'] 178 | active_features = [features for features in all_features if vars(options)[features]] 179 | features_suffix = ''.join([features[0] for features in active_features]) 180 | if vars(options)["tw_kc"]: 181 | features_suffix += 't1' 182 | tw = "tw_kc" 183 | elif vars(options)["tw_items"]: 184 | features_suffix += 't2' 185 | tw = "tw_items" 186 | else: 187 | tw = None 188 | #LIST_OF_BOUNDARIES = [1/24, 1, 7, 30, np.inf] 189 | 190 | df = pd.read_csv('preprocessed_data.csv', sep="\t") 191 | qmat = sparse.load_npz('q_mat.npz').toarray() 192 | X = df_to_sparse(df, qmat, active_features, tw=tw) 193 | sparse.save_npz('X-{:s}.npz'.format(features_suffix), X) 194 | 195 | -------------------------------------------------------------------------------- /encode_parallel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.preprocessing import OneHotEncoder 4 | from utils.this_queue import OurQueue 5 | from collections import defaultdict, Counter 6 | from scipy import sparse 7 | import argparse 8 | import os 9 | from tqdm import tqdm 10 | import time 11 | from joblib import Parallel, delayed 12 | 13 | parser = argparse.ArgumentParser(description='Encode datasets.') 14 | parser.add_argument('--dataset', type=str, nargs='?', default='assistments12') 15 | parser.add_argument('--users', type=bool, nargs='?', const=True, default=False) 16 | parser.add_argument('--items', type=bool, nargs='?', const=True, default=False) 17 | parser.add_argument('--skills', type=bool, nargs='?', const=True, default=False) 18 | parser.add_argument('--wins', type=bool, nargs='?', const=True, default=False) 19 | parser.add_argument('--fails', type=bool, nargs='?', const=True, default=False) 20 | parser.add_argument('--attempts', type=bool, nargs='?', const=True, default=False) 21 | parser.add_argument('--tw_kc', type=bool, nargs='?', const=True, default=False) 22 | parser.add_argument('--tw_items', type=bool, nargs='?', const=True, default=False) 23 | parser.add_argument('--log_counts', type=bool, nargs='?', const=True, default=False) 24 | options = parser.parse_args() 25 | 26 | NB_OF_TIME_WINDOWS = 5 27 | 28 | def encode_single_student(df, stud_id, Q_mat, active_features, NB_OF_TIME_WINDOWS, q, dict_q_mat, tw, 29 | wf_counters, log_counts, X): 30 | df_stud = df[df["user_id"]==stud_id][["user_id", "item_id", "timestamp", "correct"]].copy() 31 | df_stud_indices = np.array(df_stud.index).reshape(-1,1) 32 | df_stud.sort_values(by="timestamp", inplace=True) # Sort values 33 | df_stud = np.array(df_stud) 34 | X['df'] = np.hstack((df_stud[:,[0,1,3]], df_stud_indices)) 35 | 36 | skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 37 | if 'skills' in active_features: 38 | X['skills'] = sparse.csr_matrix(skills_temp) 39 | if "attempts" in active_features: 40 | if tw == "tw_kc": 41 | last_t = -1 ; list_of_skills = [] # in case multiple rows with the same timestamp 42 | attempts = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS*Q_mat.shape[1])) 43 | for l, (item_id, t) in enumerate(zip(df_stud[:,1], df_stud[:,2])): 44 | if (last_t != t) & (len(list_of_skills) > 0): 45 | for skill_id in list_of_skills: 46 | q[stud_id, skill_id].push(t) 47 | list_of_skills = [] 48 | for skill_id in dict_q_mat[item_id]: 49 | attempts[l, skill_id*NB_OF_TIME_WINDOWS:(skill_id+1)*NB_OF_TIME_WINDOWS] = np.log(1 + \ 50 | np.array(q[stud_id, skill_id].get_counters(t))) 51 | if last_t != t: 52 | q[stud_id, 
skill_id].push(t) 53 | else: 54 | list_of_skills.append(skill_id) 55 | last_t = t 56 | elif tw == "tw_items": 57 | last_t = -1 ; list_of_items = [] # in case multiple rows with the same timestamp 58 | attempts = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS)) 59 | for l, (item_id, t) in enumerate(zip(df_stud[:,1], df_stud[:,2])): 60 | if (last_t != t) & (len(list_of_items) > 0): 61 | for item in list_of_items: 62 | q[stud_id, item].push(t) 63 | list_of_items = [] 64 | attempts[l] = np.log(1 + np.array(q[stud_id, item_id].get_counters(t))) 65 | if last_t != t: 66 | q[stud_id, item_id].push(t) 67 | else: 68 | list_of_items.append(item_id) 69 | last_t = t 70 | else: 71 | last_t = -1 ; list_of_skills = [] # in case multiple rows with the same timestamp 72 | attempts = np.zeros((df_stud.shape[0], Q_mat.shape[1])) 73 | for l, (item_id, t) in enumerate(zip(df_stud[:,1], df_stud[:,2])): 74 | if (last_t != t) & (len(list_of_skills) > 0): 75 | for skill_id in list_of_skills: 76 | wf_counters[stud_id, skill_id] += 1 77 | list_of_skills = [] 78 | for skill_id in dict_q_mat[item_id]: 79 | if log_counts: 80 | attempts[l, skill_id] = np.log(1 + wf_counters[stud_id, skill_id]) 81 | else: 82 | attempts[l, skill_id] = wf_counters[stud_id, skill_id] 83 | if last_t != t: 84 | wf_counters[stud_id, skill_id] += 1 85 | else: 86 | list_of_skills.append(skill_id) 87 | last_t = t 88 | #attempts = np.multiply(np.cumsum(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)),0)[:-1],skills_temp) 89 | X['attempts'] = sparse.csr_matrix(attempts) 90 | if "wins" in active_features: 91 | #skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 92 | if tw == "tw_kc": 93 | last_t = -1 ; list_of_skills = [] # in case multiple rows with the same timestamp 94 | wins = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS*Q_mat.shape[1])) 95 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 96 | if (last_t != t) & (len(list_of_skills) > 0): 97 | for skill_id in list_of_skills: 98 | q[stud_id, skill_id, "correct"].push(t) 99 | list_of_skills = [] 100 | for skill_id in dict_q_mat[item_id]: 101 | wins[l, skill_id*NB_OF_TIME_WINDOWS:(skill_id+1)*NB_OF_TIME_WINDOWS] = np.log(1 + \ 102 | np.array(q[stud_id, skill_id, "correct"].get_counters(t))) 103 | if correct: 104 | if last_t != t: 105 | q[stud_id, skill_id, "correct"].push(t) 106 | else: 107 | list_of_skills.append(skill_id) 108 | last_t = t 109 | elif tw == "tw_items": 110 | last_t = -1 ; list_of_items = [] # in case multiple rows with the same timestamp 111 | wins = np.zeros((df_stud.shape[0], NB_OF_TIME_WINDOWS)) 112 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 113 | if (last_t != t) & (len(list_of_items) > 0): 114 | for item in list_of_items: 115 | q[stud_id, item].push(t) 116 | list_of_items = [] 117 | wins[l] = np.log(1 + np.array(q[stud_id, item_id, "correct"].get_counters(t))) 118 | if correct: 119 | if last_t != t: 120 | q[stud_id, item_id, "correct"].push(t) 121 | else: 122 | list_of_items.append(item_id) 123 | last_t = t 124 | else: 125 | last_t = -1 ; list_of_skills = [] # in case multiple rows with the same timestamp 126 | wins = np.zeros((df_stud.shape[0], Q_mat.shape[1])) 127 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 128 | if (last_t != t) & (len(list_of_skills) > 0): 129 | for skill_id in list_of_skills: 130 | wf_counters[stud_id, skill_id, "correct"] += 1 131 | list_of_skills = [] 132 | for skill_id in dict_q_mat[item_id]: 133 | if 
log_counts: 134 | wins[l, skill_id] = np.log(1 + wf_counters[stud_id, skill_id, "correct"]) 135 | else: 136 | wins[l, skill_id] = wf_counters[stud_id, skill_id, "correct"] 137 | if correct: 138 | if last_t != t: 139 | wf_counters[stud_id, skill_id, "correct"] += 1 140 | else: 141 | list_of_skills.append(skill_id) 142 | last_t = t 143 | #wins = np.multiply(np.cumsum(np.multiply(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)), 144 | # np.hstack((np.array([0]),df_stud[:,3])).reshape(-1,1)),0)[:-1],skills_temp) 145 | X['wins'] = sparse.csr_matrix(wins) 146 | if "fails" in active_features: 147 | last_t = -1 ; list_of_skills = [] # in case multiple rows with the same timestamp 148 | fails = np.zeros((df_stud.shape[0], Q_mat.shape[1])) 149 | for l, (item_id, t, correct) in enumerate(zip(df_stud[:,1], df_stud[:,2], df_stud[:,3])): 150 | if (last_t != t) & (len(list_of_skills) > 0): 151 | for skill_id in list_of_skills: 152 | wf_counters[stud_id, skill_id, "incorrect"] += 1 153 | list_of_skills = [] 154 | for skill_id in dict_q_mat[item_id]: 155 | fails[l, skill_id] = wf_counters[stud_id, skill_id, "incorrect"] 156 | if not correct: 157 | if last_t != t: 158 | wf_counters[stud_id, skill_id, "incorrect"] += 1 159 | else: 160 | list_of_skills.append(skill_id) 161 | last_t = t 162 | #skills_temp = Q_mat[df_stud[:,1].astype(int)].copy() 163 | #fails = np.multiply(np.cumsum(np.multiply(np.vstack((np.zeros(skills_temp.shape[1]),skills_temp)), 164 | # np.hstack((np.array([0]),1-df_stud[:,3])).reshape(-1,1)),0)[:-1],skills_temp) 165 | X["fails"] = sparse.csr_matrix(fails) 166 | #sparse_df = sparse.hstack([sparse.csr_matrix(X['df']), 167 | # sparse.hstack([X[agent] for agent in active_features if agent not in ["users","items"]])]).tocsr() 168 | #return sparse_df 169 | return X 170 | 171 | def df_to_sparse(df, Q_mat, active_features, tw=None, skip_sucessive=True, log_counts=False): 172 | """Build sparse features dataset from dense dataset and q-matrix. 173 | 174 | Arguments: 175 | df -- dense dataset, output from one function from prepare_data.py (pandas DataFrame) 176 | Q_mat -- q-matrix, output from one function from prepare_data.py (sparse array) 177 | active_features -- features used to build the dataset (list of strings) 178 | tw -- useful when script is *not* called from command line. 179 | 180 | Output: 181 | sparse_df -- sparse dataset. The 5 first columns of sparse_df are just the same columns as in df. 182 | 183 | Notes: 184 | * tw_kc and tw_items respectively encode time windows features instead of regular counter features 185 | at the skill and at the item level for wins and attempts, as decribed in our paper. As a consequence, 186 | these arguments can only be used along with the wins and/or attempts arguments. With tw_kc, one column 187 | per time window x skill is encoded, whereas with tw_items, one column per time window is encoded (it is 188 | assumed that items share the same time window biases). 
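    * log_counts -- if True, the plain (non time-window) win and attempt counters are encoded as
      log(1 + count) instead of raw counts.
    * Interactions sharing the same timestamp do not increment each other's counters: the last_t
      logic in encode_single_student only pushes a skill/item to the queues once the timestamp
      changes (the skip_sucessive argument is currently unused in the function body).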
189 | """ 190 | 191 | # Transform q-matrix into dictionary 192 | dt = time.time() 193 | dict_q_mat = {i:set() for i in range(Q_mat.shape[0])} 194 | for elt in np.argwhere(Q_mat == 1): 195 | dict_q_mat[elt[0]].add(elt[1]) 196 | 197 | X={} 198 | if 'skills' in active_features: 199 | X["skills"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 200 | if 'attempts' in active_features: 201 | if tw == "tw_kc": 202 | X["attempts"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]*NB_OF_TIME_WINDOWS))) 203 | elif tw == "tw_items": 204 | X["attempts"] = sparse.csr_matrix(np.empty((0, NB_OF_TIME_WINDOWS))) 205 | else: 206 | X["attempts"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 207 | if 'wins' in active_features: 208 | if tw == "tw_kc": 209 | X["wins"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]*NB_OF_TIME_WINDOWS))) 210 | elif tw == "tw_items": 211 | X["wins"] = sparse.csr_matrix(np.empty((0, NB_OF_TIME_WINDOWS))) 212 | else: 213 | X["wins"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 214 | if 'fails' in active_features: 215 | X["fails"] = sparse.csr_matrix(np.empty((0, Q_mat.shape[1]))) 216 | 217 | X['df'] = np.empty((0,4)) # Keep only track of line index + user/item id + correctness 218 | 219 | q = defaultdict(lambda: OurQueue()) # Prepare counters for time windows 220 | wf_counters = defaultdict(lambda: 0) 221 | if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 222 | res = Parallel(n_jobs=-1,verbose=10)(delayed(encode_single_student)(df, stud_id, Q_mat, active_features, NB_OF_TIME_WINDOWS, q, dict_q_mat, tw, 223 | wf_counters, log_counts, X) for stud_id in df["user_id"].unique()) 224 | for X_stud in res: 225 | for key in X_stud.keys(): 226 | if key == "df": 227 | X[key] = np.vstack((X[key],X_stud[key])) 228 | else: 229 | X[key] = sparse.vstack([X[key],X_stud[key]]).tocsr() 230 | #sparse_df = sparse.vstack([sparse.csr_matrix(X_stud) for X_stud in res]).tocsr() #df["correct"].values.reshape(-1,1)), 231 | # sparse.hstack([X[agent] for agent in active_features])]).tocsr() 232 | #sparse_df = sparse_df[np.argsort(sparse_df[:,3])] # sort matrix by original index 233 | #X_df = sparse_df[:,:5] 234 | #sparse_df = sparse_df[:,5:] 235 | onehot = OneHotEncoder() 236 | if 'users' in active_features: 237 | if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 238 | X['users'] = onehot.fit_transform(X["df"][:,0].reshape(-1,1)) 239 | else: 240 | X['users'] = onehot.fit_transform(df["user_id"].values.reshape(-1,1)) 241 | if 'items' in active_features: 242 | if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 243 | X['items'] = onehot.fit_transform(X["df"][:,1].reshape(-1,1)) 244 | else: 245 | X['items'] = onehot.fit_transform(df["item_id"].values.reshape(-1,1)) 246 | if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 247 | sparse_df = sparse.hstack([sparse.csr_matrix(X['df'])[:,-2].reshape(-1,1), 248 | sparse.hstack([X[agent] for agent in active_features])]).tocsr() 249 | #sparse_df = sparse_df[np.argsort(sparse.csr_matrix(X["df"])[:,-1])] # sort matrix by original index 250 | sparse_df = sparse_df[np.argsort(X["df"][:,-1])] # sort matrix by original index 251 | else: 252 | sparse_df = sparse.hstack([sparse.csr_matrix(df["correct"].values.reshape(-1,1)), 253 | sparse.hstack([X[agent] for agent in active_features])]).tocsr() 254 | # No need to sort sparse matrix here 255 | print("Preprocessed data in: ", time.time()-dt) 256 | #return sparse_df 257 | #if 'users' in 
active_features: 258 | # if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 259 | # sparse_df = sparse.hstack([onehot.fit_transform(X_df[:,0].reshape(-1,1))]) 260 | # else: 261 | # X_users = onehot.fit_transform(df["user_id"].values.reshape(-1,1)) 262 | #if 'items' in active_features: 263 | # if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 264 | # X_items = onehot.fit_transform(X_df[:,1].reshape(-1,1)) 265 | # else: 266 | # X_items = onehot.fit_transform(df["item_id"].values.reshape(-1,1)) 267 | #if len(set(active_features).intersection({"skills","attempts","wins","fails"})) > 0: 268 | # sparse_df = sparse.hstack([]) 269 | # sparse_df = sparse.hstack([sparse.csr_matrix(X['df'][:,-2].reshape(-1,1)), 270 | # sparse.hstack([X[agent] for agent in active_features])]).tocsr() 271 | # sparse_df = sparse_df[np.argsort(X["df"][:,-1])] # sort matrix by original index 272 | #else: 273 | # sparse_df = sparse.hstack([sparse.csr_matrix(df["correct"].values.reshape(-1,1)), 274 | # sparse.hstack([X[agent] for agent in active_features])]).tocsr() 275 | # No need to sort sparse matrix here 276 | #print("Preprocessed data in: ", time.time()-dt) 277 | return sparse_df 278 | 279 | if __name__ == "__main__": 280 | dt = time.time() 281 | os.chdir("data/"+options.dataset) 282 | all_features = ['users', 'items', 'skills', 'wins', 'fails', 'attempts'] 283 | active_features = [features for features in all_features if vars(options)[features]] 284 | features_suffix = ''.join([features[0] for features in active_features]) 285 | if vars(options)["tw_kc"]: 286 | features_suffix += 't1' 287 | tw = "tw_kc" 288 | elif vars(options)["tw_items"]: 289 | features_suffix += 't2' 290 | tw = "tw_items" 291 | elif vars(options)["log_counts"]: 292 | features_suffix += 'l' 293 | tw = None 294 | else: 295 | tw = None 296 | 297 | df = pd.read_csv('preprocessed_data.csv') 298 | qmat = sparse.load_npz('q_mat.npz').toarray() 299 | print('Loading data:', df.shape[0], 'samples in ', time.time() - dt, "seconds") 300 | X = df_to_sparse(df, qmat, active_features, tw=tw, log_counts=options.log_counts) 301 | sparse.save_npz('X-{:s}.npz'.format(features_suffix), X) 302 | -------------------------------------------------------------------------------- /prepare_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy import sparse 4 | import argparse 5 | import os 6 | import json 7 | 8 | parser = argparse.ArgumentParser(description='Prepare datasets.') 9 | parser.add_argument('--dataset', type=str, nargs='?', default='assistments12') 10 | parser.add_argument('--min_interactions', type=int, nargs='?', default=10) 11 | parser.add_argument('--remove_nan_skills', type=bool, nargs='?', const=True, default=False) 12 | parser.add_argument('--verbose', type=bool, nargs='?', const=True, default=False) 13 | options = parser.parse_args() 14 | 15 | 16 | def prepare_assistments12(min_interactions_per_user, remove_nan_skills, verbose): 17 | """Preprocess ASSISTments 2012-2013 dataset. 
18 | 19 | Arguments: 20 | min_interactions_per_user -- minimum number of interactions per student 21 | remove_nan_skills -- if True, remove interactions with no skill tag 22 | 23 | Outputs: 24 | df -- preprocessed ASSISTments dataset (pandas DataFrame) 25 | Q_mat -- corresponding q-matrix (item-skill relationships sparse array) 26 | """ 27 | df = pd.read_csv("data/assistments12/data.csv") 28 | if verbose: 29 | initial_shape = df.shape[0] 30 | print("Opened ASSISTments 2012 data. Output: {} samples.".format(initial_shape)) 31 | 32 | df["timestamp"] = df["start_time"] 33 | df["timestamp"] = pd.to_datetime(df["timestamp"]) 34 | df["timestamp"] = df["timestamp"] - df["timestamp"].min() 35 | df["timestamp"] = df["timestamp"].apply(lambda x: x.total_seconds()).astype(np.int64) 36 | #df.sort_values(by="timestamp", inplace=True) 37 | #df.reset_index(inplace=True, drop=True) 38 | if remove_nan_skills: 39 | df = df[~df["skill_id"].isnull()] 40 | if verbose: 41 | print("Removed {} samples with NaN skills.".format(df.shape[0]-initial_shape)) 42 | initial_shape = df.shape[0] 43 | else: 44 | df.loc[df["skill_id"].isnull(), "skill_id"] = -1 45 | 46 | df = df[df.correct.isin([0,1])] # Remove potential continuous outcomes 47 | if verbose: 48 | print("Removed {} samples with non-binary outcomes.".format(df.shape[0]-initial_shape)) 49 | initial_shape = df.shape[0] 50 | df['correct'] = df['correct'].astype(np.int32) # Cast outcome as int32 51 | 52 | df = df.groupby("user_id").filter(lambda x: len(x) >= min_interactions_per_user) 53 | if verbose: 54 | print('Removed {} samples (users with less than {} interactions).'.format((df.shape[0]-initial_shape, 55 | min_interactions_per_user))) 56 | initial_shape = df.shape[0] 57 | 58 | df["user_id"] = np.unique(df["user_id"], return_inverse=True)[1] 59 | df["item_id"] = np.unique(df["problem_id"], return_inverse=True)[1] 60 | df["skill_id"] = np.unique(df["skill_id"], return_inverse=True)[1] 61 | 62 | #df.reset_index(inplace=True, drop=True) # Add unique identifier of the row 63 | #df["inter_id"] = df.index 64 | 65 | # Build Q-matrix 66 | Q_mat = np.zeros((df["item_id"].nunique(), df["skill_id"].nunique())) 67 | item_skill = np.array(df[["item_id", "skill_id"]]) 68 | for i in range(len(item_skill)): 69 | Q_mat[item_skill[i,0],item_skill[i,1]] = 1 70 | if verbose: 71 | print("Computed q-matrix. Shape: {}.".format(Q_mat.shape)) 72 | 73 | #df = df[['user_id', 'item_id', 'timestamp', 'correct', "inter_id"]] 74 | df = df[['user_id', 'item_id', 'timestamp', 'correct']] 75 | # Remove potential duplicates 76 | df.drop_duplicates(inplace=True) 77 | if verbose: 78 | print("Removed {} duplicated samples.".format(df.shape[0] - initial_shape)) 79 | initial_shape = df.shape[0] 80 | 81 | df.sort_values(by="timestamp", inplace=True) 82 | df.reset_index(inplace=True, drop=True) 83 | print("Data preprocessing done. Final output: {} samples.".format((df.shape[0]))) 84 | # Save data 85 | sparse.save_npz("data/assistments12/q_mat.npz", sparse.csr_matrix(Q_mat)) 86 | df.to_csv("data/assistments12/preprocessed_data.csv", index=False) 87 | 88 | with open('data/assistments12/config.json', 'w') as f: 89 | f.write(json.dumps({ 90 | 'n_users': df.user_id.nunique(), 91 | 'n_items': df.item_id.nunique(), 92 | 'n_skills': Q_mat.shape[1] 93 | }, indent=4)) 94 | 95 | return df, Q_mat 96 | 97 | def prepare_assistments09(min_interactions_per_user, remove_nan_skills, verbose): 98 | """Preprocess ASSISTments 2009-2010 dataset. 
99 | Requires the collapsed version: skill_builder_data_corrected_collapsed.csv 100 | Download it on: https://sites.google.com/site/assistmentsdata/home/assistment-2009-2010-data/skill-builder-data-2009-2010 (the last link) 101 | 102 | Actually thanks to the ASSISTments team, we had access to another file, 103 | timestamp_data.csv, that contains the timestamps. 104 | This extra file does not seem openly available yet. 105 | 106 | Arguments: 107 | min_interactions_per_user -- minimum number of interactions per student 108 | remove_nan_skills -- if True, remove interactions with no skill tag 109 | 110 | Outputs: 111 | df -- preprocessed ASSISTments dataset (pandas DataFrame) 112 | Q_mat -- corresponding q-matrix (item-skill relationships sparse array) 113 | """ 114 | 115 | df = pd.read_csv("data/assistments09/skill_builder_data_corrected_collapsed.csv", 116 | encoding = "latin1", index_col=False) 117 | df.drop(['Unnamed: 0'], axis=1, inplace=True) 118 | if verbose: 119 | initial_shape = df.shape[0] 120 | print("Opened ASSISTments 2009 data. Output: {} samples.".format(initial_shape)) 121 | timestamps = pd.read_csv("data/assistments09/timestamp_data.csv") 122 | 123 | df = df.merge(timestamps, left_on="order_id", right_on="problem_log_id", how="inner") 124 | df["timestamp"] = df["start_time"] 125 | df["timestamp"] = pd.to_datetime(df["timestamp"]) 126 | df["timestamp"] = df["timestamp"] - df["timestamp"].min() 127 | df["timestamp"] = df["timestamp"].apply(lambda x: x.total_seconds()).astype(np.int64) 128 | #df.sort_values(by="timestamp", inplace=True) 129 | #df.reset_index(inplace=True, drop=True) 130 | 131 | # Remove NaN skills 132 | if remove_nan_skills: 133 | initial_shape = df.shape[0] # in case the merge above removed some samples 134 | df = df[~df["skill_id"].isnull()] 135 | if verbose: 136 | print("Removed {} samples with NaN skills.".format(df.shape[0]-initial_shape)) 137 | initial_shape = df.shape[0] 138 | else: 139 | df.loc[df["skill_id"].isnull(), "skill_id"] = -1 140 | 141 | df = df[df.correct.isin([0,1])] # Remove potential continuous outcomes 142 | if verbose: 143 | print("Removed {} samples with non-binary outcomes.".format(df.shape[0]-initial_shape)) 144 | initial_shape = df.shape[0] 145 | df['correct'] = df['correct'].astype(np.int32) # Cast outcome as int32 146 | 147 | df = df.groupby("user_id").filter(lambda x: len(x) >= min_interactions_per_user) 148 | if verbose: 149 | print('Removed {} samples (users with less than {} interactions).'.format((df.shape[0]-initial_shape, 150 | min_interactions_per_user))) 151 | initial_shape = df.shape[0] 152 | 153 | df["item_id"] = np.unique(df["problem_id"], return_inverse=True)[1] 154 | df["user_id"] = np.unique(df["user_id"], return_inverse=True)[1] 155 | 156 | # Build q-matrix 157 | listOfKC = [] 158 | for kc_raw in df["skill_id"].unique(): 159 | for elt in str(kc_raw).split('_'): 160 | listOfKC.append(str(int(float(elt)))) 161 | listOfKC = np.unique(listOfKC) 162 | 163 | dict1_kc = {} ; dict2_kc = {} 164 | for k, v in enumerate(listOfKC): 165 | dict1_kc[v] = k 166 | dict2_kc[k] = v 167 | 168 | # Build Q-matrix 169 | Q_mat = np.zeros((len(df["item_id"].unique()), len(listOfKC))) 170 | item_skill = np.array(df[["item_id","skill_id"]]) 171 | for i in range(len(item_skill)): 172 | splitted_kc = str(item_skill[i,1]).split('_') 173 | for kc in splitted_kc: 174 | Q_mat[item_skill[i,0],dict1_kc[str(int(float(kc)))]] = 1 175 | if verbose: 176 | print("Computed q-matrix. 
Shape: {}.".format(Q_mat.shape)) 177 | 178 | df = df[['user_id', 'item_id', 'timestamp', 'correct']] 179 | # Remove potential duplicates 180 | df.drop_duplicates(inplace=True) 181 | if verbose: 182 | print("Removed {} duplicated samples.".format(df.shape[0] - initial_shape)) 183 | initial_shape = df.shape[0] 184 | 185 | df.sort_values(by="timestamp", inplace=True) 186 | df.reset_index(inplace=True, drop=True) 187 | print("Data preprocessing done. Final output: {} samples.".format((df.shape[0]))) 188 | 189 | # Save data 190 | sparse.save_npz("data/assistments09/q_mat.npz", sparse.csr_matrix(Q_mat)) 191 | df.to_csv("data/assistments09/preprocessed_data.csv", index=False) 192 | 193 | with open('data/assistments09/config.json', 'w') as f: 194 | f.write(json.dumps({ 195 | 'n_users': df.user_id.nunique(), 196 | 'n_items': df.item_id.nunique(), 197 | 'n_skills': Q_mat.shape[1] 198 | }, indent=4)) 199 | 200 | return df, Q_mat 201 | 202 | def prepare_kddcup10(data_name, min_interactions_per_user, kc_col_name, 203 | remove_nan_skills, verbose, drop_duplicates=True): 204 | """Preprocess KDD Cup 2010 datasets. 205 | 206 | Arguments: 207 | data_name -- "bridge_algebra06" or "algebra05" 208 | min_interactions_per_user -- minimum number of interactions per student 209 | kc_col_name -- Skills id column 210 | remove_nan_skills -- if True, remove interactions with no skill tag 211 | drop_duplicates -- if True, drop duplicates from dataset 212 | 213 | Outputs: 214 | df -- preprocessed ASSISTments dataset (pandas DataFrame) 215 | Q_mat -- corresponding q-matrix (item-skill relationships sparse array) 216 | """ 217 | folder_path = os.path.join("data", data_name) 218 | df = pd.read_csv(folder_path + "/data.txt", delimiter='\t').rename(columns={ 219 | 'Anon Student Id': 'user_id', 220 | 'Problem Name': 'pb_id', 221 | 'Step Name': 'step_id', 222 | kc_col_name: 'kc_id', 223 | 'First Transaction Time': 'timestamp', 224 | 'Correct First Attempt': 'correct' 225 | })[['user_id', 'pb_id', 'step_id' ,'correct', 'timestamp', 'kc_id']] 226 | if verbose: 227 | initial_shape = df.shape[0] 228 | print("Opened KDD Cup 2010 data. 
229 |
230 | df["timestamp"] = pd.to_datetime(df["timestamp"])
231 | df["timestamp"] = df["timestamp"] - df["timestamp"].min()
232 | df["timestamp"] = df["timestamp"].apply(lambda x: x.total_seconds()).astype(np.int64)
233 | #df.sort_values(by="timestamp",inplace=True)
234 | #df.reset_index(inplace=True,drop=True)
235 |
236 | if remove_nan_skills:
237 | df = df[~df["kc_id"].isnull()]
238 | if verbose:
239 | print("Removed {} samples with NaN skills.".format(initial_shape-df.shape[0]))
240 | initial_shape = df.shape[0]
241 | else:
242 | df.loc[df["kc_id"].isnull(), "kc_id"] = 'NaN'
243 |
244 | df = df[df.correct.isin([0,1])] # Remove potential continuous outcomes
245 | if verbose:
246 | print("Removed {} samples with non-binary outcomes.".format(initial_shape-df.shape[0]))
247 | initial_shape = df.shape[0]
248 | df['correct'] = df['correct'].astype(np.int32) # Cast outcome as int32
249 |
250 | df = df.groupby("user_id").filter(lambda x: len(x) >= min_interactions_per_user)
251 | if verbose:
252 | print('Removed {} samples (users with less than {} interactions).'.format(initial_shape-df.shape[0],
253 | min_interactions_per_user))
254 | initial_shape = df.shape[0]
255 |
256 | # Create variables
257 | df["item_id"] = df["pb_id"]+":"+df["step_id"]
258 | df = df[['user_id', 'item_id', 'kc_id', 'correct', 'timestamp']]
259 |
260 | # Transform ids into numeric
261 | df["item_id"] = np.unique(df["item_id"], return_inverse=True)[1]
262 | df["user_id"] = np.unique(df["user_id"], return_inverse=True)[1]
263 |
264 | #if drop_duplicates:
265 | # df.drop_duplicates(subset=["user_id", "item_id", "timestamp"], inplace=True)
266 |
267 | # Create list of KCs
268 | listOfKC = []
269 | for kc_raw in df["kc_id"].unique():
270 | for elt in kc_raw.split('~~'):
271 | listOfKC.append(elt)
272 | listOfKC = np.unique(listOfKC)
273 |
274 | dict1_kc = {}
275 | dict2_kc = {}
276 | for k, v in enumerate(listOfKC):
277 | dict1_kc[v] = k
278 | dict2_kc[k] = v
279 |
280 | #df.reset_index(inplace=True, drop=True) # Add unique identifier of the row
281 | #df["inter_id"] = df.index
282 |
283 | # Build Q-matrix
284 | Q_mat = np.zeros((len(df["item_id"].unique()), len(listOfKC)))
285 | item_skill = np.array(df[["item_id","kc_id"]])
286 | for i in range(len(item_skill)):
287 | splitted_kc = item_skill[i,1].split('~~')
288 | for kc in splitted_kc:
289 | Q_mat[item_skill[i,0],dict1_kc[kc]] = 1
290 | if verbose:
291 | print("Computed q-matrix. Shape: {}.".format(Q_mat.shape))
292 |
293 | #df = df[['user_id', 'item_id', 'timestamp', 'correct', 'inter_id']]
294 | df = df[['user_id', 'item_id', 'timestamp', 'correct']]
295 | # Remove potential duplicates
296 | df.drop_duplicates(inplace=True)
297 | if verbose:
298 | print("Removed {} duplicated samples.".format(initial_shape - df.shape[0]))
299 | initial_shape = df.shape[0]
300 |
301 | df.sort_values(by="timestamp", inplace=True)
302 | df.reset_index(inplace=True, drop=True)
303 | print("Data preprocessing done. Final output: {} samples.".format(df.shape[0]))
304 |
305 | # Save data
306 | sparse.save_npz(folder_path + "/q_mat.npz", sparse.csr_matrix(Q_mat))
307 | df.to_csv(folder_path + "/preprocessed_data.csv", index=False)
308 |
309 | with open(folder_path + '/config.json', 'w') as f:
310 | f.write(json.dumps({
311 | 'n_users': df.user_id.nunique(),
312 | 'n_items': df.item_id.nunique(),
313 | 'n_skills': Q_mat.shape[1]
314 | }, indent=4))
315 |
316 | return df, Q_mat
317 |
318 | def prepare_robomission(min_interactions_per_user, verbose):
319 | """Preprocess Robomission dataset.
320 | Retrieved from https://github.com/adaptive-learning/adaptive-learning-research/tree/master/data/robomission-2019-12
321 |
322 | Arguments:
323 | min_interactions_per_user -- minimum number of interactions per student
324 |
325 | Outputs:
326 | df -- preprocessed Robomission dataset (pandas DataFrame)
327 | Q_mat -- corresponding q-matrix (item-skill relationships sparse array)
328 | """
329 |
330 | df = pd.read_csv("data/robomission/attempts.csv") # from robomission-2019-12-10
331 | if verbose:
332 | initial_shape = df.shape[0]
333 | print("Opened Robomission data. Output: {} samples.".format(initial_shape))
334 |
335 | df["correct"] = df["solved"].astype(np.int32)
336 | df["timestamp"] = df["start"]
337 | df["timestamp"] = pd.to_datetime(df["timestamp"])
338 | df["timestamp"] = df["timestamp"] - df["timestamp"].min()
339 | df["timestamp"] = df["timestamp"].apply(lambda x: x.total_seconds()).astype(np.int64)
340 | #df.sort_values(by="timestamp",inplace=True)
341 | #df.reset_index(inplace=True,drop=True)
342 | df = df.groupby("student").filter(lambda x: len(x) >= min_interactions_per_user)
343 | if verbose:
344 | print('Removed {} samples (users with less than {} interactions).'.format(initial_shape-df.shape[0],
345 | min_interactions_per_user))
346 | initial_shape = df.shape[0]
347 |
348 | # Change user/item identifiers
349 | df["user_id"] = np.unique(df["student"], return_inverse=True)[1]
350 | df["item_id"] = np.unique(df["problem"], return_inverse=True)[1]
351 |
352 | #df.reset_index(inplace=True, drop=True) # Add unique identifier of the row
353 | #df["inter_id"] = df.index
354 |
355 | #df = df[['user_id', 'item_id', 'timestamp', 'correct', "inter_id"]]
356 | df = df[['user_id', 'item_id', 'timestamp', 'correct']]
357 | # Remove potential duplicates
358 | df.drop_duplicates(inplace=True)
359 | if verbose:
360 | print("Removed {} duplicated samples.".format(initial_shape - df.shape[0]))
361 | initial_shape = df.shape[0]
362 |
363 | df.sort_values(by="timestamp",inplace=True)
364 | df.reset_index(inplace=True, drop=True)
365 | print("Data preprocessing done. Final output: {} samples.".format(df.shape[0]))
366 |
367 | # Sort q-matrix by item id
368 | Q_mat = pd.read_csv("data/robomission/qmatrix.csv")
369 | Q_mat.sort_values(by="id",inplace=True)
370 | Q_mat = Q_mat.values[:,1:]
371 |
372 | # Save data
373 | sparse.save_npz("data/robomission/q_mat.npz", sparse.csr_matrix(Q_mat))
374 | df.to_csv("data/robomission/preprocessed_data.csv", index=False)
375 |
376 | with open('data/robomission/config.json', 'w') as f:
377 | f.write(json.dumps({
378 | 'n_users': df.user_id.nunique(),
379 | 'n_items': df.item_id.nunique(),
380 | 'n_skills': Q_mat.shape[1]
381 | }, indent=4))
382 |
383 | return df, Q_mat
384 |
385 | if __name__ == "__main__":
386 | if options.dataset == "assistments12":
387 | df, Q_mat = prepare_assistments12(min_interactions_per_user=options.min_interactions,
388 | remove_nan_skills=options.remove_nan_skills,
389 | verbose=options.verbose)
390 | elif options.dataset == "assistments09":
391 | df, Q_mat = prepare_assistments09(min_interactions_per_user=options.min_interactions,
392 | remove_nan_skills=options.remove_nan_skills,
393 | verbose=options.verbose)
394 | elif options.dataset == "bridge_algebra06":
395 | df, Q_mat = prepare_kddcup10(data_name="bridge_algebra06",
396 | min_interactions_per_user=options.min_interactions,
397 | kc_col_name="KC(SubSkills)",
398 | remove_nan_skills=options.remove_nan_skills,
399 | verbose=options.verbose)
400 | elif options.dataset == "algebra05":
401 | df, Q_mat = prepare_kddcup10(data_name="algebra05",
402 | min_interactions_per_user=options.min_interactions,
403 | kc_col_name="KC(Default)",
404 | remove_nan_skills=options.remove_nan_skills,
405 | verbose=options.verbose)
406 | elif options.dataset == "robomission":
407 | df, Q_mat = prepare_robomission(min_interactions_per_user=options.min_interactions,
408 | verbose=options.verbose)
409 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | scikit-learn
4 | pandas
5 | pywFM
--------------------------------------------------------------------------------
/slides/Makefile:
--------------------------------------------------------------------------------
1 | edm:
2 | pandoc -s --bibliography=biblio.bib --biblatex presentation.md -t beamer -o presentation.tex
3 | pdflatex presentation
4 | biber presentation
5 | pdflatex presentation
6 | open presentation.pdf
7 |
8 | edm_simple:
9 | pandoc presentation.md -t beamer -o presentation.pdf
10 | open presentation.pdf
11 |
12 | jdse:
13 | pandoc -s --bibliography=biblio.bib --biblatex jdse.md -t beamer -o jdse.tex
14 | pdflatex jdse
15 | biber jdse
16 | pdflatex jdse
17 | open jdse.pdf
18 |
19 | jdse_simple:
20 | pandoc jdse.md -t beamer -o jdse.pdf
21 | open jdse.pdf
22 |
23 | limsi:
24 | pandoc -s --bibliography=biblio.bib --biblatex limsi.md -t beamer -o limsi.tex
25 | pdflatex limsi
26 | biber limsi
27 | pdflatex limsi
28 | open limsi.pdf
29 |
30 | limsi_simple:
31 | pandoc limsi.md -t beamer -o limsi.pdf
32 | open limsi.pdf
33 |
34 | lak:
35 | pandoc -s --bibliography=biblio.bib --biblatex lak2020.md -t beamer -o lak2020.tex
36 | pdflatex lak2020
37 | biber lak2020
38 | pdflatex lak2020
39 | open lak2020.pdf
40 |
41 | lak_simple:
42 | pandoc lak2020.md -t beamer -o lak2020.pdf
43 | open lak2020.pdf
44 |
45 | lip6:
46 | pandoc -s --bibliography=biblio.bib --biblatex lip6.md -t beamer -o lip6.tex
47 | pdflatex lip6
48 | biber lip6
49 | pdflatex lip6
50 |
open lip6.pdf 51 | 52 | lip6_simple: 53 | pandoc lip6.md -t beamer -o lip6.pdf 54 | open lip6.pdf 55 | 56 | clean: 57 | rm -f *.aux *.bbl *.bcf *.blg *.log *.nav *.out *.run.xml *.snm *.synctex \ 58 | *.synctex.gz *.toc *.vrb 59 | rm -f */*.log */*.aux -------------------------------------------------------------------------------- /slides/biblio.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{settles2016trainable, 2 | title={A trainable spaced repetition model for language learning}, 3 | author={Settles, Burr and Meeder, Brendan}, 4 | booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, 5 | volume={1}, 6 | pages={1848--1858}, 7 | year={2016} 8 | } 9 | 10 | @inproceedings{lan2016dealbreaker, 11 | title={Dealbreaker: A nonlinear latent variable model for educational data}, 12 | author={Lan, Andrew and Goldstein, Tom and Baraniuk, Richard and Studer, Christoph}, 13 | booktitle={International Conference on Machine Learning}, 14 | pages={266--275}, 15 | year={2016} 16 | } 17 | 18 | @article{tatsuoka1983rule, 19 | title={Rule space: An approach for dealing with misconceptions based on item response theory}, 20 | author={Tatsuoka, Kikumi K}, 21 | journal={Journal of educational measurement}, 22 | volume={20}, 23 | number={4}, 24 | pages={345--354}, 25 | year={1983}, 26 | publisher={Wiley Online Library} 27 | } 28 | 29 | @article{tabibian2019enhancing, 30 | title={Enhancing human learning via spaced repetition optimization}, 31 | author={Tabibian, Behzad and Upadhyay, Utkarsh and De, Abir and Zarezade, Ali and Sch{\"o}lkopf, Bernhard and Gomez-Rodriguez, Manuel}, 32 | journal={Proceedings of the National Academy of Sciences}, 33 | volume={116}, 34 | number={10}, 35 | pages={3988--3993}, 36 | year={2019}, 37 | publisher={National Acad Sciences} 38 | } 39 | 40 | @inproceedings{reddy2017accelerating, 41 | title={Accelerating Human Learning with Deep Reinforcement Learning}, 42 | author={Reddy, Siddharth and Levine, Sergey and Dragan, Anca}, 43 | booktitle={NIPS’17 Workshop: Teaching Machines, Robots, and Humans}, 44 | year={2017} 45 | } 46 | 47 | @article{pimsleur1967memory, 48 | title={A memory schedule}, 49 | author={Pimsleur, Paul}, 50 | journal={The Modern Language Journal}, 51 | volume={51}, 52 | number={2}, 53 | pages={73--75}, 54 | year={1967}, 55 | publisher={JSTOR} 56 | } 57 | 58 | @article{wozniak1994optimization, 59 | title={Optimization of repetition spacing in the practice of learning}, 60 | author={Wozniak, PA and Gorzelanczyk, Edward J}, 61 | journal={Acta neurobiologiae experimentalis}, 62 | volume={54}, 63 | pages={59--59}, 64 | year={1994}, 65 | publisher={NENCKI INSTITUTE OF EXPERIMENTAL BIOLOGY WARSAW} 66 | } 67 | 68 | @incollection{mozer2016predicting, 69 | title={Predicting and improving memory retention: Psychological theory matters in the big data era}, 70 | author={Mozer, Michael C and Lindsey, Robert V}, 71 | booktitle={Big Data in Cognitive Science}, 72 | pages={43--73}, 73 | year={2016}, 74 | publisher={Psychology Press} 75 | } 76 | 77 | @inproceedings{wilson2016back, 78 | title={Back to the basics: Bayesian extensions of {IRT} outperform neural networks for proficiency estimation}, 79 | author={Kevin H. 
Wilson and Yan Karklin and Bojian Han and Chaitanya Ekanadham}, 80 | booktitle = {Proceedings of the 9th International Conference on Educational Data Mining, {EDM} 2016}, 81 | pages = {539--544}, 82 | year = {2016} 83 | } 84 | 85 | @inproceedings{pavlik2009performance, 86 | title={Performance Factors Analysis - {A} New Alternative to Knowledge Tracing}, 87 | author={Philip I. Pavlik and Hao Cen and Kenneth R. Koedinger}, 88 | booktitle = {Proceedings of the 14th International Conference on Artificial Intelligence in Education, {AIED} 2009}, 89 | pages={531--538}, 90 | year={2009} 91 | } 92 | 93 | @incollection{roediger2011intricacies, 94 | title={Intricacies of spaced retrieval: A resolution}, 95 | author={Roediger III, Henry L and Karpicke, Jeffrey D}, 96 | booktitle={Successful Remembering and Successful Forgetting}, 97 | pages={41--66}, 98 | year={2011}, 99 | publisher={Psychology Press} 100 | } 101 | 102 | @article{weinstein2018teaching, 103 | title={Teaching the science of learning}, 104 | author={Weinstein, Yana and Madan, Christopher R and Sumeracki, Megan A}, 105 | journal={Cognitive Research: Principles and Implications}, 106 | volume={3}, 107 | number={1}, 108 | pages={2}, 109 | year={2018}, 110 | publisher={SpringerOpen} 111 | } 112 | 113 | @article{cepeda2008spacing, 114 | title={Spacing effects in learning: A temporal ridgeline of optimal retention}, 115 | author={Cepeda, Nicholas J and Vul, Edward and Rohrer, Doug and Wixted, John T and Pashler, Harold}, 116 | journal={Psychological science}, 117 | volume={19}, 118 | number={11}, 119 | pages={1095--1102}, 120 | year={2008}, 121 | publisher={SAGE Publications Sage CA: Los Angeles, CA} 122 | } 123 | 124 | @book{van2013handbook, 125 | title={Handbook of modern item response theory}, 126 | author={van der Linden, Wim J and Hambleton, Ronald K}, 127 | year={2013}, 128 | publisher={Springer Science \& Business Media} 129 | } 130 | 131 | @article{lindsey2014improving, 132 | title={Improving students’ long-term knowledge retention through personalized review}, 133 | author={Lindsey, Robert V and Shroyer, Jeffery D and Pashler, Harold and Mozer, Michael C}, 134 | journal={Psychological science}, 135 | volume={25}, 136 | number={3}, 137 | pages={639--647}, 138 | year={2014}, 139 | publisher={Sage Publications Sage CA: Los Angeles, CA} 140 | } 141 | 142 | @article{barzagar2019distributing, 143 | title={Distributing mathematical practice of third and seventh graders: Applicability of the spacing effect in the classroom}, 144 | author={Barzagar Nazari, Katharina and Ebersbach, Mirjam}, 145 | journal={Applied Cognitive Psychology}, 146 | volume={33}, 147 | number={2}, 148 | pages={288--298}, 149 | year={2019}, 150 | publisher={Wiley Online Library} 151 | } 152 | 153 | @inproceedings{Vie2019, 154 | Author = {{Vie}, Jill-J{\^e}nn and {Kashima}, Hisashi}, 155 | Booktitle = {Proceedings of the 33th {AAAI} Conference on Artificial Intelligence}, 156 | Title = {{Knowledge Tracing Machines: Factorization Machines for Knowledge Tracing}}, 157 | Pages = {to appear}, 158 | Url = {http://arxiv.org/abs/1811.03388}, 159 | Year = 2019} 160 | 161 | @article{feng2009addressing, 162 | title={Addressing the assessment challenge with an online system that tutors as it assesses}, 163 | author={Feng, Mingyu and Heffernan, Neil and Koedinger, Kenneth}, 164 | journal={User Modeling and User-Adapted Interaction}, 165 | volume={19}, 166 | number={3}, 167 | pages={243--266}, 168 | year={2009}, 169 | publisher={Springer} 170 | } 171 | 172 | @article{anderson1997act, 
173 | title={{ACT-R}: A theory of higher level cognition and its relation to visual attention}, 174 | author={Anderson, John R and Matessa, Michael and Lebiere, Christian}, 175 | journal={Human-Computer Interaction}, 176 | volume={12}, 177 | number={4}, 178 | pages={439--462}, 179 | year={1997}, 180 | publisher={L. Erlbaum Associates Inc.} 181 | } 182 | 183 | @inproceedings{pashler2009predicting, 184 | title={Predicting the optimal spacing of study: A multiscale context model of memory}, 185 | author={Pashler, Harold and Cepeda, Nicholas and Lindsey, Robert V and Vul, Ed and Mozer, Michael C}, 186 | booktitle={Advances in neural information processing systems}, 187 | pages={1321--1329}, 188 | year={2009} 189 | } 190 | 191 | @article{metzler2009does, 192 | title={Does adaptive training work?}, 193 | author={Metzler-Baddeley, Claudia and Baddeley, Roland J}, 194 | journal={Applied Cognitive Psychology: The Official Journal of the Society for Applied Research in Memory and Cognition}, 195 | volume={23}, 196 | number={2}, 197 | pages={254--266}, 198 | year={2009}, 199 | publisher={Wiley Online Library} 200 | } 201 | 202 | @article{rendle2012factorization, 203 | title={Factorization machines with libfm}, 204 | author={Rendle, Steffen}, 205 | journal={ACM Transactions on Intelligent Systems and Technology (TIST)}, 206 | volume={3}, 207 | number={3}, 208 | pages={57}, 209 | year={2012}, 210 | publisher={ACM} 211 | } 212 | 213 | @article{pedregosa2011scikit, 214 | title={Scikit-learn: Machine learning in {P}ython}, 215 | author={Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others}, 216 | journal={Journal of machine learning research}, 217 | volume={12}, 218 | number={Oct}, 219 | pages={2825--2830}, 220 | year={2011} 221 | } 222 | 223 | @article{roediger2006test, 224 | title={Test-enhanced learning: Taking memory tests improves long-term retention}, 225 | author={Roediger III, Henry L and Karpicke, Jeffrey D}, 226 | journal={Psychological science}, 227 | volume={17}, 228 | number={3}, 229 | pages={249--255}, 230 | year={2006}, 231 | publisher={SAGE Publications Sage CA: Los Angeles, CA} 232 | } 233 | 234 | @article{wellman1993explaining, 235 | title={Explaining ``explaining away''}, 236 | author={Wellman, Michael P and Henrion, Max}, 237 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 238 | volume={15}, 239 | number={3}, 240 | pages={287--292}, 241 | year={1993}, 242 | publisher={IEEE} 243 | } 244 | 245 | @article{khajah2014maximizing, 246 | title={Maximizing students' retention via spaced review: Practical guidance from computational models of memory}, 247 | author={Khajah, Mohammad M and Lindsey, Robert V and Mozer, Michael C}, 248 | journal={Topics in cognitive science}, 249 | volume={6}, 250 | number={1}, 251 | pages={157--169}, 252 | year={2014}, 253 | publisher={Wiley Online Library} 254 | } 255 | 256 | @article{guzman2007adaptive, 257 | title={Adaptive testing for hierarchical student models}, 258 | author={Guzm{\'a}n, Eduardo and Conejo, Ricardo and P{\'e}rez-de-la-Cruz, Jos{\'e}-Luis}, 259 | journal={User Modeling and User-Adapted Interaction}, 260 | volume={17}, 261 | number={1-2}, 262 | pages={119--157}, 263 | year={2007}, 264 | publisher={Springer} 265 | } 266 | 267 | @article{pashler2007organizing, 268 | title={Organizing Instruction and Study to Improve Student Learning. {IES} Practice Guide. 
{NCER} 2007-2004.}, 269 | author={Pashler, Harold and Bain, Patrice M and Bottge, Brian A and Graesser, Arthur and Koedinger, Kenneth and McDaniel, Mark and Metcalfe, Janet}, 270 | journal={National Center for Education Research}, 271 | year={2007}, 272 | publisher={ERIC} 273 | } 274 | 275 | @article{carpenter2008effects, 276 | title={The effects of tests on learning and forgetting}, 277 | author={Carpenter, Shana K and Pashler, Harold and Wixted, John T and Vul, Edward}, 278 | journal={Memory \& Cognition}, 279 | volume={36}, 280 | number={2}, 281 | pages={438--448}, 282 | year={2008}, 283 | publisher={Springer} 284 | } 285 | 286 | @article{adesope2017rethinking, 287 | title={Rethinking the use of tests: A meta-analysis of practice testing}, 288 | author={Adesope, Olusola O and Trevisan, Dominic A and Sundararajan, Narayankripa}, 289 | journal={Review of Educational Research}, 290 | volume={87}, 291 | number={3}, 292 | pages={659--701}, 293 | year={2017}, 294 | publisher={Sage Publications Sage CA: Los Angeles, CA} 295 | } 296 | 297 | @article{mettler2016comparison, 298 | title={A comparison of adaptive and fixed schedules of practice.}, 299 | author={Mettler, Everett and Massey, Christine M and Kellman, Philip J}, 300 | journal={Journal of Experimental Psychology: General}, 301 | volume={145}, 302 | number={7}, 303 | pages={897}, 304 | year={2016}, 305 | publisher={American Psychological Association} 306 | } 307 | 308 | @article{vlach2012distributing, 309 | title={Distributing learning over time: The spacing effect in children’s acquisition and generalization of science concepts}, 310 | author={Vlach, Haley A and Sandhofer, Catherine M}, 311 | journal={Child development}, 312 | volume={83}, 313 | number={4}, 314 | pages={1137--1144}, 315 | year={2012}, 316 | publisher={Wiley Online Library} 317 | } 318 | 319 | @inproceedings{roediger2018remembering, 320 | title={Remembering What We Learn}, 321 | author={Roediger III, Henry L and McDermott, Kathleen B}, 322 | booktitle={Cerebrum: the Dana Forum on Brain Science}, 323 | volume={2018}, 324 | year={2018}, 325 | organization={Dana Foundation} 326 | } 327 | 328 | @inproceedings{reddy2016unbounded, 329 | title={Unbounded human learning: Optimal scheduling for spaced repetition}, 330 | author={Reddy, Siddharth and Labutov, Igor and Banerjee, Siddhartha and Joachims, Thorsten}, 331 | booktitle={Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining}, 332 | pages={1815--1824}, 333 | year={2016}, 334 | organization={ACM} 335 | } 336 | 337 | @inproceedings{rafferty2011faster, 338 | title={Faster teaching by {POMDP} planning}, 339 | author={Rafferty, Anna N and Brunskill, Emma and Griffiths, Thomas L and Shafto, Patrick}, 340 | booktitle={International Conference on Artificial Intelligence in Education}, 341 | pages={280--287}, 342 | year={2011}, 343 | organization={Springer} 344 | } 345 | 346 | @inproceedings{lan2016contextual, 347 | title={A Contextual Bandits Framework for Personalized Learning Action Selection}, 348 | author={Andrew S. Lan and Richard G. 
Baraniuk}, 349 | booktitle={Proceedings of the 9th International Conference on Educational Data Mining, {EDM} 2016}, 350 | pages={424--429}, 351 | year={2016} 352 | } 353 | 354 | @article{corbett1994knowledge, 355 | title={Knowledge tracing: Modeling the acquisition of procedural knowledge}, 356 | author={Corbett, Albert T and Anderson, John R}, 357 | journal={User modeling and user-adapted interaction}, 358 | volume={4}, 359 | number={4}, 360 | pages={253--278}, 361 | year={1994}, 362 | publisher={Springer} 363 | } 364 | 365 | @inproceedings{piech2015deep, 366 | title={Deep knowledge tracing}, 367 | author={Piech, Chris and Bassen, Jonathan and Huang, Jonathan and Ganguli, Surya and Sahami, Mehran and Guibas, Leonidas J and Sohl-Dickstein, Jascha}, 368 | booktitle={Advances in neural information processing systems}, 369 | pages={505--513}, 370 | year={2015} 371 | } 372 | 373 | @inproceedings{zhang2017dynamic, 374 | title={Dynamic key-value memory networks for knowledge tracing}, 375 | author={Zhang, Jiani and Shi, Xingjian and King, Irwin and Yeung, Dit-Yan}, 376 | booktitle={Proceedings of the 26th international conference on World Wide Web}, 377 | pages={765--774}, 378 | year={2017}, 379 | organization={International World Wide Web Conferences Steering Committee} 380 | } 381 | 382 | @inproceedings{ekanadham2017t, 383 | title={{T-SKIRT:} Online Estimation of Student Proficiency in an Adaptive Learning System}, 384 | author={Ekanadham, Chaitanya and Karklin, Yan}, 385 | booktitle={Machine Learning for Education Workshop at ICML}, 386 | year={2015} 387 | } 388 | 389 | @article{leitner1972so, 390 | title={So lernt man lernen [How to learn]}, 391 | author={Leitner, S}, 392 | journal={Freiburg im Breisgau, Germany: Herder}, 393 | year={1972} 394 | } 395 | 396 | @article{ebbinghaus2013memory, 397 | title={Memory: A contribution to experimental psychology}, 398 | author={Ebbinghaus, Hermann}, 399 | journal={Annals of neurosciences}, 400 | volume={20}, 401 | number={4}, 402 | pages={155}, 403 | year={2013}, 404 | publisher={Karger Publishers} 405 | } 406 | 407 | @article{pavlik2008using, 408 | title={Using a model to compute the optimal schedule of practice.}, 409 | author={Pavlik, Philip I and Anderson, John R}, 410 | journal={Journal of Experimental Psychology: Applied}, 411 | volume={14}, 412 | number={2}, 413 | pages={101}, 414 | year={2008}, 415 | publisher={American Psychological Association} 416 | } 417 | 418 | @article{whitehill2018approximately, 419 | title={Approximately optimal teaching of approximately optimal learners}, 420 | author={Whitehill, Jacob and Movellan, Javier}, 421 | journal={IEEE Transactions on Learning Technologies}, 422 | volume={11}, 423 | number={2}, 424 | pages={152--164}, 425 | year={2018}, 426 | publisher={IEEE} 427 | } 428 | 429 | @article{bjork1994memory, 430 | title={Memory and Metamemory Considerations in the Training of Human Beings}, 431 | author={Bjork, Robert A}, 432 | journal={Metacognition: Knowing about knowing}, 433 | volume={185}, 434 | year={1994} 435 | } 436 | 437 | @inproceedings{yudelson2013individualized, 438 | title={Individualized bayesian knowledge tracing models}, 439 | author={Yudelson, Michael V and Koedinger, Kenneth R and Gordon, Geoffrey J}, 440 | booktitle={International Conference on Artificial Intelligence in Education}, 441 | pages={171--180}, 442 | year={2013}, 443 | organization={Springer} 444 | } 445 | 446 | @inproceedings{wilson2016estimating, 447 | title={Estimating student proficiency: Deep learning is not the panacea}, 448 | 
author={Wilson, Kevin H and Xiong, Xiaolu and Khajah, Mohammad and Lindsey, Robert V and Zhao, Siyuan and Karklin, Yan and Van Inwegen, Eric G and Han, Bojian and Ekanadham, Chaitanya and Beck, Joseph E and others}, 449 | booktitle={Neural Information Processing Systems, Workshop on Machine Learning for Education}, 450 | pages={3}, 451 | year={2016} 452 | } 453 | 454 | @inproceedings{cen2006learning, 455 | title={Learning factors analysis--a general method for cognitive model evaluation and improvement}, 456 | author={Cen, Hao and Koedinger, Kenneth and Junker, Brian}, 457 | booktitle={International Conference on Intelligent Tutoring Systems}, 458 | pages={164--175}, 459 | year={2006}, 460 | organization={Springer} 461 | } 462 | 463 | @misc{dataAlgebra, 464 | author={Stamper, J. and Niculescu-Mizil, A. and Ritter, S. and Gordon, G.J. and Koedinger, K.R.}, 465 | title={Algebra {I} 2005-2006 and {B}ridge to {A}lgebra 2006-2007. {D}evelopment data sets from {KDD} {C}up 2010 {E}ducational {D}ata {M}ining {C}hallenge. }, 466 | howpublished={Find them at \url{http://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp}} 467 | } 468 | 469 | 470 | @misc{dataBridgeAlgebra, 471 | author={Stamper, J. and Niculescu-Mizil, A. and Ritter, S. and Gordon, G.J. and Koedinger, K.R.}, 472 | title={Bridge to Algebra 2006-2007. {C}hallenge data set from {KDD} {C}up 2010 {E}ducational {D}ata {M}ining {C}hallenge. }, 473 | howpublished={Find it at \url{http://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp}} 474 | } 475 | 476 | @misc{dataAlgebra05, 477 | author={Stamper, J. and Niculescu-Mizil, A. and Ritter, S. and Gordon, G.J. and Koedinger, K.R.}, 478 | title={Algebra {I} 2005-2006. {C}hallenge data set from {KDD} {C}up 2010 {E}ducational {D}ata {M}ining {C}hallenge.}, 479 | howpublished={Find it at \url{http://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp}} 480 | } 481 | 482 | @article{galyardt2015move, 483 | title={Move your lamp post: Recent data reflects learner knowledge better than older data}, 484 | author={Galyardt, April and Goldin, Ilya}, 485 | journal={Journal of Educational Data Mining}, 486 | volume={7}, 487 | number={2}, 488 | pages={83--108}, 489 | year={2015}, 490 | publisher={International Working Group on Educational Data Mining} 491 | } 492 | 493 | @article{gong2011construct, 494 | title={How to construct more accurate student models: Comparing and optimizing knowledge tracing and performance factor analysis}, 495 | author={Gong, Yue and Beck, Joseph E and Heffernan, Neil T}, 496 | journal={International Journal of Artificial Intelligence in Education}, 497 | volume={21}, 498 | number={1-2}, 499 | pages={27--46}, 500 | year={2011}, 501 | publisher={IOS Press} 502 | } 503 | 504 | @article{pan2018transfer, 505 | title={Transfer of test-enhanced learning: Meta-analytic review and synthesis.}, 506 | author={Pan, Steven C and Rickard, Timothy C}, 507 | journal={Psychological bulletin}, 508 | volume={144}, 509 | number={7}, 510 | pages={710}, 511 | year={2018}, 512 | publisher={American Psychological Association} 513 | } 514 | 515 | @article{walsh2018mechanisms, 516 | title={Mechanisms underlying the spacing effect in learning: A comparison of three computational models.}, 517 | author={Walsh, Matthew M and Gluck, Kevin A and Gunzelmann, Glenn and Jastrzembski, Tiffany and Krusmark, Michael and Myung, Jay I and Pitt, Mark A and Zhou, Ran}, 518 | journal={Journal of Experimental Psychology: General}, 519 | volume={147}, 520 | number={9}, 521 | pages={1325}, 522 | year={2018}, 523 | publisher={American 
Psychological Association} 524 | } 525 | 526 | @inproceedings{minn2018deep, 527 | title={Deep knowledge tracing and dynamic student classification for knowledge tracing}, 528 | author={Minn, Sein and Yu, Yi and Desmarais, Michel C and Zhu, Feida and Vie, Jill-J{\^e}nn}, 529 | booktitle={2018 IEEE International Conference on Data Mining (ICDM)}, 530 | pages={1182--1187}, 531 | year={2018}, 532 | organization={IEEE} 533 | } -------------------------------------------------------------------------------- /slides/figures/LogoCS1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/LogoCS1.png -------------------------------------------------------------------------------- /slides/figures/aip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/aip.png -------------------------------------------------------------------------------- /slides/figures/anki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/anki.png -------------------------------------------------------------------------------- /slides/figures/comp_dim_das3h.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/comp_dim_das3h.pdf -------------------------------------------------------------------------------- /slides/figures/comp_dim_irt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/comp_dim_irt.pdf -------------------------------------------------------------------------------- /slides/figures/example_das3h_curve_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/example_das3h_curve_1.pdf -------------------------------------------------------------------------------- /slides/figures/example_das3h_curve_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/example_das3h_curve_2.pdf -------------------------------------------------------------------------------- /slides/figures/fm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/fm.pdf -------------------------------------------------------------------------------- /slides/figures/fm2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/fm2.pdf -------------------------------------------------------------------------------- /slides/figures/item_skills_relations.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/item_skills_relations.pdf -------------------------------------------------------------------------------- /slides/figures/leitner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/leitner.png -------------------------------------------------------------------------------- /slides/figures/logo_UP_saclay_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_UP_saclay_final.png -------------------------------------------------------------------------------- /slides/figures/logo_dataia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_dataia.png -------------------------------------------------------------------------------- /slides/figures/logo_hadamard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_hadamard.png -------------------------------------------------------------------------------- /slides/figures/logo_inria.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_inria.png -------------------------------------------------------------------------------- /slides/figures/logo_labex_digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_labex_digicosme.png -------------------------------------------------------------------------------- /slides/figures/logo_lri.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_lri.jpeg -------------------------------------------------------------------------------- /slides/figures/logo_paris_saclay_ds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_paris_saclay_ds.png -------------------------------------------------------------------------------- /slides/figures/logo_ups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/logo_ups.png -------------------------------------------------------------------------------- /slides/figures/pairwise_comp_all_datasets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/pairwise_comp_all_datasets.pdf -------------------------------------------------------------------------------- /slides/figures/time_windows.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/figures/time_windows.pdf -------------------------------------------------------------------------------- /slides/jdse.md: -------------------------------------------------------------------------------- 1 | % DAS3H: Modeling Student Learning and Forgetting for Optimally Scheduling Distributed Practice of Skills 2 | % \alert{Benoît Choffin}, Fabrice Popineau, Yolaine Bourda & Jill-Jênn Vie\newline\newline\scriptsize LRI/CentraleSupélec - University of Paris-Saclay | RIKEN AIP 3 | % JDSE 2019, Gif-sur-Yvette | September 12, 2019 4 | --- 5 | theme: Frankfurt 6 | institute: \includegraphics[height=1.2cm]{figures/logo_lri.jpeg} \quad \includegraphics[height=1.3cm]{figures/LogoCS1.png} \quad \includegraphics[height=1.3cm]{figures/logo_ups.png} \quad \includegraphics[height=1cm]{figures/logo_labex_digicosme.png} \quad \includegraphics[height=1cm]{figures/logo_dataia.png} \quad \includegraphics[height=1cm]{figures/logo_hadamard.png} \quad \includegraphics[height=1cm]{figures/logo_paris_saclay_ds.png} 7 | section-titles: false 8 | handout: true 9 | biblio-style: authoryear 10 | header-includes: 11 | - \usepackage{booktabs} 12 | - \usepackage{makecell} 13 | - \usepackage{multicol} 14 | - \usepackage{multirow} 15 | - \usepackage{subfig} 16 | - \usepackage{bm} 17 | - \DeclareMathOperator\logit{logit} 18 | biblatexoptions: 19 | - maxbibnames=99 20 | - maxcitenames=5 21 | --- 22 | 23 | # Introduction 24 | 25 | ## Mitigating human forgetting with spaced repetition 26 | 27 | * Human learners face a constant trade-off between **acquiring new knowledge** and **reviewing old knowledge** \bigskip 28 | * Cognitive science provides simple + robust learning strategies for improving LT memory 29 | * \alert{Spaced repetition} 30 | * \alert{Testing} \bigskip 31 | * Can we do better? **Yes**, by providing students with an _adaptive_ and _personalized_ spacing scheduler. 32 | 33 | ## Mitigating human forgetting with spaced repetition 34 | 35 | \raisebox{.5cm}{\includegraphics[width=0.5\textwidth]{figures/leitner.png}}\includegraphics[width=0.5\textwidth]{figures/anki.png} 36 | 37 | Ex. select the item whose memory strength is closest to a threshold $\theta$ [\cite{lindsey2014improving}] $\rightarrow$ "almost forgotten" 38 | 39 | ## Beyond flashcard memorization 40 | 41 | **Problem**: these algorithms are designed for optimizing _pure memorization_ (of facts, vocabulary,...) 
42 | 43 | * In real-world educational settings, students also need to learn to master and remember a set of **skills** 44 | 45 | * In that case, specific items are the only way to practice one or multiple skills because _we do not have to memorize the content directly_ 46 | 47 | * Traditional adaptive spacing schedulers are **not applicable for learning skills** 48 | 49 | ## Extension to skill practice and review 50 | 51 | \begin{minipage}{0.4\linewidth} 52 | \textcolor{blue!80}{Item}-\textcolor{green!50!black}{skill} relationships require expert labor and are synthesized inside a binary q-matrix $\rightarrow$ 53 | \end{minipage}\begin{minipage}{0.6\linewidth} 54 | \scriptsize 55 | \input{tables/dummy_qmat.tex} 56 | \end{minipage} 57 | 58 | \centering 59 | \includegraphics[width=10cm]{figures/item_skills_relations.pdf} 60 | 61 | ## Limitations of student models 62 | 63 | We need to be able to infer skill memory strength and dynamics, however in the student modeling literature: 64 | 65 | * some models leverage item-skills relationships 66 | * some others incorporate forgetting 67 | 68 | But none does both! 69 | 70 | ## Our contribution 71 | 72 | We take a model-based approach for this task. 73 | 74 | 1. Traditional adaptive spacing algorithms can be extended to review and practice skills (not only flashcards).\bigskip 75 | 2. We developed a new student _learning_ and _forgetting_ model that leverages item-skill relationships: \alert{\textbf{DAS3H}}. 76 | * DAS3H outperforms 4 SOTA student models on 3 datasets. 77 | * Incorporating skill info + forgetting effect improves over models that consider one or the other. 78 | * Using precise temporal information on past skill practice + assuming different learning/forgetting curves \alert{for different skills} improves performance. 79 | 80 | ## Outline 81 | 82 | 1. Our model DAS3H\bigskip 83 | 2. Experiments\bigskip 84 | 3. Conclusion 85 | 86 | # DAS3H 87 | 88 | ## DASH 89 | 90 | $\rightarrow$ DASH = item **D**ifficulty, student **A**bility, and **S**tudent **H**istory 91 | 92 | DASH [\cite{lindsey2014improving}] bridges the gap between _Factor Analysis models_ and _memory models_: 93 | 94 | $$\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma(\alpha_s - \delta_j + h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}))$$ 95 | 96 | where: 97 | 98 | * $Y_{s,j,t}$ binary correctness of student $s$ answering item $j$ at time $t$; 99 | * $\sigma$ logistic function; 100 | * $\alpha_s$ ability of student $s$; 101 | * $\delta_j$ difficulty of item $j$; 102 | * $h_{\theta}$ summarizes the effect of the $l-1$ previous attempts of $s$ on $j$ at times $\mathrm{t}_{s,j,1:l-1}$ + the binary outcomes $\mathrm{y}_{s,j,1:l-1}$. 103 | 104 | ## DASH 105 | 106 | Lindsey et al. chose: 107 | \begin{align*} 108 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,j,w}) \\ 109 | &- \theta_{2w+2}\log(1+a_{s,j,w}) 110 | \end{align*} 111 | 112 | where: 113 | 114 | * $w$ indexes a set of expanding \alert{time windows}; 115 | * $c_{s,j,w}$ number of correct answers of $s$ on $j$ in time window $w$; 116 | * $a_{s,j,w}$ number of attempts of $s$ on $j$ in time window $w$; 117 | * $\theta$ is _learned_ by DASH. 118 | 119 | ## Our model DAS3H 120 | 121 | We extend DASH in **3 ways**: 122 | \begin{enumerate} 123 | \item Extension to handle multiple skills tagging: new temporal module $h_{\theta}$ that also takes the multiple skills into account. 
124 | \begin{itemize} 125 | \item Influence of the temporal distribution of past attempts and outcomes can differ from one skill to another. 126 | \end{itemize} 127 | \item Estimation of easiness parameters for \textit{each} item $j$ and skill $k$; 128 | \item Use of KTMs [\cite{Vie2019}] instead of mere logistic regression for multidimensional feature embeddings and pairwise interactions. 129 | \end{enumerate} 130 | 131 | ## Our model DAS3H 132 | 133 | $\rightarrow$ DAS3H = item **D**ifficulty, student **A**bility, **S**kill and **S**tudent **S**kill practice **H**istory 134 | 135 | For an embedding dimension of $d=0$, DAS3H is: 136 | 137 | $\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma (\alpha_s - \delta_j + \underbrace{\alert{\sum_{k \in KC(j)} \beta_k}}_{\text{skill easiness biases}} +h_{\theta}\left(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}\right))$. 138 | 139 | We choose: 140 | \begin{align*} 141 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \alert{\sum_{k \in KC(j)}}&\sum_{w=0}^{W-1}\theta_{\alert{k},2w+1}\log(1+c_{s,\alert{k},w})\\ 142 | &- \theta_{\alert{k},2w+2}\log(1+a_{s,\alert{k},w}). 143 | \end{align*} 144 | 145 | $\rightarrow$ Now, $h_{\theta}$ can be seen as a sum of _skill_ memory strengths! 146 | 147 | # Experiments 148 | 149 | ## Experiments 150 | 151 | 1. Experimental setting 152 | 153 | 2. Contenders \& datasets 154 | 155 | 3. Main results 156 | 157 | 4. Further analyses 158 | 159 | ## Experimental setting 160 | 161 | * **5-fold cross-validation** at the student level: predicting binary outcomes on \alert{unseen} students (_strong generalization_)\bigskip 162 | * Distributional assumptions to \alert{avoid overfitting}: 163 | * When $d=0$: L2 regularization/$\mathcal{N}(0,1)$ prior 164 | * When $d > 0$: hierarchical distributional scheme\bigskip 165 | * Same time windows as Lindsey et al.: {1/24,1,7,30,+$\infty$} 166 | 167 | ## Contenders \& datasets 168 | 169 | * 5 contenders (\alert{DAS3H}, DASH, IRT/MIRT, PFA, AFM) $\times$ 3 embedding dimensions (0, 5 \& 20) 170 | 171 | \tiny 172 | | | users | items | skills | wins | fails | attempts | tw [KC] | tw [items] | 173 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 174 | | **DAS3H** | x | x | x | x | | x | x | | 175 | | DASH | x | x | | x | | x | | x | 176 | | IRT/MIRT | x | x | | | | | | | 177 | | PFA | | | x | x | x | | | | 178 | | AFM | | | x | | | x | | | 179 | 180 | \normalsize 181 | * 3 datasets: ASSISTments 2012-2013, Bridge to Algebra 2006-2007 \& Algebra I 2005-2006 (KDD Cup 2010) 182 | * Data consists of logs of student-item interactions on 2 ITS 183 | 184 | \tiny 185 | \input{tables/datasets_caracs_jdse.tex} 186 | 187 | ## Main results 188 | 189 | \input{tables/exp_results.tex} 190 | $\rightarrow$ On every dataset, **DAS3H outperforms** the other models (between +0.04 and +0.05 AUC compared to DASH). 191 | 192 | ## Importance of time windows 193 | 194 | \centering 195 | \begin{figure} 196 | \includegraphics[width=5.5cm]{figures/pairwise_comp_all_datasets.pdf} 197 | \caption{AUC comparison on DAS3H \textit{with} and \textit{without} time windows features (all datasets, 5-fold cross-validation).} 198 | \end{figure} 199 | \vspace{-3mm} 200 | \raggedright 201 | Without time windows, $h_{\theta}$ counts past wins and attempts in DAS3H. 202 | $\rightarrow$ Using \alert{temporal distribution of past skill practice} instead of simple win/fail counters improves AUC performance: the _**when**_ matters. 
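As a rough illustration of how these features can be computed (a minimal sketch, not code from the repository: the helper name, the day-based window handling and the toy history are assumptions), the per-skill counters behind $h_{\theta}$ boil down to:

```python
import numpy as np

# Expanding time windows from the slides, expressed in days (1/24 day = 1 hour).
WINDOWS = [1 / 24, 1, 7, 30, np.inf]

def das3h_counters(history, now):
    """Log-scaled wins/attempts of one student on one skill, per time window.

    history -- list of (timestamp_in_days, correct) pairs strictly before `now`
    Returns 2 * W features: log(1 + c_w) and log(1 + a_w) for each window w.
    """
    feats = []
    for w in WINDOWS:
        recent = [y for t, y in history if now - t < w]
        wins, attempts = sum(recent), len(recent)
        feats += [np.log(1 + wins), np.log(1 + attempts)]
    return np.array(feats)

# Toy history: three attempts on one skill, 12, 3 and 0.5 days before `now`.
print(das3h_counters([(0.0, 1), (9.0, 0), (11.5, 1)], now=12.0))
```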
203 | 204 | ## Importance of different learning/forgetting curves per skill 205 | \scriptsize 206 | \input{tables/comp_DAS3H_multiparams.tex} 207 | 208 | \normalsize 209 | $\rightarrow$ Assuming **different learning and forgetting curves for different skills** in DAS3H consistently yields better predictive power: some skills are easier to learn and slower to forget. 210 | 211 | # Conclusion 212 | 213 | ## In a nutshell 214 | 215 | * Human forgetting is _ubiquitous_ but luckily: 216 | * \alert{Cognitive science} gives us efficient and simple learning strategies 217 | * \alert{ML} can build us tools to **personalize these strategies** and further improve LT memory retention 218 | 219 | * Adaptive spacing algorithms have been focusing on _pure memorization_ (e.g. vocabulary learning) 220 | * They can be used for \alert{optimizing practice and retention of skills} 221 | 222 | * Our student model **DAS3H** 223 | * incorporates information on _skills_ **and** _forgetting_ to predict learner performance 224 | * shows higher predictive power than other SOTA student models 225 | * fits our model-based approach for optimally scheduling skill review 226 | 227 | ## Thanks for your attention! 228 | 229 | A longer version of our paper is available at: 230 | 231 | \centering 232 | `https://arxiv.org/abs/1905.06873` 233 | 234 | \raggedright 235 | Python code is freely available on my GitHub page: 236 | 237 | \centering 238 | `https://github.com/BenoitChoffin/das3h` ! 239 | 240 | \raggedright 241 | To send me questions about our paper or my research work: 242 | 243 | \centering 244 | `benoit.choffin@lri.fr` 245 | 246 | -------------------------------------------------------------------------------- /slides/jdse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/jdse.pdf -------------------------------------------------------------------------------- /slides/lak2020.md: -------------------------------------------------------------------------------- 1 | % DAS3H: Modeling Student Learning and Forgetting for Optimally Scheduling Distributed Practice of Skills 2 | % \alert{Benoît Choffin}, Fabrice Popineau, Yolaine Bourda & Jill-Jênn Vie\newline\newline\scriptsize LRI/CentraleSupélec - University of Paris-Saclay | Inria Lille 3 | % LAK 2020, Best Paper at EDM 2019 | March 25, 2020 4 | --- 5 | theme: Frankfurt 6 | institute: \includegraphics[height=1.2cm]{figures/logo_lri.jpeg} \quad \includegraphics[height=1.3cm]{figures/LogoCS1.png} \quad \includegraphics[height=1.3cm]{figures/logo_UP_saclay_final.png} \quad \includegraphics[height=1cm]{figures/logo_inria.png} 7 | section-titles: false 8 | handout: true 9 | biblio-style: authoryear 10 | header-includes: 11 | - \usepackage{booktabs} 12 | - \usepackage{makecell} 13 | - \usepackage{multicol} 14 | - \usepackage{multirow} 15 | - \usepackage{subfig} 16 | - \usepackage{bm} 17 | - \DeclareMathOperator\logit{logit} 18 | biblatexoptions: 19 | - maxbibnames=99 20 | - maxcitenames=5 21 | --- 22 | 23 | # Introduction 24 | 25 | ## Mitigating human forgetting with spaced repetition 26 | 27 | * Human learners face a constant trade-off between **acquiring new knowledge** and **reviewing old knowledge** \bigskip 28 | * Cognitive science provides simple + robust learning strategies for improving LT memory 29 | * \alert{Spaced repetition} 30 | * \alert{Testing} \bigskip 31 | * Can we do better? 
**Yes**, by providing students with an _adaptive_ and _personalized_ spacing scheduler. 32 | 33 | ## Mitigating human forgetting with spaced repetition 34 | 35 | \raisebox{.5cm}{\includegraphics[width=0.5\textwidth]{figures/leitner.png}}\includegraphics[width=0.5\textwidth]{figures/anki.png} 36 | 37 | Ex. select the item whose memory strength is closest to a threshold $\theta$ [\cite{lindsey2014improving}] $\rightarrow$ "almost forgotten" 38 | 39 | ## Beyond flashcard memorization 40 | 41 | **Problem**: these algorithms are designed for optimizing _pure memorization_ (of facts, vocabulary,...) 42 | 43 | * In real-world educational settings, students also need to learn to master and remember a set of **skills** 44 | 45 | * In that case, specific items are the only way to practice one or multiple skills because _we do not have to memorize the content directly_ 46 | 47 | * Traditional adaptive spacing schedulers are **not applicable for learning skills** 48 | 49 | ## Extension to skill practice and review 50 | 51 | \begin{minipage}{0.4\linewidth} 52 | \textcolor{blue!80}{Item}-\textcolor{green!50!black}{skill} relationships require expert labor and are synthesized inside a binary q-matrix $\rightarrow$ 53 | \end{minipage}\begin{minipage}{0.6\linewidth} 54 | \scriptsize 55 | \input{tables/dummy_qmat.tex} 56 | \end{minipage} 57 | 58 | \centering 59 | \includegraphics[width=10cm]{figures/item_skills_relations.pdf} 60 | 61 | ## Limitations of student models 62 | 63 | We need to be able to infer skill memory strength and dynamics, however in the student modeling literature: 64 | 65 | * some models leverage item-skills relationships 66 | * some others incorporate forgetting 67 | 68 | But none does both! 69 | 70 | ## Our contribution 71 | 72 | We take a model-based approach for this task. 73 | 74 | 1. Traditional adaptive spacing algorithms can be extended to review and practice skills (not only flashcards).\bigskip 75 | 2. We developed a new student _learning_ and _forgetting_ model that leverages item-skill relationships: \alert{\textbf{DAS3H}}. 76 | * DAS3H outperforms 4 SOTA student models on 3 datasets. 77 | * Incorporating skill info + forgetting effect improves over models that consider one or the other. 78 | * Using precise temporal information on past skill practice + assuming different learning/forgetting curves \alert{for different skills} improves performance. 79 | 80 | ## Outline 81 | 82 | 1. DASH\bigskip 83 | 2. Our model DAS3H\bigskip 84 | 3. Experiments\bigskip 85 | 4. Conclusion 86 | 87 | # DASH 88 | 89 | ## DASH 90 | 91 | $\rightarrow$ DASH = item **D**ifficulty, student **A**bility, and **S**tudent **H**istory 92 | 93 | DASH [\cite{lindsey2014improving}] bridges the gap between _Factor Analysis models_ and _memory models_: 94 | 95 | $$\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma(\alpha_s - \delta_j + h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}))$$ 96 | 97 | where: 98 | 99 | * $Y_{s,j,t}$ binary correctness of student $s$ answering item $j$ at time $t$; 100 | * $\sigma$ logistic function; 101 | * $\alpha_s$ ability of student $s$; 102 | * $\delta_j$ difficulty of item $j$; 103 | * $h_{\theta}$ summarizes the effect of the $l-1$ previous attempts of $s$ on $j$ at times $\mathrm{t}_{s,j,1:l-1}$ + the binary outcomes $\mathrm{y}_{s,j,1:l-1}$. 104 | 105 | ## DASH 106 | 107 | Lindsey et al. 
chose: 108 | \begin{align*} 109 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,j,w}) \\ 110 | &- \theta_{2w+2}\log(1+a_{s,j,w}) 111 | \end{align*} 112 | 113 | where: 114 | 115 | * $w$ indexes a set of expanding \alert{time windows}; 116 | * $c_{s,j,w}$ number of correct answers of $s$ on $j$ in time window $w$; 117 | * $a_{s,j,w}$ number of attempts of $s$ on $j$ in time window $w$; 118 | * $\theta$ is _learned_ by DASH. 119 | 120 | ## DASH 121 | 122 | Assuming that the set of time windows is \{1, 7, 14, $+\infty$\}: 123 | 124 | \centering 125 | \includegraphics[width=10cm]{figures/time_windows.pdf} 126 | 127 | ## DASH 128 | 129 | DASH: 130 | 131 | * accounts for both _learning_ and _forgetting_ processes; 132 | 133 | * induces diminishing returns of practice inside a time window (log-counts); 134 | 135 | * has a time module $h_{\theta}$ inspired by ACT-R [\cite{anderson1997act}] and MCM [\cite{pashler2009predicting}]. 136 | 137 | # DAS3H 138 | 139 | ## From DASH to DAS3H 140 | 141 | * DASH 142 | * outperforms a hierarchical Bayesian IRT on Lindsey et al. experimental data (vocabulary learning). 143 | * was successfully used to adaptively personalize item review in a real-world cognitive psychology experiment. 144 | \bigskip 145 | * However, DASH 146 | * does not handle multiple skill item tagging $\rightarrow$ useful to account for knowledge transfer from one item to another. 147 | * assumes that memory decays at the same rate for every KC. 148 | 149 | ## Our model DAS3H 150 | 151 | We extend DASH in **3 ways**: 152 | \begin{enumerate} 153 | \item Extension to handle multiple skills tagging: new temporal module $h_{\theta}$ that also takes the multiple skills into account. 154 | \begin{itemize} 155 | \item Influence of the temporal distribution of past attempts and outcomes can differ from one skill to another. 156 | \end{itemize} 157 | \item Estimation of easiness parameters for \textit{each} item $j$ and skill $k$; 158 | \item Use of KTMs [\cite{Vie2019}] instead of mere logistic regression for multidimensional feature embeddings and pairwise interactions. 159 | \end{enumerate} 160 | 161 | ## Knowledge Tracing Machines (KTMs) 162 | 163 | Just pick features (ex. \textcolor{blue!80}{user}, \textcolor{orange}{item}, \textcolor{green!50!black}{skill}) and you get a student model 164 | 165 | Each feature $k$ is modeled by bias $\alert{w_k}$ and embedding $\alert{\bm{v_k}}$.\vspace{2mm} 166 | \begin{columns} 167 | \begin{column}{0.47\linewidth} 168 | \includegraphics[width=\linewidth]{figures/fm.pdf} 169 | \end{column} 170 | \begin{column}{0.53\linewidth} 171 | \includegraphics[width=\linewidth]{figures/fm2.pdf} 172 | \end{column} 173 | \end{columns}\vspace{-2mm} 174 | 175 | \hfill $\logit p(\bm{x}) = \mu + \underbrace{\sum_{k = 1}^N \alert{w_k} x_k}_\textnormal{logistic regression} + \underbrace{\sum_{1 \leq k < l \leq N} x_k x_l \langle \alert{\bm{v_k}}, \alert{\bm{v_l}} \rangle}_\textnormal{pairwise relationships}$ 176 | 177 | \small 178 | \fullcite{Vie2019} 179 | 180 | ## Our model DAS3H 181 | 182 | $\rightarrow$ DAS3H = item **D**ifficulty, student **A**bility, **S**kill and **S**tudent **S**kill practice **H**istory 183 | 184 | For an embedding dimension of $d=0$, DAS3H is: 185 | 186 | $\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma (\alpha_s - \delta_j + \underbrace{\alert{\sum_{k \in KC(j)} \beta_k}}_{\text{skill easiness biases}} +h_{\theta}\left(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}\right))$. 
187 | 188 | We choose: 189 | \begin{align*} 190 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \alert{\sum_{k \in KC(j)}}&\sum_{w=0}^{W-1}\theta_{\alert{k},2w+1}\log(1+c_{s,\alert{k},w})\\ 191 | &- \theta_{\alert{k},2w+2}\log(1+a_{s,\alert{k},w}). 192 | \end{align*} 193 | 194 | $\rightarrow$ Now, $h_{\theta}$ can be seen as a sum of _skill_ memory strengths! 195 | 196 | # Experiments 197 | 198 | ## Experiments 199 | 200 | 1. Experimental setting 201 | 202 | 2. Contenders 203 | 204 | 3. Datasets 205 | 206 | 4. Main results 207 | 208 | 5. Further analyses 209 | 210 | ## Experimental setting 211 | 212 | \begin{block}{How to compare ML models?} 213 | Train the models on one part of the dataset 214 | 215 | Test on the other part 216 | 217 | Gather prediction metrics, compare the models 218 | \end{block} 219 | 220 | * **5-fold cross-validation** at the student level: predicting binary outcomes on \alert{unseen} students (_strong generalization_) 221 | * Distributional assumptions to \alert{avoid overfitting}: 222 | * When $d=0$: L2 regularization/$\mathcal{N}(0,1)$ prior 223 | * When $d > 0$: hierarchical distributional scheme 224 | * Same time windows as Lindsey et al.: {1/24,1,7,30,+$\infty$} 225 | 226 | ## Contenders 227 | 228 | 5 contenders: 229 | 230 | * \alert{DAS3H} 231 | * DASH [\cite{lindsey2014improving}] 232 | * IRT/MIRT [\cite{van2013handbook}] 233 | * PFA [\cite{pavlik2009performance}] 234 | * AFM [\cite{cen2006learning}] 235 | 236 | Every model was cast within the KTM framework $\rightarrow$ 3 embedding dimensions (0, 5 \& 20) + sparse feature encoding. 237 | 238 | \tiny 239 | | | users | items | skills | wins | fails | attempts | tw [KC] | tw [items] | 240 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 241 | | **DAS3H** | x | x | x | x | | x | x | | 242 | | DASH | x | x | | x | | x | | x | 243 | | IRT/MIRT | x | x | | | | | | | 244 | | PFA | | | x | x | x | | | | 245 | | AFM | | | x | | | x | | | 246 | 247 | ## Datasets 248 | 249 | * 3 datasets: ASSISTments 2012-2013, Bridge to Algebra 2006-2007 \& Algebra I 2005-2006 (KDD Cup 2010) 250 | * Data consists of logs of student-item interactions on 2 ITS 251 | * Selected because they contain _both_ timestamps and items with multiple skills $\rightarrow$ rare species in the EDM datasets fauna 252 | * Preprocessing scheme: removed users with < 10 interactions, interactions with \texttt{NaN} skills, duplicates 253 | 254 | \tiny 255 | \input{tables/datasets_caracs.tex} 256 | 257 | ## Main results 258 | 259 | \input{tables/exp_results.tex} 260 | $\rightarrow$ On every dataset, **DAS3H outperforms** the other models (between +0.04 and +0.05 AUC compared to DASH). 261 | 262 | ## Main results 263 | 264 | \begin{figure}% 265 | \centering 266 | \subfloat[DAS3H]{{\includegraphics[width=5cm]{figures/comp_dim_das3h.pdf} }}% 267 | \: 268 | \subfloat[IRT]{{\includegraphics[width=5cm]{figures/comp_dim_irt.pdf} }}% 269 | \caption{AUC comparison on two models for $d=0, 5$ and 20 (all datasets, 5-fold cross-validation).}% 270 | \label{dim_com}% 271 | \end{figure} 272 | \vspace{-5mm} 273 | \raggedright 274 | $\rightarrow$ The impact of the multidim feature embeddings is small and not consistent across datasets and models (+ unstable sometimes). 
275 | 276 | ## Importance of time windows 277 | 278 | \centering 279 | \begin{figure} 280 | \includegraphics[width=5.5cm]{figures/pairwise_comp_all_datasets.pdf} 281 | \caption{AUC comparison on DAS3H \textit{with} and \textit{without} time windows features (all datasets, 5-fold cross-validation).} 282 | \end{figure} 283 | \vspace{-3mm} 284 | \raggedright 285 | Without time windows, $h_{\theta}$ counts past wins and attempts in DAS3H. 286 | $\rightarrow$ Using \alert{temporal distribution of past skill practice} instead of simple win/fail counters improves AUC performance: the _**when**_ matters. 287 | 288 | ## Importance of different learning/forgetting curves per skill 289 | \scriptsize 290 | \input{tables/comp_DAS3H_multiparams.tex} 291 | 292 | \normalsize 293 | $\rightarrow$ Assuming **different learning and forgetting curves for different skills** in DAS3H consistently yields better predictive power: some skills are easier to learn and slower to forget. 294 | 295 | # Conclusion 296 | 297 | ## In a nutshell 298 | 299 | * Human forgetting is _ubiquitous_ but luckily: 300 | * \alert{Cognitive science} gives us efficient and simple learning strategies 301 | * \alert{ML} can build us tools to **personalize these strategies** and further improve LT memory retention 302 | 303 | * Adaptive spacing algorithms have been focusing on _pure memorization_ (e.g. vocabulary learning) 304 | * They can be used for \alert{optimizing practice and retention of skills} 305 | 306 | * Our student model **DAS3H** 307 | * incorporates information on _skills_ **and** _forgetting_ to predict learner performance 308 | * shows higher predictive power than other SOTA student models 309 | * fits our model-based approach for optimally scheduling skill review 310 | 311 | ## Thanks for your attention! 312 | 313 | Our paper is available at: 314 | 315 | \centering 316 | `https://arxiv.org/abs/1905.06873` 317 | 318 | \raggedright 319 | Python code is freely available on my GitHub page: 320 | 321 | \centering 322 | `https://github.com/BenoitChoffin/das3h` ! 
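As a concrete illustration of this ablation, here is a minimal sketch (made-up counts, not the repository's `encode.py`) of the two encodings being compared: per-window log-counts versus a single overall pair of win/attempt counts.

```python
import numpy as np

# Hypothetical history for one (student, skill) pair: age of each past attempt in days
attempt_ages = np.array([0.02, 0.5, 3.0, 12.0, 45.0])
attempt_wins = np.array([1, 0, 1, 1, 0])          # 1 = correct answer

windows = [1 / 24, 1, 7, 30, np.inf]              # same windows as on the slides

# DAS3H features: one log(1 + count) pair per expanding time window
with_tw = []
for w in windows:
    in_w = attempt_ages <= w
    with_tw += [np.log1p(attempt_wins[in_w].sum()), np.log1p(in_w.sum())]

# Ablated version ("without time windows"): only the overall totals remain
without_tw = [np.log1p(attempt_wins.sum()), np.log1p(len(attempt_ages))]

print(np.round(with_tw, 2))     # 10 features: wins / attempts inside each window
print(np.round(without_tw, 2))  # 2 features: total wins / total attempts
```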
323 | 324 | \raggedright 325 | To send me questions about our paper or my research work: 326 | 327 | \centering 328 | `benoit.choffin@lri.fr` 329 | 330 | -------------------------------------------------------------------------------- /slides/lak2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/lak2020.pdf -------------------------------------------------------------------------------- /slides/limsi.md: -------------------------------------------------------------------------------- 1 | % DAS3H: Modeling Student Learning and Forgetting for Optimally Scheduling Distributed Practice of Skills 2 | % \alert{Benoît Choffin}, Fabrice Popineau, Yolaine Bourda & Jill-Jênn Vie\newline\newline\scriptsize LRI/CentraleSupélec - University of Paris-Saclay | RIKEN AIP 3 | % LIMSI, Orsay | October 1, 2019 4 | --- 5 | theme: Frankfurt 6 | institute: \includegraphics[height=1.2cm]{figures/logo_lri.jpeg} \quad \includegraphics[height=1.3cm]{figures/LogoCS1.png} \quad \includegraphics[height=1.3cm]{figures/logo_UP_saclay_final.png} \quad \includegraphics[height=1cm]{figures/aip.png} 7 | section-titles: false 8 | handout: true 9 | biblio-style: authoryear 10 | header-includes: 11 | - \usepackage{booktabs} 12 | - \usepackage{makecell} 13 | - \usepackage{multicol} 14 | - \usepackage{multirow} 15 | - \usepackage{subfig} 16 | - \usepackage{bm} 17 | - \DeclareMathOperator\logit{logit} 18 | biblatexoptions: 19 | - maxbibnames=99 20 | - maxcitenames=5 21 | --- 22 | 23 | # Introduction 24 | 25 | ## Mitigating human forgetting with spaced repetition 26 | 27 | * Human learners face a constant trade-off between **acquiring new knowledge** and **reviewing old knowledge** \bigskip 28 | * Cognitive science provides simple + robust learning strategies for improving LT memory 29 | * \alert{Spaced repetition} 30 | * \alert{Testing} \bigskip 31 | * Can we do better? **Yes**, by providing students with an _adaptive_ and _personalized_ spacing scheduler. 32 | 33 | ## Mitigating human forgetting with spaced repetition 34 | 35 | \raisebox{.5cm}{\includegraphics[width=0.5\textwidth]{figures/leitner.png}}\includegraphics[width=0.5\textwidth]{figures/anki.png} 36 | 37 | Ex. select the item whose memory strength is closest to a threshold $\theta$ [\cite{lindsey2014improving}] $\rightarrow$ "almost forgotten" 38 | 39 | ## Beyond flashcard memorization 40 | 41 | **Problem**: these algorithms are designed for optimizing _pure memorization_ (of facts, vocabulary,...) 
42 | 43 | * In real-world educational settings, students also need to learn to master and remember a set of **skills** 44 | 45 | * In that case, specific items are the only way to practice one or multiple skills because _we do not have to memorize the content directly_ 46 | 47 | * Traditional adaptive spacing schedulers are **not applicable for learning skills** 48 | 49 | ## Extension to skill practice and review 50 | 51 | \begin{minipage}{0.4\linewidth} 52 | \textcolor{blue!80}{Item}-\textcolor{green!50!black}{skill} relationships require expert labor and are synthesized inside a binary q-matrix $\rightarrow$ 53 | \end{minipage}\begin{minipage}{0.6\linewidth} 54 | \scriptsize 55 | \input{tables/dummy_qmat.tex} 56 | \end{minipage} 57 | 58 | \centering 59 | \includegraphics[width=10cm]{figures/item_skills_relations.pdf} 60 | 61 | ## Limitations of student models 62 | 63 | We need to be able to infer skill memory strength and dynamics, however in the student modeling literature: 64 | 65 | * some models leverage item-skills relationships 66 | * some others incorporate forgetting 67 | 68 | But none does both! 69 | 70 | ## Our contribution 71 | 72 | We take a model-based approach for this task. 73 | 74 | 1. Traditional adaptive spacing algorithms can be extended to review and practice skills (not only flashcards).\bigskip 75 | 2. We developed a new student _learning_ and _forgetting_ model that leverages item-skill relationships: \alert{\textbf{DAS3H}}. 76 | * DAS3H outperforms 4 SOTA student models on 3 datasets. 77 | * Incorporating skill info + forgetting effect improves over models that consider one or the other. 78 | * Using precise temporal information on past skill practice + assuming different learning/forgetting curves \alert{for different skills} improves performance. 79 | 80 | ## Outline 81 | 82 | 1. DASH\bigskip 83 | 2. Our model DAS3H\bigskip 84 | 3. Experiments\bigskip 85 | 4. Conclusion 86 | 87 | # DASH 88 | 89 | ## DASH 90 | 91 | $\rightarrow$ DASH = item **D**ifficulty, student **A**bility, and **S**tudent **H**istory 92 | 93 | DASH [\cite{lindsey2014improving}] bridges the gap between _Factor Analysis models_ and _memory models_: 94 | 95 | $$\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma(\alpha_s - \delta_j + h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}))$$ 96 | 97 | where: 98 | 99 | * $Y_{s,j,t}$ binary correctness of student $s$ answering item $j$ at time $t$; 100 | * $\sigma$ logistic function; 101 | * $\alpha_s$ ability of student $s$; 102 | * $\delta_j$ difficulty of item $j$; 103 | * $h_{\theta}$ summarizes the effect of the $l-1$ previous attempts of $s$ on $j$ at times $\mathrm{t}_{s,j,1:l-1}$ + the binary outcomes $\mathrm{y}_{s,j,1:l-1}$. 104 | 105 | ## DASH 106 | 107 | Lindsey et al. chose: 108 | \begin{align*} 109 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,j,w}) \\ 110 | &- \theta_{2w+2}\log(1+a_{s,j,w}) 111 | \end{align*} 112 | 113 | where: 114 | 115 | * $w$ indexes a set of expanding \alert{time windows}; 116 | * $c_{s,j,w}$ number of correct answers of $s$ on $j$ in time window $w$; 117 | * $a_{s,j,w}$ number of attempts of $s$ on $j$ in time window $w$; 118 | * $\theta$ is _learned_ by DASH. 
119 | 120 | ## DASH 121 | 122 | Assuming that the set of time windows is \{1, 7, 14, $+\infty$\}: 123 | 124 | \centering 125 | \includegraphics[width=10cm]{figures/time_windows.pdf} 126 | 127 | ## DASH 128 | 129 | DASH: 130 | 131 | * accounts for both _learning_ and _forgetting_ processes; 132 | 133 | * induces diminishing returns of practice inside a time window (log-counts); 134 | 135 | * has a time module $h_{\theta}$ inspired by ACT-R [\cite{anderson1997act}] and MCM [\cite{pashler2009predicting}]. 136 | 137 | # DAS3H 138 | 139 | ## From DASH to DAS3H 140 | 141 | * DASH 142 | * outperforms a hierarchical Bayesian IRT on Lindsey et al. experimental data (vocabulary learning). 143 | * was successfully used to adaptively personalize item review in a real-world cognitive psychology experiment. 144 | \bigskip 145 | * However, DASH 146 | * does not handle multiple skill item tagging $\rightarrow$ useful to account for knowledge transfer from one item to another. 147 | * assumes that memory decays at the same rate for every KC. 148 | 149 | ## Our model DAS3H 150 | 151 | We extend DASH in **3 ways**: 152 | \begin{enumerate} 153 | \item Extension to handle multiple skills tagging: new temporal module $h_{\theta}$ that also takes the multiple skills into account. 154 | \begin{itemize} 155 | \item Influence of the temporal distribution of past attempts and outcomes can differ from one skill to another. 156 | \end{itemize} 157 | \item Estimation of easiness parameters for \textit{each} item $j$ and skill $k$; 158 | \item Use of KTMs [\cite{Vie2019}] instead of mere logistic regression for multidimensional feature embeddings and pairwise interactions. 159 | \end{enumerate} 160 | 161 | ## Knowledge Tracing Machines (KTMs) 162 | 163 | Just pick features (ex. \textcolor{blue!80}{user}, \textcolor{orange}{item}, \textcolor{green!50!black}{skill}) and you get a student model 164 | 165 | Each feature $k$ is modeled by bias $\alert{w_k}$ and embedding $\alert{\bm{v_k}}$.\vspace{2mm} 166 | \begin{columns} 167 | \begin{column}{0.47\linewidth} 168 | \includegraphics[width=\linewidth]{figures/fm.pdf} 169 | \end{column} 170 | \begin{column}{0.53\linewidth} 171 | \includegraphics[width=\linewidth]{figures/fm2.pdf} 172 | \end{column} 173 | \end{columns}\vspace{-2mm} 174 | 175 | \hfill $\logit p(\bm{x}) = \mu + \underbrace{\sum_{k = 1}^N \alert{w_k} x_k}_\textnormal{logistic regression} + \underbrace{\sum_{1 \leq k < l \leq N} x_k x_l \langle \alert{\bm{v_k}}, \alert{\bm{v_l}} \rangle}_\textnormal{pairwise relationships}$ 176 | 177 | \small 178 | \fullcite{Vie2019} 179 | 180 | ## Our model DAS3H 181 | 182 | $\rightarrow$ DAS3H = item **D**ifficulty, student **A**bility, **S**kill and **S**tudent **S**kill practice **H**istory 183 | 184 | For an embedding dimension of $d=0$, DAS3H is: 185 | 186 | $\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma (\alpha_s - \delta_j + \underbrace{\alert{\sum_{k \in KC(j)} \beta_k}}_{\text{skill easiness biases}} +h_{\theta}\left(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}\right))$. 187 | 188 | We choose: 189 | \begin{align*} 190 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \alert{\sum_{k \in KC(j)}}&\sum_{w=0}^{W-1}\theta_{\alert{k},2w+1}\log(1+c_{s,\alert{k},w})\\ 191 | &- \theta_{\alert{k},2w+2}\log(1+a_{s,\alert{k},w}). 192 | \end{align*} 193 | 194 | $\rightarrow$ Now, $h_{\theta}$ can be seen as a sum of _skill_ memory strengths! 195 | 196 | # Experiments 197 | 198 | ## Experiments 199 | 200 | 1. Experimental setting 201 | 202 | 2. Contenders 203 | 204 | 3. 
Datasets 205 | 206 | 4. Main results 207 | 208 | 5. Further analyses 209 | 210 | ## Experimental setting 211 | 212 | \begin{block}{How to compare ML models?} 213 | Train the models on one part of the dataset 214 | 215 | Test on the other part 216 | 217 | Gather prediction metrics, compare the models 218 | \end{block} 219 | 220 | * **5-fold cross-validation** at the student level: predicting binary outcomes on \alert{unseen} students (_strong generalization_) 221 | * Distributional assumptions to \alert{avoid overfitting}: 222 | * When $d=0$: L2 regularization/$\mathcal{N}(0,1)$ prior 223 | * When $d > 0$: hierarchical distributional scheme 224 | * Same time windows as Lindsey et al.: {1/24,1,7,30,+$\infty$} 225 | 226 | ## Contenders 227 | 228 | 5 contenders: 229 | 230 | * \alert{DAS3H} 231 | * DASH [\cite{lindsey2014improving}] 232 | * IRT/MIRT [\cite{van2013handbook}] 233 | * PFA [\cite{pavlik2009performance}] 234 | * AFM [\cite{cen2006learning}] 235 | 236 | Every model was cast within the KTM framework $\rightarrow$ 3 embedding dimensions (0, 5 \& 20) + sparse feature encoding. 237 | 238 | \tiny 239 | | | users | items | skills | wins | fails | attempts | tw [KC] | tw [items] | 240 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 241 | | **DAS3H** | x | x | x | x | | x | x | | 242 | | DASH | x | x | | x | | x | | x | 243 | | IRT/MIRT | x | x | | | | | | | 244 | | PFA | | | x | x | x | | | | 245 | | AFM | | | x | | | x | | | 246 | 247 | ## Datasets 248 | 249 | * 3 datasets: ASSISTments 2012-2013, Bridge to Algebra 2006-2007 \& Algebra I 2005-2006 (KDD Cup 2010) 250 | * Data consists of logs of student-item interactions on 2 ITS 251 | * Selected because they contain _both_ timestamps and items with multiple skills $\rightarrow$ rare species in the EDM datasets fauna 252 | * Preprocessing scheme: removed users with < 10 interactions, interactions with \texttt{NaN} skills, duplicates 253 | 254 | \tiny 255 | \input{tables/datasets_caracs.tex} 256 | 257 | ## Main results 258 | 259 | \input{tables/exp_results.tex} 260 | $\rightarrow$ On every dataset, **DAS3H outperforms** the other models (between +0.04 and +0.05 AUC compared to DASH). 261 | 262 | ## Main results 263 | 264 | \begin{figure}% 265 | \centering 266 | \subfloat[DAS3H]{{\includegraphics[width=5cm]{figures/comp_dim_das3h.pdf} }}% 267 | \: 268 | \subfloat[IRT]{{\includegraphics[width=5cm]{figures/comp_dim_irt.pdf} }}% 269 | \caption{AUC comparison on two models for $d=0, 5$ and 20 (all datasets, 5-fold cross-validation).}% 270 | \label{dim_com}% 271 | \end{figure} 272 | \vspace{-5mm} 273 | \raggedright 274 | $\rightarrow$ The impact of the multidim feature embeddings is small and not consistent across datasets and models (+ unstable sometimes). 275 | 276 | ## Importance of time windows 277 | 278 | \centering 279 | \begin{figure} 280 | \includegraphics[width=5.5cm]{figures/pairwise_comp_all_datasets.pdf} 281 | \caption{AUC comparison on DAS3H \textit{with} and \textit{without} time windows features (all datasets, 5-fold cross-validation).} 282 | \end{figure} 283 | \vspace{-3mm} 284 | \raggedright 285 | Without time windows, $h_{\theta}$ counts past wins and attempts in DAS3H. 286 | $\rightarrow$ Using \alert{temporal distribution of past skill practice} instead of simple win/fail counters improves AUC performance: the _**when**_ matters. 
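The next slide looks at what happens when the window weights are shared across skills. To make the DAS3H temporal module from the earlier slides concrete first, here is a minimal sketch (toy values, hypothetical skill ids) where `theta` is indexed by skill and the counters are kept per skill; tying `theta` across skills would give the single-parameter-set variant reported in the table on the next slide, as we read it.

```python
import numpy as np

W = 5                    # number of time windows
KC_j = [2, 7]            # skills tagged on item j (toy q-matrix lookup)

# theta[k] holds the 2 * W window weights of skill k (toy values, not fitted)
rng = np.random.default_rng(0)
theta = {k: rng.normal(scale=0.3, size=2 * W) for k in KC_j}

# Per-skill windowed counters for student s: correct answers and attempts
c = {2: np.array([0, 1, 1, 2, 4]), 7: np.array([1, 1, 2, 2, 3])}
a = {2: np.array([1, 2, 3, 4, 7]), 7: np.array([1, 2, 3, 3, 5])}

# h_theta = sum over the skills of item j of per-window log-count contributions
h = sum(
    np.sum(theta[k][0::2] * np.log1p(c[k]) - theta[k][1::2] * np.log1p(a[k]))
    for k in KC_j
)
print(round(float(h), 3))   # a sum of per-skill "memory strengths"
```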
287 | 288 | ## Importance of different learning/forgetting curves per skill 289 | \scriptsize 290 | \input{tables/comp_DAS3H_multiparams.tex} 291 | 292 | \normalsize 293 | $\rightarrow$ Assuming **different learning and forgetting curves for different skills** in DAS3H consistently yields better predictive power: some skills are easier to learn and slower to forget. 294 | 295 | # Conclusion 296 | 297 | ## In a nutshell 298 | 299 | * Human forgetting is _ubiquitous_ but luckily: 300 | * \alert{Cognitive science} gives us efficient and simple learning strategies 301 | * \alert{ML} can build us tools to **personalize these strategies** and further improve LT memory retention 302 | 303 | * Adaptive spacing algorithms have been focusing on _pure memorization_ (e.g. vocabulary learning) 304 | * They can be used for \alert{optimizing practice and retention of skills} 305 | 306 | * Our student model **DAS3H** 307 | * incorporates information on _skills_ **and** _forgetting_ to predict learner performance 308 | * shows higher predictive power than other SOTA student models 309 | * fits our model-based approach for optimally scheduling skill review 310 | 311 | ## Thanks for your attention! 312 | 313 | Our paper is already available at: 314 | 315 | \centering 316 | `https://arxiv.org/abs/1905.06873` 317 | 318 | \raggedright 319 | Python code is freely available on my GitHub page: 320 | 321 | \centering 322 | `https://github.com/BenoitChoffin/das3h` ! 323 | 324 | \raggedright 325 | To send me questions about our paper or my research work: 326 | 327 | \centering 328 | `benoit.choffin@lri.fr` 329 | 330 | -------------------------------------------------------------------------------- /slides/limsi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/limsi.pdf -------------------------------------------------------------------------------- /slides/lip6.md: -------------------------------------------------------------------------------- 1 | % DAS3H: Modeling Student Learning and Forgetting for Optimally Scheduling Distributed Practice of Skills 2 | % \alert{Benoît Choffin}, Fabrice Popineau, Yolaine Bourda & Jill-Jênn Vie\newline\newline\scriptsize LRI/CentraleSupélec - University of Paris-Saclay | Inria Lille 3 | % LIP6, Cyberspace | April 28, 2020 4 | --- 5 | theme: Frankfurt 6 | institute: \includegraphics[height=1.2cm]{figures/logo_lri.jpeg} \quad \includegraphics[height=1.3cm]{figures/LogoCS1.png} \quad \includegraphics[height=1.3cm]{figures/logo_UP_saclay_final.png} \quad \includegraphics[height=1cm]{figures/logo_inria.png} 7 | section-titles: false 8 | handout: true 9 | biblio-style: authoryear 10 | header-includes: 11 | - \usepackage{booktabs} 12 | - \usepackage{makecell} 13 | - \usepackage{multicol} 14 | - \usepackage{multirow} 15 | - \usepackage{subfig} 16 | - \usepackage{bm} 17 | - \DeclareMathOperator\logit{logit} 18 | biblatexoptions: 19 | - maxbibnames=99 20 | - maxcitenames=5 21 | --- 22 | 23 | # Introduction 24 | 25 | ## Mitigating human forgetting with spaced repetition 26 | 27 | * Human learners face a constant trade-off between **acquiring new knowledge** and **reviewing old knowledge** \bigskip 28 | * Cognitive science provides simple + robust learning strategies for improving LT memory 29 | * \alert{Spaced repetition} 30 | * \alert{Testing} \bigskip 31 | * Can we do better? 
**Yes**, by providing students with an _adaptive_ and _personalized_ spacing scheduler. 32 | 33 | ## Mitigating human forgetting with spaced repetition 34 | 35 | \raisebox{.5cm}{\includegraphics[width=0.5\textwidth]{figures/leitner.png}}\includegraphics[width=0.5\textwidth]{figures/anki.png} 36 | 37 | \begin{block}{Model-based} 38 | Ex. select the item whose memory strength is closest to a threshold $\theta$ [\cite{lindsey2014improving}] $\rightarrow$ "almost forgotten" 39 | \end{block} 40 | 41 | \begin{block}{Model-free} 42 | Ex. Reinforcement Learning methods such as [\cite{reddy2017accelerating}] 43 | \end{block} 44 | 45 | ## Beyond flashcard memorization 46 | 47 | **Problem**: these algorithms are designed for optimizing _pure memorization_ (of facts, vocabulary,...) 48 | 49 | * In real-world educational settings, students also need to learn to master and remember a set of **skills** 50 | 51 | * In that case, specific items are the only way to practice one or multiple skills because _we do not have to memorize the content directly_ 52 | 53 | * Traditional adaptive spacing schedulers are **not applicable for learning skills** 54 | 55 | ## Extension to skill practice and review 56 | 57 | \begin{minipage}{0.4\linewidth} 58 | \textcolor{blue!80}{Item}-\textcolor{green!50!black}{skill} relationships require expert labor and are synthesized inside a binary q-matrix $\rightarrow$ 59 | \end{minipage}\begin{minipage}{0.6\linewidth} 60 | \scriptsize 61 | \input{tables/dummy_qmat.tex} 62 | \end{minipage} 63 | 64 | \centering 65 | \includegraphics[width=10cm]{figures/item_skills_relations.pdf} 66 | 67 | ## Limitations of student models 68 | 69 | We need to be able to infer skill memory strength and dynamics, however in the student modeling literature: 70 | 71 | * some models leverage item-skills relationships 72 | * some others incorporate forgetting 73 | 74 | But none does both! 75 | 76 | ## Our contribution 77 | 78 | We take a model-based approach for this task. 79 | 80 | 1. Traditional adaptive spacing algorithms can be extended to review and practice skills (not only flashcards).\bigskip 81 | 2. We developed a new student _learning_ and _forgetting_ model that leverages item-skill relationships: \alert{\textbf{DAS3H}}. 82 | * DAS3H outperforms 4 SOTA student models on 3 datasets. 83 | * Incorporating skill info + forgetting effect improves over models that consider one or the other. 84 | * Using precise temporal information on past skill practice + assuming different learning/forgetting curves \alert{for different skills} improves performance. 85 | 86 | ## Outline 87 | 88 | 1. DASH\bigskip 89 | 2. Our model DAS3H\bigskip 90 | 3. Experiments\bigskip 91 | 4. 
Conclusion 92 | 93 | # DASH 94 | 95 | ## Knowledge tracing 96 | 97 | Predict future student performance given their history 98 | 99 | \centering 100 | \includegraphics[width=10cm]{figures/dkt_knowledge_tracing.png} 101 | \raggedright 102 | 103 | Given $(q_t,a_t)_{t \leq T}$ for former students 104 | 105 | $q_t$ is the question index, $a_t \in \{0,1\}$ is the correctness 106 | 107 | For new students, given $(q_t,a_t)_{t \leq T}$ and $q_{T+1}$, guess $a_{T+1}$ 108 | 109 | ## DASH 110 | 111 | $\rightarrow$ DASH = item **D**ifficulty, student **A**bility, and **S**tudent **H**istory 112 | 113 | DASH [\cite{lindsey2014improving}] bridges the gap between _Factor Analysis models_ and _memory models_: 114 | 115 | $$\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma(\alpha_s - \delta_j + h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}))$$ 116 | 117 | where: 118 | 119 | * $Y_{s,j,t}$ binary correctness of student $s$ answering item $j$ at time $t$; 120 | * $\sigma$ logistic function; 121 | * $\alpha_s$ ability of student $s$; 122 | * $\delta_j$ difficulty of item $j$; 123 | * $h_{\theta}$ summarizes the effect of the $l-1$ previous attempts of $s$ on $j$ at times $\mathrm{t}_{s,j,1:l-1}$ + the binary outcomes $\mathrm{y}_{s,j,1:l-1}$. 124 | 125 | ## DASH 126 | 127 | Lindsey et al. chose: 128 | \begin{align*} 129 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,j,w}) \\ 130 | &- \theta_{2w+2}\log(1+a_{s,j,w}) 131 | \end{align*} 132 | 133 | where: 134 | 135 | * $w$ indexes a set of expanding \alert{time windows}; 136 | * $c_{s,j,w}$ number of correct answers of $s$ on $j$ in time window $w$; 137 | * $a_{s,j,w}$ number of attempts of $s$ on $j$ in time window $w$; 138 | * $\theta$ is _learned_ by DASH. 139 | 140 | ## DASH 141 | 142 | Assuming that the set of time windows is \{1, 7, 14, $+\infty$\}: 143 | 144 | \centering 145 | \includegraphics[width=10cm]{figures/time_windows.pdf} 146 | 147 | ## DASH 148 | 149 | DASH: 150 | 151 | * accounts for both _learning_ and _forgetting_ processes; 152 | 153 | * induces diminishing returns of practice inside a time window (log-counts); 154 | 155 | * has a time module $h_{\theta}$ inspired by ACT-R [\cite{anderson1997act}] and MCM [\cite{pashler2009predicting}]. 156 | 157 | # DAS3H 158 | 159 | ## From DASH to DAS3H 160 | 161 | * DASH 162 | * outperforms a hierarchical Bayesian IRT on Lindsey et al. experimental data (vocabulary learning). 163 | * was successfully used to adaptively personalize item review in a real-world cognitive psychology experiment. 164 | \bigskip 165 | * However, DASH 166 | * does not handle multiple skill item tagging $\rightarrow$ useful to account for knowledge transfer from one item to another. 167 | * assumes that memory decays at the same rate for every KC. 168 | 169 | ## Our model DAS3H 170 | 171 | We extend DASH in **3 ways**: 172 | \begin{enumerate} 173 | \item Extension to handle multiple skills tagging: new temporal module $h_{\theta}$ that also takes the multiple skills into account. 174 | \begin{itemize} 175 | \item Influence of the temporal distribution of past attempts and outcomes can differ from one skill to another. 176 | \end{itemize} 177 | \item Estimation of easiness parameters for \textit{each} item $j$ and skill $k$; 178 | \item Use of KTMs [\cite{Vie2019}] instead of mere logistic regression for multidimensional feature embeddings and pairwise interactions. 179 | \end{enumerate} 180 | 181 | ## Knowledge Tracing Machines (KTMs) 182 | 183 | Just pick features (ex. 
\textcolor{blue!80}{user}, \textcolor{orange}{item}, \textcolor{green!50!black}{skill}) and you get a student model 184 | 185 | Each feature $k$ is modeled by bias $\alert{w_k}$ and embedding $\alert{\bm{v_k}}$.\vspace{2mm} 186 | \begin{columns} 187 | \begin{column}{0.47\linewidth} 188 | \includegraphics[width=\linewidth]{figures/fm.pdf} 189 | \end{column} 190 | \begin{column}{0.53\linewidth} 191 | \includegraphics[width=\linewidth]{figures/fm2.pdf} 192 | \end{column} 193 | \end{columns}\vspace{-2mm} 194 | 195 | \hfill $\logit p(\bm{x}) = \mu + \underbrace{\sum_{k = 1}^N \alert{w_k} x_k}_\textnormal{logistic regression} + \underbrace{\sum_{1 \leq k < l \leq N} x_k x_l \langle \alert{\bm{v_k}}, \alert{\bm{v_l}} \rangle}_\textnormal{pairwise relationships}$ 196 | 197 | \small 198 | \fullcite{Vie2019} 199 | 200 | ## Our model DAS3H 201 | 202 | $\rightarrow$ DAS3H = item **D**ifficulty, student **A**bility, **S**kill and **S**tudent **S**kill practice **H**istory 203 | 204 | For an embedding dimension of $d=0$, DAS3H is: 205 | 206 | $\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma (\alpha_s - \delta_j + \underbrace{\alert{\sum_{k \in KC(j)} \beta_k}}_{\text{skill easiness biases}} +h_{\theta}\left(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}\right))$. 207 | 208 | We choose: 209 | \begin{align*} 210 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \alert{\sum_{k \in KC(j)}}&\sum_{w=0}^{W-1}\theta_{\alert{k},2w+1}\log(1+c_{s,\alert{k},w})\\ 211 | &- \theta_{\alert{k},2w+2}\log(1+a_{s,\alert{k},w}). 212 | \end{align*} 213 | 214 | $\rightarrow$ Now, $h_{\theta}$ can be seen as a sum of _skill_ memory strengths! 215 | 216 | ## Example DAS3H forgetting curves 217 | 218 | \begin{figure}% 219 | \centering 220 | \subfloat[Slow forgetting]{{\includegraphics[width=5cm]{figures/example_das3h_curve_1.pdf} }}% 221 | \: 222 | \subfloat[Rapid forgetting]{{\includegraphics[width=5cm]{figures/example_das3h_curve_2.pdf} }}% 223 | \caption{Example forgetting curves randomly generated from our DAS3H model}% 224 | \label{example_curves}% 225 | \end{figure} 226 | \vspace{-5mm} 227 | 228 | # Experiments 229 | 230 | ## Experiments 231 | 232 | 1. Experimental setting 233 | 234 | 2. Contenders 235 | 236 | 3. Datasets 237 | 238 | 4. Main results 239 | 240 | 5. Further analyses 241 | 242 | ## Experimental setting 243 | 244 | \begin{block}{How to compare ML models?} 245 | Train the models on one part of the dataset 246 | 247 | Test on the other part 248 | 249 | Gather prediction metrics, compare the models 250 | \end{block} 251 | 252 | * **5-fold cross-validation** at the student level: predicting binary outcomes on \alert{unseen} students (_strong generalization_) 253 | * Distributional assumptions to \alert{avoid overfitting}: 254 | * When $d=0$: L2 regularization/$\mathcal{N}(0,1)$ prior 255 | * When $d > 0$: hierarchical distributional scheme 256 | * Same time windows as Lindsey et al.: {1/24,1,7,30,+$\infty$} 257 | 258 | ## Contenders 259 | 260 | 5 contenders: 261 | 262 | * \alert{DAS3H} 263 | * DASH [\cite{lindsey2014improving}] 264 | * IRT/MIRT [\cite{van2013handbook}] 265 | * PFA [\cite{pavlik2009performance}] 266 | * AFM [\cite{cen2006learning}] 267 | 268 | Every model was cast within the KTM framework $\rightarrow$ 3 embedding dimensions (0, 5 \& 20) + sparse feature encoding. 
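For readers who want to see the KTM scoring rule from the previous slide in code, here is a minimal sketch of the factorization-machine prediction with toy dense parameters; the actual experiments rely on sparse encodings and libFM, so this is only an illustration of the formula.

```python
import numpy as np

def ktm_logit(x, mu, w, V):
    """logit p(x) = mu + <w, x> + sum_{k<l} x_k x_l <v_k, v_l>."""
    linear = mu + w @ x
    # Standard FM identity: pairwise term in O(N * d) instead of O(N^2 * d)
    pairwise = 0.5 * np.sum((V.T @ x) ** 2 - (V.T ** 2) @ (x ** 2))
    return linear + pairwise

N, d = 6, 5                       # number of features, embedding dimension
rng = np.random.default_rng(1)
mu, w, V = 0.0, rng.normal(size=N), rng.normal(size=(N, d))

x = np.array([1.0, 0.0, 1.0, 1.0, 0.0, 0.0])   # sparse row: active user/item/skill
p = 1.0 / (1.0 + np.exp(-ktm_logit(x, mu, w, V)))
print(round(float(p), 3))
```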
269 | 270 | \tiny 271 | | | users | items | skills | wins | fails | attempts | tw [KC] | tw [items] | 272 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 273 | | **DAS3H** | x | x | x | x | | x | x | | 274 | | DASH | x | x | | x | | x | | x | 275 | | IRT/MIRT | x | x | | | | | | | 276 | | PFA | | | x | x | x | | | | 277 | | AFM | | | x | | | x | | | 278 | 279 | ## Datasets 280 | 281 | * 3 datasets: ASSISTments 2012-2013, Bridge to Algebra 2006-2007 \& Algebra I 2005-2006 (KDD Cup 2010) 282 | * Data consists of logs of student-item interactions on 2 ITS 283 | * Selected because they contain _both_ timestamps and items with multiple skills $\rightarrow$ rare species in the EDM datasets fauna 284 | * Preprocessing scheme: removed users with < 10 interactions, interactions with \texttt{NaN} skills, duplicates 285 | 286 | \tiny 287 | \input{tables/datasets_caracs.tex} 288 | 289 | ## Main results 290 | 291 | \input{tables/exp_results.tex} 292 | $\rightarrow$ On every dataset, **DAS3H outperforms** the other models (between +0.04 and +0.05 AUC compared to DASH). 293 | 294 | ## Main results 295 | 296 | \begin{figure}% 297 | \centering 298 | \subfloat[DAS3H]{{\includegraphics[width=5cm]{figures/comp_dim_das3h.pdf} }}% 299 | \: 300 | \subfloat[IRT]{{\includegraphics[width=5cm]{figures/comp_dim_irt.pdf} }}% 301 | \caption{AUC comparison on two models for $d=0, 5$ and 20 (all datasets, 5-fold cross-validation).}% 302 | \label{dim_com}% 303 | \end{figure} 304 | \vspace{-5mm} 305 | \raggedright 306 | $\rightarrow$ The impact of the multidim feature embeddings is small and not consistent across datasets and models (+ unstable sometimes). 307 | 308 | ## Importance of time windows 309 | 310 | \centering 311 | \begin{figure} 312 | \includegraphics[width=5.5cm]{figures/pairwise_comp_all_datasets.pdf} 313 | \caption{AUC comparison on DAS3H \textit{with} and \textit{without} time windows features (all datasets, 5-fold cross-validation).} 314 | \end{figure} 315 | \vspace{-3mm} 316 | \raggedright 317 | Without time windows, $h_{\theta}$ counts past wins and attempts in DAS3H. 318 | $\rightarrow$ Using \alert{temporal distribution of past skill practice} instead of simple win/fail counters improves AUC performance: the _**when**_ matters. 319 | 320 | ## Importance of different learning/forgetting curves per skill 321 | \scriptsize 322 | \input{tables/comp_DAS3H_multiparams.tex} 323 | 324 | \normalsize 325 | $\rightarrow$ Assuming **different learning and forgetting curves for different skills** in DAS3H consistently yields better predictive power: some skills are easier to learn and slower to forget. 326 | 327 | # Conclusion 328 | 329 | ## In a nutshell 330 | 331 | * Human forgetting is _ubiquitous_ but luckily: 332 | * \alert{Cognitive science} gives us efficient and simple learning strategies 333 | * \alert{ML} can build us tools to **personalize these strategies** and further improve LT memory retention 334 | 335 | * Adaptive spacing algorithms have been focusing on _pure memorization_ (e.g. vocabulary learning) 336 | * They can be used for \alert{optimizing practice and retention of skills} 337 | 338 | * Our student model **DAS3H** 339 | * incorporates information on _skills_ **and** _forgetting_ to predict learner performance 340 | * shows higher predictive power than other SOTA student models 341 | * fits our model-based approach for optimally scheduling skill review 342 | 343 | ## Thanks for your attention! 
344 | 345 | Our paper is available at: 346 | 347 | \centering 348 | `https://arxiv.org/abs/1905.06873` 349 | 350 | \raggedright 351 | Python code is freely available on my GitHub page: 352 | 353 | \centering 354 | `https://github.com/BenoitChoffin/das3h` ! 355 | 356 | \raggedright 357 | To send me questions about our paper or my research work: 358 | 359 | \centering 360 | `benoit.choffin@lri.fr` 361 | 362 | -------------------------------------------------------------------------------- /slides/lip6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/lip6.pdf -------------------------------------------------------------------------------- /slides/presentation.md: -------------------------------------------------------------------------------- 1 | % DAS3H: Modeling Student Learning and Forgetting for Optimally Scheduling Distributed Practice of Skills 2 | % \alert{Benoît Choffin}, Fabrice Popineau, Yolaine Bourda & Jill-Jênn Vie\newline\newline\scriptsize LRI/CentraleSupélec - University of Paris-Saclay | RIKEN AIP 3 | % EDM 2019, Montréal | July 4, 2019 4 | --- 5 | theme: Frankfurt 6 | institute: \includegraphics[height=1.2cm]{figures/logo_lri.jpeg} \quad \includegraphics[height=1.3cm]{figures/LogoCS1.png} \quad \includegraphics[height=1.3cm]{figures/logo_UP_saclay_final.png} \quad \includegraphics[height=1cm]{figures/aip.png} 7 | section-titles: false 8 | handout: true 9 | biblio-style: authoryear 10 | header-includes: 11 | - \usepackage{booktabs} 12 | - \usepackage{makecell} 13 | - \usepackage{multicol} 14 | - \usepackage{multirow} 15 | - \usepackage{subfig} 16 | - \usepackage{bm} 17 | - \DeclareMathOperator\logit{logit} 18 | biblatexoptions: 19 | - maxbibnames=99 20 | - maxcitenames=5 21 | --- 22 | 23 | # Introduction 24 | 25 | ## Mitigating human forgetting with spaced repetition 26 | 27 | * Human learners face a constant trade-off between **acquiring new knowledge** and **reviewing old knowledge** \bigskip 28 | * Cognitive science provides simple + robust learning strategies for improving LT memory 29 | * \alert{Spaced repetition} 30 | * \alert{Testing} \bigskip 31 | * Can we do better? **Yes**, by providing students with an _adaptive_ and _personalized_ spacing scheduler. 32 | 33 | ## Mitigating human forgetting with spaced repetition 34 | 35 | \raisebox{.5cm}{\includegraphics[width=0.5\textwidth]{figures/leitner.png}}\includegraphics[width=0.5\textwidth]{figures/anki.png} 36 | 37 | Ex. select the item whose memory strength is closest to a threshold $\theta$ [\cite{lindsey2014improving}] $\rightarrow$ "almost forgotten" 38 | 39 | ## Beyond flashcard memorization 40 | 41 | **Problem**: these algorithms are designed for optimizing _pure memorization_ (of facts, vocabulary,...) 
42 | 43 | * In real-world educational settings, students also need to learn to master and remember a set of **skills** 44 | 45 | * In that case, specific items are the only way to practice one or multiple skills because _we do not have to memorize the content directly_ 46 | 47 | * Traditional adaptive spacing schedulers are **not applicable for learning skills** 48 | 49 | ## Extension to skill practice and review 50 | 51 | \begin{minipage}{0.4\linewidth} 52 | \textcolor{blue!80}{Item}-\textcolor{green!50!black}{skill} relationships require expert labor and are synthesized inside a binary q-matrix $\rightarrow$ 53 | \end{minipage}\begin{minipage}{0.6\linewidth} 54 | \scriptsize 55 | \input{tables/dummy_qmat.tex} 56 | \end{minipage} 57 | 58 | \centering 59 | \includegraphics[width=10cm]{figures/item_skills_relations.pdf} 60 | 61 | ## Limitations of student models 62 | 63 | We need to be able to infer skill memory strength and dynamics, however in the student modeling literature: 64 | 65 | * some models leverage item-skills relationships 66 | * some others incorporate forgetting 67 | 68 | But none does both! 69 | 70 | ## Our contribution 71 | 72 | We take a model-based approach for this task. 73 | 74 | 1. Traditional adaptive spacing algorithms can be extended to review and practice skills (not only flashcards).\bigskip 75 | 2. We developed a new student _learning_ and _forgetting_ model that leverages item-skill relationships: \alert{\textbf{DAS3H}}. 76 | * DAS3H outperforms 4 SOTA student models on 3 datasets. 77 | * Incorporating skill info + forgetting effect improves over models that consider one or the other. 78 | * Using precise temporal information on past skill practice + assuming different learning/forgetting curves \alert{for different skills} improves performance. 79 | 80 | ## Outline 81 | 82 | 1. DASH\bigskip 83 | 2. Our model DAS3H\bigskip 84 | 3. Experiments\bigskip 85 | 4. Conclusion 86 | 87 | # DASH 88 | 89 | ## DASH 90 | 91 | $\rightarrow$ DASH = item **D**ifficulty, student **A**bility, and **S**tudent **H**istory 92 | 93 | DASH [\cite{lindsey2014improving}] bridges the gap between _Factor Analysis models_ and _memory models_: 94 | 95 | $$\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma(\alpha_s - \delta_j + h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}))$$ 96 | 97 | where: 98 | 99 | * $Y_{s,j,t}$ binary correctness of student $s$ answering item $j$ at time $t$; 100 | * $\sigma$ logistic function; 101 | * $\alpha_s$ ability of student $s$; 102 | * $\delta_j$ difficulty of item $j$; 103 | * $h_{\theta}$ summarizes the effect of the $l-1$ previous attempts of $s$ on $j$ at times $\mathrm{t}_{s,j,1:l-1}$ + the binary outcomes $\mathrm{y}_{s,j,1:l-1}$. 104 | 105 | ## DASH 106 | 107 | Lindsey et al. chose: 108 | \begin{align*} 109 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,j,w}) \\ 110 | &- \theta_{2w+2}\log(1+a_{s,j,w}) 111 | \end{align*} 112 | 113 | where: 114 | 115 | * $w$ indexes a set of expanding \alert{time windows}; 116 | * $c_{s,j,w}$ number of correct answers of $s$ on $j$ in time window $w$; 117 | * $a_{s,j,w}$ number of attempts of $s$ on $j$ in time window $w$; 118 | * $\theta$ is _learned_ by DASH. 
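In practice, the per-window counters $c_{s,j,w}$ and $a_{s,j,w}$ have to be maintained efficiently while scanning a student's history; this repository ships a small helper for exactly that kind of windowed counting in `utils/this_queue.py`. A short usage sketch with made-up timestamps:

```python
from utils.this_queue import OurQueue

q = OurQueue()
for t in [0, 2 * 3600, 3 * 24 * 3600]:   # attempts at 0 h, 2 h, and day 3
    q.push(t)

# Counters half an hour after the last attempt, one per expanding window:
# [all time, last 30 days, last 7 days, last day, last hour]
print(q.get_counters(3 * 24 * 3600 + 1800))   # -> [3, 3, 3, 1, 1]
```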
119 | 120 | ## DASH 121 | 122 | Assuming that the set of time windows is \{1, 7, 14, $+\infty$\}: 123 | 124 | \centering 125 | \includegraphics[width=10cm]{figures/time_windows.pdf} 126 | 127 | ## DASH 128 | 129 | DASH: 130 | 131 | * accounts for both _learning_ and _forgetting_ processes; 132 | 133 | * induces diminishing returns of practice inside a time window (log-counts); 134 | 135 | * has a time module $h_{\theta}$ inspired by ACT-R [\cite{anderson1997act}] and MCM [\cite{pashler2009predicting}]. 136 | 137 | # DAS3H 138 | 139 | ## From DASH to DAS3H 140 | 141 | * DASH 142 | * outperforms a hierarchical Bayesian IRT on Lindsey et al. experimental data (vocabulary learning). 143 | * was successfully used to adaptively personalize item review in a real-world cognitive psychology experiment. 144 | \bigskip 145 | * However, DASH 146 | * does not handle multiple skill item tagging $\rightarrow$ useful to account for knowledge transfer from one item to another. 147 | * assumes that memory decays at the same rate for every KC. 148 | 149 | ## Our model DAS3H 150 | 151 | We extend DASH in **3 ways**: 152 | \begin{enumerate} 153 | \item Extension to handle multiple skills tagging: new temporal module $h_{\theta}$ that also takes the multiple skills into account. 154 | \begin{itemize} 155 | \item Influence of the temporal distribution of past attempts and outcomes can differ from one skill to another. 156 | \end{itemize} 157 | \item Estimation of easiness parameters for \textit{each} item $j$ and skill $k$; 158 | \item Use of KTMs [\cite{Vie2019}] instead of mere logistic regression for multidimensional feature embeddings and pairwise interactions. 159 | \end{enumerate} 160 | 161 | ## Knowledge Tracing Machines (KTMs) 162 | 163 | Just pick features (ex. \textcolor{blue!80}{user}, \textcolor{orange}{item}, \textcolor{green!50!black}{skill}) and you get a student model 164 | 165 | Each feature $k$ is modeled by bias $\alert{w_k}$ and embedding $\alert{\bm{v_k}}$.\vspace{2mm} 166 | \begin{columns} 167 | \begin{column}{0.47\linewidth} 168 | \includegraphics[width=\linewidth]{figures/fm.pdf} 169 | \end{column} 170 | \begin{column}{0.53\linewidth} 171 | \includegraphics[width=\linewidth]{figures/fm2.pdf} 172 | \end{column} 173 | \end{columns}\vspace{-2mm} 174 | 175 | \hfill $\logit p(\bm{x}) = \mu + \underbrace{\sum_{k = 1}^N \alert{w_k} x_k}_\textnormal{logistic regression} + \underbrace{\sum_{1 \leq k < l \leq N} x_k x_l \langle \alert{\bm{v_k}}, \alert{\bm{v_l}} \rangle}_\textnormal{pairwise relationships}$ 176 | 177 | \small 178 | \fullcite{Vie2019} 179 | 180 | ## Our model DAS3H 181 | 182 | $\rightarrow$ DAS3H = item **D**ifficulty, student **A**bility, **S**kill and **S**tudent **S**kill practice **H**istory 183 | 184 | For an embedding dimension of $d=0$, DAS3H is: 185 | 186 | $\mathbb{P}\left(Y_{s,j,t}=1\right)=\sigma (\alpha_s - \delta_j + \underbrace{\alert{\sum_{k \in KC(j)} \beta_k}}_{\text{skill easiness biases}} +h_{\theta}\left(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}\right))$. 187 | 188 | We choose: 189 | \begin{align*} 190 | h_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \alert{\sum_{k \in KC(j)}}&\sum_{w=0}^{W-1}\theta_{\alert{k},2w+1}\log(1+c_{s,\alert{k},w})\\ 191 | &- \theta_{\alert{k},2w+2}\log(1+a_{s,\alert{k},w}). 192 | \end{align*} 193 | 194 | $\rightarrow$ Now, $h_{\theta}$ can be seen as a sum of _skill_ memory strengths! 195 | 196 | # Experiments 197 | 198 | ## Experiments 199 | 200 | 1. Experimental setting 201 | 202 | 2. Contenders 203 | 204 | 3. 
Datasets 205 | 206 | 4. Main results 207 | 208 | 5. Further analyses 209 | 210 | ## Experimental setting 211 | 212 | \begin{block}{How to compare ML models?} 213 | Train the models on one part of the dataset 214 | 215 | Test on the other part 216 | 217 | Gather prediction metrics, compare the models 218 | \end{block} 219 | 220 | * **5-fold cross-validation** at the student level: predicting binary outcomes on \alert{unseen} students (_strong generalization_) 221 | * Distributional assumptions to \alert{avoid overfitting}: 222 | * When $d=0$: L2 regularization/$\mathcal{N}(0,1)$ prior 223 | * When $d > 0$: hierarchical distributional scheme 224 | * Same time windows as Lindsey et al.: {1/24,1,7,30,+$\infty$} 225 | 226 | ## Contenders 227 | 228 | 5 contenders: 229 | 230 | * \alert{DAS3H} 231 | * DASH [\cite{lindsey2014improving}] 232 | * IRT/MIRT [\cite{van2013handbook}] 233 | * PFA [\cite{pavlik2009performance}] 234 | * AFM [\cite{cen2006learning}] 235 | 236 | Every model was cast within the KTM framework $\rightarrow$ 3 embedding dimensions (0, 5 \& 20) + sparse feature encoding. 237 | 238 | \tiny 239 | | | users | items | skills | wins | fails | attempts | tw [KC] | tw [items] | 240 | |:-:|:-----:|:-----:|:------:|:----:|:-----:|:--------:|:-----:|:--------:| 241 | | **DAS3H** | x | x | x | x | | x | x | | 242 | | DASH | x | x | | x | | x | | x | 243 | | IRT/MIRT | x | x | | | | | | | 244 | | PFA | | | x | x | x | | | | 245 | | AFM | | | x | | | x | | | 246 | 247 | ## Datasets 248 | 249 | * 3 datasets: ASSISTments 2012-2013, Bridge to Algebra 2006-2007 \& Algebra I 2005-2006 (KDD Cup 2010) 250 | * Data consists of logs of student-item interactions on 2 ITS 251 | * Selected because they contain _both_ timestamps and items with multiple skills $\rightarrow$ rare species in the EDM datasets fauna 252 | * Preprocessing scheme: removed users with < 10 interactions, interactions with \texttt{NaN} skills, duplicates 253 | 254 | \tiny 255 | \input{tables/datasets_caracs.tex} 256 | 257 | ## Main results 258 | 259 | \input{tables/exp_results.tex} 260 | $\rightarrow$ On every dataset, **DAS3H outperforms** the other models (between +0.04 and +0.05 AUC compared to DASH). 261 | 262 | ## Main results 263 | 264 | \begin{figure}% 265 | \centering 266 | \subfloat[DAS3H]{{\includegraphics[width=5cm]{figures/comp_dim_das3h.pdf} }}% 267 | \: 268 | \subfloat[IRT]{{\includegraphics[width=5cm]{figures/comp_dim_irt.pdf} }}% 269 | \caption{AUC comparison on two models for $d=0, 5$ and 20 (all datasets, 5-fold cross-validation).}% 270 | \label{dim_com}% 271 | \end{figure} 272 | \vspace{-5mm} 273 | \raggedright 274 | $\rightarrow$ The impact of the multidim feature embeddings is small and not consistent across datasets and models (+ unstable sometimes). 275 | 276 | ## Importance of time windows 277 | 278 | \centering 279 | \begin{figure} 280 | \includegraphics[width=5.5cm]{figures/pairwise_comp_all_datasets.pdf} 281 | \caption{AUC comparison on DAS3H \textit{with} and \textit{without} time windows features (all datasets, 5-fold cross-validation).} 282 | \end{figure} 283 | \vspace{-3mm} 284 | \raggedright 285 | Without time windows, $h_{\theta}$ counts past wins and attempts in DAS3H. 286 | $\rightarrow$ Using \alert{temporal distribution of past skill practice} instead of simple win/fail counters improves AUC performance: the _**when**_ matters. 
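The table on the next slide compares DAS3H against a variant denoted $\text{DAS3H}_{1p}$, which, as we read it, keeps the per-skill windowed counters but shares a single set of window weights across all skills (the superscript $1p$ below is our notation for that variant):

\begin{align*}
h^{1p}_{\theta}(\mathrm{t}_{s,j,1:l},\mathrm{y}_{s,j,1:l-1}) = \sum_{k \in KC(j)}\sum_{w=0}^{W-1} & \theta_{2w+1}\log(1+c_{s,k,w})\\
&- \theta_{2w+2}\log(1+a_{s,k,w}).
\end{align*}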
287 | 288 | ## Importance of different learning/forgetting curves per skill 289 | \scriptsize 290 | \input{tables/comp_DAS3H_multiparams.tex} 291 | 292 | \normalsize 293 | $\rightarrow$ Assuming **different learning and forgetting curves for different skills** in DAS3H consistently yields better predictive power: some skills are easier to learn and slower to forget. 294 | 295 | # Conclusion 296 | 297 | ## In a nutshell 298 | 299 | * Human forgetting is _ubiquitous_ but luckily: 300 | * \alert{Cognitive science} gives us efficient and simple learning strategies 301 | * \alert{ML} can build us tools to **personalize these strategies** and further improve LT memory retention 302 | 303 | * Adaptive spacing algorithms have been focusing on _pure memorization_ (e.g. vocabulary learning) 304 | * They can be used for \alert{optimizing practice and retention of skills} 305 | 306 | * Our student model **DAS3H** 307 | * incorporates information on _skills_ **and** _forgetting_ to predict learner performance 308 | * shows higher predictive power than other SOTA student models 309 | * fits our model-based approach for optimally scheduling skill review 310 | 311 | ## Thanks for your attention! 312 | 313 | Our paper is already available at: 314 | 315 | \centering 316 | `https://arxiv.org/abs/1905.06873` 317 | 318 | \raggedright 319 | Python code is freely available on my GitHub page: 320 | 321 | \centering 322 | `https://github.com/BenoitChoffin/das3h` ! 323 | 324 | \raggedright 325 | To send me questions about our paper or my research work: 326 | 327 | \centering 328 | `benoit.choffin@lri.fr` 329 | 330 | -------------------------------------------------------------------------------- /slides/presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/slides/presentation.pdf -------------------------------------------------------------------------------- /slides/tables/comp_DAS3H_multiparams.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | \begin{tabular}{ccccc} 4 | \toprule 5 | & $d$ & bridge06 & algebra05 & assist12 \\ 6 | \midrule 7 | \parbox[t]{2.2mm}{\multirow{3}{*}{\rotatebox[origin=c]{90}{\alert{DAS3H}}}} & 0 & $\textbf{0.790} \pm 0.004$ & $\textbf{0.826} \pm 0.003$ & $0.739 \pm 0.001$\\ 8 | & 5 & $\textbf{0.791} \pm 0.005$ & $0.818 \pm 0.004$ & $\textbf{0.744} \pm 0.002$ \\ 9 | & 20 & $0.776 \pm 0.023$ & $0.817 \pm 0.005$ & $0.740 \pm 0.001$ \\[1em] 10 | \parbox[t]{2.2mm}{\multirow{3}{*}{\raisebox{-2ex}[0.2ex]{\rotatebox[origin=c]{90}{$\text{DAS3H}_\text{1p}$}}}} & 0 & $0.757 \pm 0.003$ & $0.789 \pm 0.009$ & $0.701 \pm 0.002$ \\ 11 | & 5 & $0.757 \pm 0.005$ & $0.787 \pm 0.005$ & $0.700 \pm 0.001$ \\ 12 | & 20 & $0.757 \pm 0.003$ & $0.789 \pm 0.006$ & 0.701 (<1e-3) \\[0.5em] 13 | \bottomrule 14 | \end{tabular} 15 | \caption{AUC comparison between DAS3H and $\text{DAS3H}_{1p}$ (all datasets, 5-fold cross-validation).} 16 | \end{table} -------------------------------------------------------------------------------- /slides/tables/datasets_caracs.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | \begin{tabular}{@{}lrrrrrrrr@{}} 4 | \toprule 5 | Dataset & Users & Items & Skills & Interactions & \makecell[cl]{Mean \\ correctness} & 6 | \makecell[cl]{Skills \\ per item} & 7 | \makecell[cl]{Mean \\ skill delay} & 8 | 
\makecell[cl]{Mean \\ study period} \\ 9 | \midrule 10 | assist12 & 24,750 & 52,976 & 265 & 2,692,889 & 0.696 & 1.000 & 8.54 & 98.3 \\ 11 | bridge06 & 1,135 & 129,263 & 493 & 1,817,427 & 0.832 & 1.013 & 0.83 & 149.5 \\ 12 | algebra05 & 569 & 173,113 & 112 & 607,000 & 0.755 & 1.363 & 3.36 & 109.9 \\ 13 | \bottomrule 14 | \end{tabular} 15 | \caption{Datasets characteristics} 16 | \label{data_caracs} 17 | \end{table} -------------------------------------------------------------------------------- /slides/tables/datasets_caracs_jdse.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | \begin{tabular}{@{}lrrrrrr@{}} 4 | \toprule 5 | Dataset & Users & Items & Skills & Interactions & \makecell[cl]{Mean \\ correctness} & 6 | \makecell[cl]{Skills \\ per item} \\ 7 | \midrule 8 | assist12 & 24,750 & 52,976 & 265 & 2,692,889 & 0.696 & 1.000 \\ 9 | bridge06 & 1,135 & 129,263 & 493 & 1,817,427 & 0.832 & 1.013 \\ 10 | algebra05 & 569 & 173,113 & 112 & 607,000 & 0.755 & 1.363 \\ 11 | \bottomrule 12 | \end{tabular} 13 | \caption{Datasets characteristics} 14 | \label{data_caracs} 15 | \end{table} -------------------------------------------------------------------------------- /slides/tables/dummy_qmat.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | \begin{tabular}{c|ccc} 4 | \toprule 5 | & skill 1 & skill 2 & skill 3 \\ 6 | \midrule 7 | item 1 & 1 & 0 & 0 \\ 8 | item 2 & 0 & 1 & 1 \\ 9 | item 3 & 0 & 1 & 0 \\ 10 | item 4 & 1 & 0 & 1 \\ 11 | item 5 & 0 & 0 & 1 \\ 12 | \bottomrule 13 | \end{tabular} 14 | \end{table} -------------------------------------------------------------------------------- /slides/tables/exp_results.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \setlength{\abovecaptionskip}{4mm} 3 | \centering 4 | \begin{tabular}{cccc} 5 | \toprule 6 | model & algebra05 & bridge06 & assist12 \\ 7 | \midrule 8 | \alert{DAS3H} & $\textbf{0.826} \pm 0.003$ & $\textbf{0.790} \pm 0.004$ & $\textbf{0.739} \pm 0.001$ \\ 9 | DASH & $0.773 \pm 0.002$ & $0.749 \pm 0.002$ & $0.703 \pm 0.002$ \\ 10 | IRT & $0.771 \pm 0.007$ & $0.747 \pm 0.002$ & $0.702 \pm 0.001$ \\ 11 | PFA & $0.744 \pm 0.004$ & $0.739 \pm 0.003$ & $0.668 \pm 0.002$ \\ 12 | AFM & $0.707 \pm 0.005$ & $0.692 \pm 0.002$ & $0.608 \pm 0.002$ \\ 13 | \bottomrule 14 | \end{tabular} 15 | \caption{AUC comparison between the different student models for an embedding dimension $d=0$ (all datasets, 5-fold cross-validation).} 16 | \label{all_res} 17 | \end{table} -------------------------------------------------------------------------------- /split_data.py: -------------------------------------------------------------------------------- 1 | import dataio 2 | import argparse 3 | import pandas as pd 4 | 5 | parser = argparse.ArgumentParser(description='Split data into train-test folds.') 6 | parser.add_argument('--dataset_name', type=str, nargs='?') 7 | parser.add_argument('--generalization', type=str, nargs='?') 8 | parser.add_argument('--n_folds', type=int, nargs='?', default=5) 9 | parser.add_argument('--perc_init', type=float, nargs='?', default=.2) 10 | 11 | options = parser.parse_args() 12 | 13 | if __name__ == "__main__": 14 | df = pd.read_csv("data/"+options.dataset_name+"/preprocessed_data.csv") 15 | if options.generalization == "strongest": 16 | dataio.save_strongest_folds(df, options.dataset_name, options.n_folds) 17 | elif options.generalization 
== "pseudostrong": 18 | dataio.save_pseudostrong_folds(df, options.dataset_name, options.perc_init, options.n_folds) 19 | else: 20 | print("Unknown generalization scheme.") -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenoitChoffin/das3h/e77770af5e18ba0b9841840be67cfa9b5a91e449/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_encode_data.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.sparse import load_npz 5 | from encode import df_to_sparse 6 | 7 | class EncodeTestCase(unittest.TestCase): 8 | def setUp(self): 9 | self.data = pd.read_csv("data/dummy/preprocessed_data.csv") 10 | self.q_mat = load_npz("data/dummy/q_mat.npz").toarray() 11 | 12 | def test_ui(self): 13 | # Test IRT/MIRT encoding 14 | X_ui = df_to_sparse(self.data, self.q_mat, ["users", "items"]).toarray() 15 | # Sort array 16 | X_ui = X_ui[X_ui[:,4].argsort(),5:] # Collect only sparse columns 17 | irt_features = np.array(pd.read_csv("data/dummy/irt.csv", sep=';')) 18 | self.assertSequenceEqual(X_ui.tolist(), irt_features.tolist(), 19 | "Inconsistent IRT features") 20 | 21 | def test_afm(self): 22 | # Test AFM encoding 23 | X_afm = df_to_sparse(self.data, self.q_mat, ["skills", "attempts"]).toarray() 24 | # Sort array 25 | X_afm = X_afm[X_afm[:,4].argsort(),5:] # Collect only sparse columns 26 | afm_features = np.array(pd.read_csv("data/dummy/afm.csv", sep=';')) 27 | self.assertSequenceEqual(X_afm.tolist(), afm_features.tolist(), 28 | "Inconsistent AFM features") 29 | 30 | def test_pfa(self): 31 | # Test PFA encoding 32 | X_pfa = df_to_sparse(self.data, self.q_mat, ["skills", "wins", "fails"]).toarray() 33 | # Sort array 34 | X_pfa = X_pfa[X_pfa[:,4].argsort(),5:] # Collect only sparse columns 35 | pfa_features = np.array(pd.read_csv("data/dummy/pfa.csv", sep=';')) 36 | self.assertSequenceEqual(X_pfa.tolist(), pfa_features.tolist(), 37 | "Inconsistent PFA features") 38 | 39 | def test_dash(self): 40 | # Test DASH encoding 41 | X_uiwat2 = df_to_sparse(self.data, self.q_mat, ["users", "items", "wins", "attempts"], tw="tw_items").toarray() 42 | # Sort array 43 | X_uiwat2 = X_uiwat2[X_uiwat2[:,4].argsort(),5:] # Collect only sparse columns 44 | # Convert to simple counters to avoid using assertAlmostEqual and floats 45 | X_uiwat2[:,-10:] = np.exp(X_uiwat2[:,-10:])-1 46 | 47 | dash_features = np.array(pd.read_csv("data/dummy/dash.csv", sep=';')) 48 | self.assertSequenceEqual(X_uiwat2.tolist(), dash_features.tolist(), 49 | "Inconsistent DASH features") 50 | 51 | def test_das3h(self): 52 | # Test DAS3H encoding 53 | X_uiswat1 = df_to_sparse(self.data, self.q_mat, ["users", "items", "skills", "wins", "attempts"], 54 | tw="tw_kc").toarray() 55 | # Sort array 56 | X_uiswat1 = X_uiswat1[X_uiswat1[:,4].argsort(),5:] # Collect only sparse columns 57 | # Convert to simple counters to avoid using assertAlmostEqual and floats 58 | X_uiswat1[:,-30:] = np.exp(X_uiswat1[:,-30:])-1 59 | X_uiswat1 = X_uiswat1.astype(np.int64) 60 | 61 | das3h_features = np.array(pd.read_csv("data/dummy/das3h.csv", sep=';')) 62 | self.assertSequenceEqual(X_uiswat1.tolist(), das3h_features.tolist(), 63 | "Inconsistent DAS3H features") 64 | 65 | if __name__ == '__main__': 66 | unittest.main() 67 | 68 | 69 | 
-------------------------------------------------------------------------------- /tests/test_this_queue.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from utils.this_queue import OurQueue 3 | 4 | # From JJ's ktm repo: https://github.com/jilljenn/ktm 5 | 6 | class TestOurQueue(unittest.TestCase): 7 | 8 | def test_simple(self): 9 | q = OurQueue() 10 | q.push(0) 11 | q.push(0.8 * 3600 * 24) 12 | q.push(5 * 3600 * 24) 13 | q.push(40 * 3600 * 24) 14 | self.assertEqual(q.get_counters(40 * 3600 * 24), [4, 1, 1, 1, 1]) 15 | 16 | def test_complex(self): 17 | q = OurQueue() 18 | q.push(0) 19 | q.push(10) 20 | q.push(3599) 21 | q.push(3600) 22 | q.push(3601) 23 | q.push(3600 * 24) 24 | q.push(3600 * 24 + 1) 25 | q.push(3600 * 24 * 7) 26 | q.push(3600 * 24 * 7 + 1) 27 | q.push(3600 * 24 * 7 * 30) 28 | q.push(3600 * 24 * 7 * 30 + 1) 29 | self.assertEqual(q.get_counters(3600 * 24 * 7 * 30 + 1), [11, 2, 2, 2, 2]) 30 | -------------------------------------------------------------------------------- /utils/this_queue.py: -------------------------------------------------------------------------------- 1 | class OurQueue: 2 | """ 3 | A queue for counting efficiently the number of events within time windows. 4 | Complexity: 5 | All operators in amortized O(W) time where W is the number of windows. 6 | 7 | From JJ's KTM repository: https://github.com/jilljenn/ktm. 8 | """ 9 | def __init__(self): 10 | #self.now = None 11 | self.queue = [] 12 | self.window_lengths = [3600 * 24 * 30, 3600 * 24 * 7, 3600 * 24, 3600] 13 | self.cursors = [0] * len(self.window_lengths) 14 | 15 | def __len__(self): 16 | return len(self.queue) 17 | 18 | def get_counters(self, t): 19 | self.update_cursors(t) 20 | return [len(self.queue)] + [len(self.queue) - cursor 21 | for cursor in self.cursors] 22 | 23 | def push(self, time): 24 | self.queue.append(time) 25 | 26 | def update_cursors(self, t): 27 | for pos, length in enumerate(self.window_lengths): 28 | while (self.cursors[pos] < len(self.queue) and 29 | t - self.queue[self.cursors[pos]] >= length): 30 | self.cursors[pos] += 1 31 | --------------------------------------------------------------------------------