├── edmine ├── __init__.py ├── data │ └── __init__.py ├── env │ ├── __init__.py │ └── learning_path_recommendation │ │ ├── __init__.py │ │ └── KTEnv.py ├── llm │ ├── __init__.py │ └── dspy │ │ ├── __init__.py │ │ └── remote_llm │ │ └── __init__.py ├── config │ ├── __init__.py │ ├── model.py │ ├── basic.py │ └── env.py ├── constant │ └── __init__.py ├── dataset │ ├── __init__.py │ └── CognitiveDiagnosisDataset.py ├── evaluator │ ├── __init__.py │ ├── DLEvaluator.py │ └── SequentialDLKTEvaluator4FTAcc.py ├── metric │ ├── __init__.py │ ├── learning_path_recommendation.py │ └── knowledge_tracing.py ├── model │ ├── __init__.py │ ├── module │ │ ├── __init__.py │ │ ├── Clipper.py │ │ ├── calculation.py │ │ ├── Graph.py │ │ └── PredictorLayer.py │ ├── non_sequential_kt_model │ │ └── __init__.py │ ├── sequential_kt_model │ │ └── __init__.py │ ├── cognitive_diagnosis_model │ │ ├── __init__.py │ │ ├── DLCognitiveDiagnosisModel.py │ │ └── MIRT.py │ ├── exercise_recommendation_model │ │ ├── __init__.py │ │ ├── DLExerciseRecommendationModel.py │ │ └── ExerciseRecommendationModel.py │ ├── learning_path_recommendation_agent │ │ ├── __init__.py │ │ ├── RLBasedLPRAgent.py │ │ └── RandomRecQCAgent.py │ ├── registry.py │ ├── ExerciseRecommendationModel.py │ ├── LearningPathRecommendationAgent.py │ ├── utils.py │ ├── CognitiveDiagnosisModel.py │ ├── KnowledgeTracingModel.py │ ├── load_model.py │ ├── load_agent.py │ └── loss.py ├── roster │ ├── __init__.py │ ├── DLCDRoster.py │ └── DLKTRoster.py ├── trainer │ ├── __init__.py │ ├── utils.py │ ├── DLCognitiveDiagnosisTrainer.py │ └── SequentialDLKTTrainer.py └── utils │ ├── __init__.py │ ├── log.py │ ├── use_dgl.py │ ├── check.py │ ├── calculate.py │ └── use_torch.py ├── examples ├── cognitive_diagnosis │ ├── train │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── mirt.py │ │ ├── set_params │ │ │ ├── __init__.py │ │ │ ├── grad_acc_params.py │ │ │ ├── clip_params.py │ │ │ ├── scheduler_params.py │ │ │ └── congnitive_diagnosis_params.py │ │ 
├── rcd_search_params.py │ │ ├── mirt_search_params.py │ │ ├── hier_cdf_search_params.py │ │ ├── irt_search_params.py │ │ ├── hyper_cd_search_params.py │ │ ├── utils.py │ │ ├── ncd_search_params.py │ │ ├── dina_search_params.py │ │ ├── rcd.py │ │ ├── mirt.py │ │ ├── hier_cdf.py │ │ ├── irt.py │ │ ├── dina.py │ │ ├── ncd.py │ │ └── hyper_cd.py │ ├── prepare_dataset │ │ ├── config.py │ │ └── ncd_setting.py │ ├── rcd │ │ ├── config.py │ │ └── build_u_e_graph.py │ ├── hier_cdf │ │ └── config.py │ ├── hyper_cd │ │ └── config.py │ └── evaluate │ │ └── dlcd.py ├── knowledge_tracing │ ├── train │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── rekt.py │ │ │ ├── mikt.py │ │ │ ├── abqr.py │ │ │ └── ckt.py │ │ ├── set_params │ │ │ ├── __init__.py │ │ │ ├── grad_acc_params.py │ │ │ ├── clip_params.py │ │ │ ├── scheduler_params.py │ │ │ └── sequential_kt_params.py │ │ ├── hawkes_kt_search_params.py │ │ ├── abqr_search_params.py │ │ ├── dkvmn_search_params.py │ │ ├── atdkt_search_params.py │ │ ├── dkt_forget_search_params.py │ │ ├── dkt_search_params.py │ │ ├── dimkt_search_params.py │ │ └── qdkt_search_params.py │ ├── prepare_dataset │ │ ├── config.py │ │ ├── pykt_setting.py │ │ ├── sub_from_test_data.py │ │ └── sfkt_setting.py │ ├── abqr │ │ ├── config.py │ │ └── get_graph.py │ ├── dimkt │ │ └── config.py │ ├── dygkt │ │ └── config.py │ ├── gikt │ │ ├── config.py │ │ └── get_graph.py │ ├── grkt │ │ └── config.py │ ├── hgkt │ │ └── config.py │ ├── lbkt │ │ └── config.py │ ├── qdckt │ │ ├── config.py │ │ └── get_difficulty.py │ ├── evaluate │ │ └── first_trans_dlkt.py │ └── mc2sc.py ├── exercise_recommendation │ ├── train │ │ ├── config │ │ │ └── __init__.py │ │ └── set_params │ │ │ ├── __init__.py │ │ │ ├── grad_acc_params.py │ │ │ ├── clip_params.py │ │ │ ├── scheduler_params.py │ │ │ └── exercise_recommendation_params.py │ ├── preprare_dataset │ │ └── config.py │ ├── kg4ex │ │ └── config.py │ ├── evaluate │ │ ├── utils.py │ │ ├── config.py │ │ └── kg4ex.py │ └── 
user_exercise_based_CF │ │ ├── config.py │ │ └── rec_strategy.py ├── learning_path_recommendation │ ├── train │ │ ├── config │ │ │ └── __init__.py │ │ ├── set_params │ │ │ ├── grad_acc_params.py │ │ │ ├── __init__.py │ │ │ ├── clip_params.py │ │ │ ├── epoch_trainer_params.py │ │ │ ├── step_trainer_params.py │ │ │ ├── scheduler_params.py │ │ │ └── lpr_params.py │ │ ├── utils.py │ │ ├── reinforce_search.py │ │ ├── reinforce.py │ │ ├── d3qn_search.py │ │ └── a2c.py │ ├── prepare_dataset │ │ ├── config.py │ │ └── online_setting.py │ ├── dlpr │ │ ├── config.py │ │ └── utils.py │ └── evaluate │ │ └── offline_single_goal_evaluate.py ├── set_up.py ├── roster │ ├── dlcd_roster.py │ ├── dlkt_roster.py │ └── config.py └── data_preprocess │ └── kt_data.py ├── asset └── img │ ├── pypi_icon.png │ ├── ExperimentalFlowChart.jpg │ ├── trace_related_cs_change.png │ ├── trace_selected_cs_change.png │ ├── trace_single_concept_change.png │ └── pypi_icon.svg ├── .gitignore ├── pyproject.toml ├── LICENSE └── CONTRIBUTING.md /edmine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/env/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/edmine/constant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/llm/dspy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/metric/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/roster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/llm/dspy/remote_llm/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/non_sequential_kt_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/sequential_kt_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/env/learning_path_recommendation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/cognitive_diagnosis_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/train/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/exercise_recommendation_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/exercise_recommendation/train/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /edmine/model/learning_path_recommendation_agent/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /asset/img/pypi_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhijieXiong/pyedmine/HEAD/asset/img/pypi_icon.png -------------------------------------------------------------------------------- /asset/img/ExperimentalFlowChart.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhijieXiong/pyedmine/HEAD/asset/img/ExperimentalFlowChart.jpg -------------------------------------------------------------------------------- /asset/img/trace_related_cs_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhijieXiong/pyedmine/HEAD/asset/img/trace_related_cs_change.png -------------------------------------------------------------------------------- /asset/img/trace_selected_cs_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhijieXiong/pyedmine/HEAD/asset/img/trace_selected_cs_change.png -------------------------------------------------------------------------------- /asset/img/trace_single_concept_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhijieXiong/pyedmine/HEAD/asset/img/trace_single_concept_change.png -------------------------------------------------------------------------------- /edmine/utils/log.py: -------------------------------------------------------------------------------- 1 | import datetime 2 
| 3 | 4 | def get_now_time(): 5 | now = datetime.datetime.now() 6 | return now.strftime("%Y-%m-%d %H:%M:%S") 7 | -------------------------------------------------------------------------------- /edmine/model/registry.py: -------------------------------------------------------------------------------- 1 | MODEL_REGISTRY = {} 2 | 3 | 4 | def register_model(name): 5 | def decorator(cls): 6 | MODEL_REGISTRY[name] = cls 7 | return cls 8 | return decorator 9 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/train/set_params/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_params import setup_clip_args 2 | from .scheduler_params import setup_scheduler_args 3 | from .sequential_kt_params import setup_common_args 4 | from .grad_acc_params import setup_grad_acc_args -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/set_params/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_params import setup_clip_args 2 | from .scheduler_params import setup_scheduler_args 3 | from .congnitive_diagnosis_params import setup_common_args 4 | from .grad_acc_params import setup_grad_acc_args -------------------------------------------------------------------------------- /examples/exercise_recommendation/train/set_params/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_params import setup_clip_args 2 | from .scheduler_params import setup_scheduler_args 3 | from .exercise_recommendation_params import setup_common_args 4 | from .grad_acc_params import setup_grad_acc_args -------------------------------------------------------------------------------- /edmine/model/module/Clipper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class 
NoneNegClipper(object): 4 | def __init__(self): 5 | super(NoneNegClipper, self).__init__() 6 | 7 | def __call__(self, module): 8 | if hasattr(module, 'weight'): 9 | w = module.weight.data 10 | a = torch.relu(torch.neg(w)) 11 | w.add_(a) -------------------------------------------------------------------------------- /examples/knowledge_tracing/train/set_params/grad_acc_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def setup_grad_acc_args(): 5 | parser = argparse.ArgumentParser(description="梯度累计", add_help=False) 6 | parser.add_argument("--accumulation_step", type=int, default=1, 7 | help="1表示不使用,大于1表示使用accumulation_step的梯度累计") 8 | return parser 9 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/set_params/grad_acc_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def setup_grad_acc_args(): 5 | parser = argparse.ArgumentParser(description="梯度累计", add_help=False) 6 | parser.add_argument("--accumulation_step", type=int, default=1, 7 | help="1表示不使用,大于1表示使用accumulation_step的梯度累计") 8 | return parser 9 | -------------------------------------------------------------------------------- /examples/exercise_recommendation/train/set_params/grad_acc_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def setup_grad_acc_args(): 5 | parser = argparse.ArgumentParser(description="梯度累计", add_help=False) 6 | parser.add_argument("--accumulation_step", type=int, default=1, 7 | help="1表示不使用,大于1表示使用accumulation_step的梯度累计") 8 | return parser 9 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/set_params/grad_acc_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 
| 3 | 4 | def setup_grad_acc_args(): 5 | parser = argparse.ArgumentParser(description="梯度累计", add_help=False) 6 | parser.add_argument("--accumulation_step", type=int, default=1, 7 | help="1表示不使用,大于1表示使用accumulation_step的梯度累计") 8 | return parser 9 | -------------------------------------------------------------------------------- /edmine/model/ExerciseRecommendationModel.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class ExerciseRecommendationModel(ABC): 5 | @abstractmethod 6 | def get_top_ns(self, data, top_ns): 7 | """ 8 | 根据输入的数据data(user的历史数据),返回top n推荐习题 9 | :param data: 10 | :param top_ns: 11 | :return: 12 | """ 13 | pass 14 | -------------------------------------------------------------------------------- /edmine/model/exercise_recommendation_model/DLExerciseRecommendationModel.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | from edmine.model.ExerciseRecommendationModel import ExerciseRecommendationModel 4 | 5 | 6 | class DLExerciseRecommendationModel(ExerciseRecommendationModel): 7 | @abstractmethod 8 | def train_one_step(self, one_step_data): 9 | pass 10 | 11 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/set_params/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip_params import setup_clip_args 2 | from .scheduler_params import setup_scheduler_args 3 | from .lpr_params import setup_common_args 4 | from .grad_acc_params import setup_grad_acc_args 5 | from .epoch_trainer_params import setup_epoch_trainer_args 6 | from .step_trainer_params import setup_step_trainer_args -------------------------------------------------------------------------------- /edmine/model/exercise_recommendation_model/ExerciseRecommendationModel.py: 
-------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class ExerciseRecommendationModel(ABC): 5 | @abstractmethod 6 | def get_top_ns(self, data, top_ns): 7 | """ 8 | 根据输入的数据data(user的历史数据),返回top n推荐习题 9 | :param data: 10 | :param top_ns: 11 | :return: 12 | """ 13 | pass 14 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/set_params/clip_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_clip_args(): 7 | parser = argparse.ArgumentParser(description="梯度裁剪", add_help=False) 8 | parser.add_argument("--enable_clip_grad", type=str2bool, default=False) 9 | parser.add_argument("--grad_clipped", type=float, default=10.0) 10 | return parser 11 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/train/set_params/clip_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_clip_args(): 7 | parser = argparse.ArgumentParser(description="梯度裁剪", add_help=False) 8 | parser.add_argument("--enable_clip_grad", type=str2bool, default=False) 9 | parser.add_argument("--grad_clipped", type=float, default=10.0) 10 | return parser 11 | -------------------------------------------------------------------------------- /examples/exercise_recommendation/train/set_params/clip_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_clip_args(): 7 | parser = argparse.ArgumentParser(description="梯度裁剪", add_help=False) 8 | parser.add_argument("--enable_clip_grad", type=str2bool, default=False) 9 | 
parser.add_argument("--grad_clipped", type=float, default=10.0) 10 | return parser 11 | -------------------------------------------------------------------------------- /edmine/model/cognitive_diagnosis_model/DLCognitiveDiagnosisModel.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | from edmine.model.CognitiveDiagnosisModel import CognitiveDiagnosisModel 4 | 5 | 6 | class DLCognitiveDiagnosisModel(CognitiveDiagnosisModel): 7 | @abstractmethod 8 | def get_predict_loss(self, batch): 9 | pass 10 | 11 | @abstractmethod 12 | def get_predict_score(self, batch): 13 | pass -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/prepare_dataset/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/prepare_dataset/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/set_params/clip_params.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_clip_args(): 7 | parser = argparse.ArgumentParser(description="梯度裁剪", add_help=False) 8 | parser.add_argument("--enable_clip_grad", type=str2bool, default=False) 9 | parser.add_argument("--grad_clipped", type=float, default=10.0) 10 | return parser 11 | -------------------------------------------------------------------------------- /examples/exercise_recommendation/preprare_dataset/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/prepare_dataset/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/rcd/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = 
os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/abqr/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/dimkt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/dygkt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 
10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/gikt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/grkt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/hgkt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | 
-------------------------------------------------------------------------------- /examples/knowledge_tracing/lbkt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/qdckt/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/hier_cdf/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/hyper_cd/config.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/dlpr/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | 6 | current_file_name = inspect.getfile(inspect.currentframe()) 7 | current_dir = os.path.dirname(current_file_name) 8 | settings_path = os.path.join(current_dir, "../../settings.json") 9 | with open(settings_path, "r") as f: 10 | settings = json.load(f) 11 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 12 | MODEL_DIR = settings["MODELS_DIR"] 13 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/set_params/epoch_trainer_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_epoch_trainer_args(): 7 | parser = argparse.ArgumentParser(description="Epoch trainer的配置", add_help=False) 8 | parser.add_argument("--max_epoch", type=int, default=100) 9 | parser.add_argument("--use_early_stop", type=str2bool, default=True) 10 | parser.add_argument("--num_epoch_early_stop", type=int, default=10) 11 | return parser 12 | -------------------------------------------------------------------------------- /edmine/model/learning_path_recommendation_agent/RLBasedLPRAgent.py: 
-------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | from edmine.model.LearningPathRecommendationAgent import LPRAgent 4 | 5 | 6 | class RLBasedLPRAgent(LPRAgent): 7 | def __init__(self, params, objects): 8 | super().__init__(params, objects) 9 | 10 | @abstractmethod 11 | def done_data2rl_data(self, done_data): 12 | """ 13 | transform history data of done memory to rl data which is agent required 14 | """ 15 | pass -------------------------------------------------------------------------------- /examples/learning_path_recommendation/dlpr/utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | def check_cycles(edge_pairs): 5 | G = nx.DiGraph() 6 | G.add_edges_from(edge_pairs) 7 | cycles = list(nx.simple_cycles(G)) 8 | if cycles: 9 | print("检测到闭环!所有环如下:") 10 | for i, cycle in enumerate(cycles): 11 | # 将环补成首尾相接的路径 12 | path = " -> ".join(cycle + [cycle[0]]) 13 | print(f"环 {i}: {path}") 14 | else: 15 | print("✅ 没有闭环,知识点先修关系是有向无环图(DAG)") 16 | 17 | -------------------------------------------------------------------------------- /edmine/model/LearningPathRecommendationAgent.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | 4 | class LPRAgent: 5 | def __init__(self, params, objects): 6 | self.params = params 7 | self.objects = objects 8 | 9 | @abstractmethod 10 | def judge_done(self, memory, master_th=0.6): 11 | pass 12 | 13 | @abstractmethod 14 | def recommend_qc(self, memory, master_th=0.6, epsilon=0): 15 | pass 16 | 17 | def eval(self): 18 | pass 19 | 20 | def train(self): 21 | pass 22 | -------------------------------------------------------------------------------- /examples/set_up.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | from edmine.data.FileManager 
def setup_step_trainer_args():
    """Build an argparse parent parser carrying step-trainer options.

    Returned with ``add_help=False`` so it can be composed into a child
    parser via ``parents=[...]``.
    """
    step_parser = argparse.ArgumentParser(description="Step trainer的配置", add_help=False)
    add = step_parser.add_argument
    add("--max_step", type=int, default=50000)
    add("--use_early_stop", type=str2bool, default=True)
    add("--num_early_stop", type=int, default=10, help="num_early_stop * num_step2evaluate")
    add("--num_step2evaluate", type=int, default=500)
    return step_parser
def config_general_dl_model(local_params, global_params):
    """Fill ``global_params`` with general DL settings: device, debug flag, seed.

    Device preference is cuda > mps > cpu, unless ``use_cpu`` forces cpu.
    """
    force_cpu = local_params.get("use_cpu", False)
    chosen_device = "cpu"
    if not force_cpu:
        if is_cuda_available():
            chosen_device = "cuda"
        elif is_mps_available():
            chosen_device = "mps"
    global_params["device"] = chosen_device
    if local_params.get("debug_mode", False):
        # Anomaly detection pinpoints NaN/inf in backward passes, at a
        # noticeable speed cost — debug only.
        torch.autograd.set_detect_anomaly(True)
    global_params["seed"] = local_params.get("seed", 0)
def config_logger(local_params, global_objects, log_path=None):
    """Install the shared "train_log" logger into ``global_objects``.

    A stdout handler is always attached (DEBUG level normally, ERROR level
    during hyper-parameter search to keep output quiet); an INFO-level file
    handler is added when ``log_path`` is given.

    :param local_params: config dict; reads the "search_params" flag.
    :param global_objects: dict updated in place with key "logger".
    :param log_path: optional path for an additional log file.
    """
    global_objects["logger"] = logging.getLogger("train_log")
    # Was setLevel(4): a bare magic number below DEBUG(10) that names no
    # standard level. DEBUG keeps all standard-level records flowing, which
    # is what the handler levels below assume.
    global_objects["logger"].setLevel(logging.DEBUG)
    # NOTE(review): calling this twice in one process adds duplicate
    # handlers (getLogger returns a singleton) — verify callers only
    # configure once.
    ch = logging.StreamHandler(stream=sys.stdout)
    if not local_params.get("search_params", False):
        ch.setLevel(logging.DEBUG)
    else:
        ch.setLevel(logging.ERROR)
    global_objects["logger"].addHandler(ch)
    if log_path is not None:
        fh = logging.FileHandler(log_path)
        fh.setLevel(logging.INFO)
        global_objects["logger"].addHandler(fh)
class CognitiveDiagnosisModel(ABC):
    """Abstract interface for cognitive diagnosis models.

    Now inherits ABC (previously imported but unused), matching the sibling
    KnowledgeTracingModel(ABC); without it, @abstractmethod is not enforced
    and the class could be instantiated without an implementation.
    """

    @abstractmethod
    def get_knowledge_state(self, user_id):
        """
        Estimates the knowledge state of users based on their interaction data.
        Returns a matrix where each element represents a user's mastery level (a value between 0 and 1) for a specific concept.
        :param user_id:
        :return: A matrix of shape (num_users, num_concepts) where:
            Each row corresponds to a user.
            Each column corresponds to a concept.
            Each element is a value between 0 and 1, representing the user's mastery level for that concept.
        """
        pass
def check_q_table(q_table: np.ndarray):
    """Validate a question-concept Q-table.

    A valid Q-table is a non-empty 2D binary matrix in which every row
    (question) and every column (concept) contains at least one 1.

    :param q_table: candidate Q-table, shape (num_questions, num_concepts).
    :raises IndexError: if q_table is empty or not 2-dimensional.
    :raises ValueError: if entries are not all 0/1, or some row/column is all zeros.
    """
    if q_table.size == 0 or q_table.ndim != 2:
        raise IndexError("Input q_table must be a 2D NumPy array.")

    if not np.all(np.isin(q_table, [0, 1])):
        raise ValueError("Input q_table must contain only 0s and 1s.")

    rows_check = np.any(q_table == 1, axis=1)
    cols_check = np.any(q_table == 1, axis=0)

    if not (np.all(rows_check) and np.all(cols_check)):
        # Message previously stated the requirement as a fact
        # ("has at least one") — fixed to express the constraint.
        raise ValueError("Each row and column of the input q_table must have at least one value of 1.")


def check_kt_seq_start(seq_start: int):
    """Validate the start index of a KT sequence (must be >= 2, because the
    first position has no preceding interaction to predict from).

    :raises ValueError: if seq_start < 2.
    """
    if seq_start < 2:
        # Plain string (the old f-string had no placeholders) + grammar fix.
        raise ValueError("seq_start must be greater than 1")
def setup_scheduler_args():
    """Build an argparse parent parser with learning-rate scheduler options.

    Created with ``add_help=False`` so it can be merged into a training
    script's parser via ``parents=[...]``.
    """
    scheduler_parser = argparse.ArgumentParser(description="scheduler配置", add_help=False)
    arg = scheduler_parser.add_argument
    arg("--enable_scheduler", type=str2bool, default=False)
    arg("--scheduler_type", type=str, default="MultiStepLR",
        choices=("StepLR", "MultiStepLR"))
    # StepLR options
    arg("--scheduler_step", type=int, default=10)
    # MultiStepLR options (milestones parsed from a list-literal string)
    arg("--scheduler_milestones", type=str, default="[20, 50, 100]")
    arg("--scheduler_gamma", type=float, default=0.5)
    # CosineAnnealingLR options
    arg("--scheduler_T_max", type=int, default=10)
    arg("--scheduler_eta_min", type=float, default=0.0001)
    return scheduler_parser
parser 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "edmine" 7 | version = "1.0.0" 8 | description = "A library of algorithms for reproducing Knowledge Tracing, Cognitive Diagnosis, Exercise Recommendation and Learning Path Recommendation models." 9 | authors = [ 10 | { name = "ZhijieXiong", email = "18800118477@163.com" }, 11 | ] 12 | readme = "README4PYPI.md" 13 | license = { text = "MIT License" } 14 | requires-python = ">=3.9" 15 | dependencies = [ 16 | ] 17 | 18 | [project.optional-dependencies] 19 | dev = [ 20 | ] 21 | 22 | [project.urls] 23 | Homepage = "https://zhijiexiong.github.io/sub-page/pyedmine/document/site/index.html" 24 | 25 | [tool.setuptools.packages.find] 26 | where = ["."] 27 | include = ["edmine*"] 28 | [tool.setuptools] 29 | package-dir = { "edmine" = "edmine" } -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/set_params/scheduler_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from edmine.utils.parse import str2bool 4 | 5 | 6 | def setup_scheduler_args(): 7 | parser = argparse.ArgumentParser(description="scheduler配置", add_help=False) 8 | parser.add_argument("--enable_scheduler", type=str2bool, default=False) 9 | parser.add_argument("--scheduler_type", type=str, default="MultiStepLR", 10 | choices=("StepLR", "MultiStepLR")) 11 | parser.add_argument("--scheduler_step", type=int, default=10) 12 | parser.add_argument("--scheduler_milestones", type=str, default="[20, 50, 100]") 13 | parser.add_argument("--scheduler_gamma", type=float, default=0.5) 14 | parser.add_argument("--scheduler_T_max", type=int, default=10) 15 | 
class DLCDRoster:
    """Inference-time roster wrapping a trained deep-learning cognitive
    diagnosis model; exposes knowledge-state queries for batches of users."""

    def __init__(self, params, objects):
        # params: config dict, reads "device" and "roster_config"
        # objects: shared objects dict, reads "models"
        self.params = params
        self.objects = objects

    def process_batch4cd_model(self, batch):
        """Coerce a batch of user ids to a long tensor on the configured device.

        :param batch: list / np.ndarray / torch.Tensor of user ids.
        :return: long tensor on ``params["device"]`` (tensor input is
            returned unchanged).
        """
        # isinstance instead of `type(batch) in [...]` so subclasses of the
        # accepted types also pass (backward-compatible generalization).
        assert isinstance(batch, (list, np.ndarray, torch.Tensor)), \
            "type of batch must in [list, np.ndarray, torch.Tensor]"
        if isinstance(batch, torch.Tensor):
            # NOTE(review): a tensor input is not moved to the device or
            # cast to long — presumably callers pass it ready-made; confirm.
            return batch
        return torch.tensor(batch).long().to(self.params["device"])

    def get_knowledge_state(self, batch):
        """Run the configured model in eval mode and return the users'
        concept-mastery matrix for ``batch`` (no gradients tracked)."""
        model_name = self.params["roster_config"]["model_name"]
        model = self.objects["models"][model_name]
        model.eval()
        batch = self.process_batch4cd_model(batch)
        with torch.no_grad():
            return model.get_knowledge_state(batch)
class DLCognitiveDiagnosisTrainer(SingleModelEpochTrainer):
    """Epoch-based trainer for deep-learning cognitive diagnosis models."""

    def __init__(self, params, objects):
        super().__init__(params, objects)

    def evaluate_dataset(self, model, data_loader):
        """Score every batch in ``data_loader`` with ``model`` (eval mode,
        no gradients) and return KT-style metrics over the whole dataset."""
        model.eval()
        scores, labels = [], []
        with torch.no_grad():
            for batch in data_loader:
                batch_scores = model.get_predict_score(batch)["predict_score"]
                scores.append(batch_scores.detach().cpu().numpy())
                labels.append(batch["correctness"].detach().cpu().numpy())
        all_scores = np.concatenate(scores, axis=0)
        all_labels = np.concatenate(labels, axis=0)
        return get_kt_metric(all_labels, all_scores)
def setup_common_args():
    """Build an argparse parent parser with options shared by all cognitive
    diagnosis models: dataset selection, training strategy, metric choice,
    and misc runtime flags. Created with add_help=False for composition."""
    parser = argparse.ArgumentParser(description="认知诊断模型的公共配置", add_help=False)
    # dataset / setting selection
    parser.add_argument("--setting_name", type=str, default="ncd_setting")
    parser.add_argument("--dataset_name", type=str, default="assist2009")
    parser.add_argument("--train_file_name", type=str, default="assist2009_train_fold_0.txt")
    parser.add_argument("--valid_file_name", type=str, default="assist2009_valid_fold_0.txt")
    # training strategy
    parser.add_argument("--max_epoch", type=int, default=50)
    parser.add_argument("--use_early_stop", type=str2bool, default=True)
    parser.add_argument("--num_epoch_early_stop", type=int, default=5)
    # evaluation metric selection
    parser.add_argument("--main_metric", type=str, default="AUC")
    parser.add_argument("--use_multi_metrics", type=str2bool, default=False)
    # each tuple is (metric_name, weight, direction) — parsed from a string literal
    parser.add_argument("--multi_metrics", type=str, default="[('AUC', 1, 1), ('ACC', 1, 1), ('RMSE', 1, -1)]")
    # other configuration
    parser.add_argument("--debug_mode", type=str2bool, default=False)
    parser.add_argument("--use_cpu", type=str2bool, default=False)
    parser.add_argument("--seed", type=int, default=0)
    return parser
def load_dl_model(global_params, global_objects, save_model_dir, ckt_name="saved.ckt", model_name_in_ckt="best_valid"):
    """Instantiate a registered model and restore its weights from a checkpoint.

    :param global_params: config dict; "models_config" is filled from the
        saved params.json, and "device" selects the load target.
    :param global_objects: shared objects passed to the model constructor.
    :param save_model_dir: directory named "<ModelName>@@..." containing
        params.json and the checkpoint file.
    :param ckt_name: checkpoint file name inside save_model_dir.
    :param model_name_in_ckt: key of the state dict within the checkpoint.
    :return: model on the configured device with the saved weights loaded.
    """
    # All model modules must be imported first so MODEL_REGISTRY is populated.
    import_all_models()

    saved_params = read_json(os.path.join(save_model_dir, "params.json"))
    global_params["models_config"] = str_dict2params(saved_params["models_config"])

    device = global_params["device"]
    # Directory names follow "<ModelName>@@<setting>@@..." — first field is the class.
    model_name = os.path.basename(save_model_dir).split("@@")[0]
    model = MODEL_REGISTRY[model_name](global_params, global_objects).to(device)

    ckt_path = os.path.join(save_model_dir, ckt_name)
    if device in ("cpu", "mps"):
        # Remap tensors saved on another device onto the local one.
        saved_ckt = torch.load(ckt_path, map_location=torch.device(device), weights_only=True)
    else:
        saved_ckt = torch.load(ckt_path, weights_only=True)
    model.load_state_dict(saved_ckt[model_name_in_ckt])

    return model
class DLEvaluator(ABC):
    """Base class for deep-learning model evaluators: runs inference over
    every configured data loader and delegates metric logging to subclasses."""

    def __init__(self, params, objects):
        self.params = params
        self.objects = objects
        # One result slot per data loader, filled in by evaluate().
        self.inference_results = {name: {} for name in self.objects["data_loaders"]}

    def evaluate(self):
        """Evaluate the configured model on all data loaders and log results."""
        model_name = self.params["evaluator_config"]["model_name"]
        model = self.objects["models"][model_name]
        self.objects["logger"].info(
            f"The number of parameters in {model_name} are {DLEvaluator.count_parameters(model)}"
        )
        for name, data_loader in self.objects["data_loaders"].items():
            model.eval()
            with torch.no_grad():
                self.inference_results[name] = self.inference(model, data_loader)
        self.log_inference_results()

    @staticmethod
    def count_parameters(model):
        """Total parameter count (trainable or not) of a torch module."""
        return sum(p.numel() for p in model.parameters())

    @abstractmethod
    def log_inference_results(self):
        pass

    @abstractmethod
    def inference(self, model, data_loader):
        pass
def get_model_info(model_dir_name):
    """Split a saved-model directory name of the form
    '<model>@@<setting>@@<train_file>@@...' into its first three fields.

    :return: (model_name, setting_name, train_file_name)
    """
    model_name, setting_name, train_file_name = model_dir_name.split("@@")[:3]
    return model_name, setting_name, train_file_name
def load_lpr_agent(global_params, global_objects, save_agent_dir, ckt_name="saved.ckt"):
    """Rebuild a learning-path-recommendation agent and restore the weights
    of its underlying models from a checkpoint.

    :param global_params: config dict; "models_config"/"agents_config" are
        filled from the saved params.json, "device" selects the load target.
    :param global_objects: shared objects; "lpr_models" maps model names to
        the modules whose state dicts are restored.
    :param save_agent_dir: directory named "<AgentName>@@..." with
        params.json and the checkpoint file.
    :param ckt_name: checkpoint file name inside save_agent_dir.
    :return: the reconstructed agent.
    """
    # Populate MODEL_REGISTRY before looking up the agent class.
    import_all_models()

    saved_params = read_json(os.path.join(save_agent_dir, "params.json"))
    global_params["models_config"] = str_dict2params(saved_params["models_config"])
    global_params["agents_config"] = str_dict2params(saved_params["agents_config"])

    agent_name = os.path.basename(save_agent_dir).split("@@")[0]
    agent = MODEL_REGISTRY[agent_name](global_params, global_objects)

    device = global_params["device"]
    ckt_path = os.path.join(save_agent_dir, ckt_name)
    if device in ("cpu", "mps"):
        saved_ckt = torch.load(ckt_path, map_location=torch.device(device), weights_only=True)
    else:
        saved_ckt = torch.load(ckt_path, weights_only=True)
    # Weights live in the per-model modules, not on the agent object itself.
    for model_name, model in global_objects["lpr_models"].items():
        model.load_state_dict(saved_ckt[model_name])

    return agent
class SequentialDLKTTrainer(SingleModelEpochTrainer):
    """Epoch trainer for sequential knowledge tracing models; evaluation
    aggregates masked per-step predictions across the whole data loader."""

    def __init__(self, params, objects):
        super().__init__(params, objects)

    def evaluate_dataset(self, model, data_loader):
        """Evaluate `model` on all batches of `data_loader` (eval mode, no
        gradients) and return KT metrics over the concatenated predictions."""
        model.eval()
        with torch.no_grad():
            predict_score_all = []
            ground_truth_all = []
            for batch in data_loader:
                correctness_seq = batch["correctness_seq"]
                # mask_seq != 0 marks real (non-padding) timesteps
                mask_bool_seq = torch.ne(batch["mask_seq"], 0)
                score_result = model.get_predict_score(batch)
                predict_score = score_result["predict_score"].detach().cpu().numpy()
                # [:, 1:]: the first step has no history to predict from, so
                # labels (and, presumably, the model's scores) start at t=1
                ground_truth = torch.masked_select(correctness_seq[:, 1:], mask_bool_seq[:, 1:]).detach().cpu().numpy()
                predict_score_all.append(predict_score)
                ground_truth_all.append(ground_truth)

            predict_score_all = np.concatenate(predict_score_all, axis=0)
            ground_truth_all = np.concatenate(ground_truth_all, axis=0)
            # NOTE(review): DKT_KG4EX scores appear to be compared against an
            # all-ones target rather than real correctness — confirm this is
            # the intended KG4EX evaluation protocol.
            if model.model_name == "DKT_KG4EX":
                ground_truth_all = [1] * len(predict_score_all)

            return get_kt_metric(ground_truth_all, predict_score_all)
def config_roster(local_params):
    """Build (global_params, global_objects) for a roster backed by one saved DL model.

    Args:
        local_params: dict with keys "model_dir_name", "model_file_name",
            "model_name_in_ckt", and optionally "dataset_name" (which enables
            Q-table loading).

    Returns:
        (global_params, global_objects): global_params carries "roster_config";
        global_objects carries the file manager and a {model_name: model} map.
    """
    global_params = {}
    global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)}
    config_general_dl_model(local_params, global_params)
    # Q-table configuration is only needed when a dataset is specified.
    if local_params.get("dataset_name", False):
        config_q_table(local_params, global_params, global_objects)
    # NOTE(review): the full directory name (e.g. "qDKT@@setting@@...") is used as
    # the model key here, while examples/roster/config.py splits on "@@" and keys
    # by the bare model name — confirm which key downstream roster code expects.
    model_name = local_params["model_dir_name"]
    model_dir = os.path.join(MODEL_DIR, model_name)
    model = load_dl_model(global_params, global_objects,
                          model_dir, local_params["model_file_name"], local_params["model_name_in_ckt"])
    # Inference only: freeze dropout / batch-norm behavior.
    model.eval()
    global_params["roster_config"] = {"model_name": model_name}
    global_objects["models"] = {model_name: model}

    return global_params, global_objects
class RandomRecQCAgent(RLBasedLPRAgent):
    """Random baseline LPR agent: picks a not-yet-mastered concept uniformly at
    random, then a random question under that concept."""

    def __init__(self, params, objects):
        super().__init__(params, objects)

    def judge_done(self, memory, master_th=0.6):
        """Return True when the episode should stop: either the single learning
        goal is mastered, or the question budget is exhausted.

        The budget is parsed from the agent name, e.g. "xxx-20" -> 20 questions.
        """
        if memory.achieve_single_goal(master_th):
            return True
        evaluator_config = self.params["evaluator_config"]
        agent_name = evaluator_config["agent_name"]
        # The question budget is encoded as the second "-"-separated field.
        max_question_attempt = int(agent_name.split("-")[1])
        num_question_his = sum(len(qs) for qs in memory.question_rec_history)
        return num_question_his >= max_question_attempt

    def recommend_qc(self, memory, master_th=0.6, epsilon=0):
        """Recommend a (concept_id, question_id) pair uniformly at random among
        concepts whose current mastery is below the threshold.

        Args:
            memory: episode memory; its last state entry is read as per-concept mastery.
            master_th: mastery threshold; used as a fallback when the evaluator
                config does not provide "master_threshold".
            epsilon: unused — accepted for interface compatibility with RL agents.

        Returns:
            (c_id2rec, q_id2rec) tuple of recommended concept and question ids.
        """
        num_concept = self.objects["dataset"]["q_table"].shape[1]
        evaluator_config = self.params["evaluator_config"]
        # FIX: the master_th argument used to be unconditionally overwritten by
        # evaluator_config["master_threshold"], silently ignoring the caller's
        # value (and raising KeyError when the key was absent). The config value
        # now takes precedence only when present.
        master_th = evaluator_config.get("master_threshold", master_th)
        c2q = self.objects["dataset"]["c2q"]
        random_generator = self.objects["random_generator"]

        state = memory.state_history[-1]
        eligible_concepts = [c_id for c_id in range(num_concept) if float(state[c_id]) < master_th]
        # NOTE(review): if every concept is already mastered, eligible_concepts is
        # empty and choice() raises — judge_done should prevent reaching here; confirm.
        c_id2rec = random_generator.choice(eligible_concepts)
        q_id2rec = int(random_generator.choice(c2q[c_id2rec]))
        return c_id2rec, q_id2rec
self.params = params 7 | self.objects = objects 8 | 9 | def process_batch4sequential_kt_model(self, batch): 10 | if type(batch) is list: 11 | max_seq_len = max(list(map(lambda x: len(x["correctness_seq"]), batch))) 12 | batch_converted = {k: [] for k in batch[0].keys()} 13 | for item_data in batch: 14 | for seq_name, seq in item_data.items(): 15 | if type(seq) is list: 16 | seq += [0] * (max_seq_len - len(seq)) 17 | batch_converted[seq_name].append(seq) 18 | for k in batch_converted.keys(): 19 | if k not in ["weight_seq", "hint_factor_seq", "attempt_factor_seq", "time_factor_seq", "correct_float"]: 20 | batch_converted[k] = torch.tensor(batch_converted[k]).long().to(self.params["device"]) 21 | else: 22 | batch_converted[k] = torch.tensor(batch_converted[k]).float().to(self.params["device"]) 23 | return batch_converted 24 | else: 25 | return batch 26 | 27 | def get_knowledge_state(self, batch, last_state=True): 28 | model_name = self.params["roster_config"]["model_name"] 29 | model = self.objects["models"][model_name] 30 | model.eval() 31 | if model.model_type == "DLSequentialKTModel": 32 | batch = self.process_batch4sequential_kt_model(batch) 33 | else: 34 | pass 35 | with torch.no_grad(): 36 | return model.get_knowledge_state(batch, last_state) 37 | -------------------------------------------------------------------------------- /edmine/model/module/Graph.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import scipy.sparse as sp 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class RCDGraphLayer(nn.Module): 9 | def __init__(self, g, in_dim, out_dim): 10 | super(RCDGraphLayer, self).__init__() 11 | self.g = g 12 | self.fc = nn.Linear(in_dim, out_dim, bias=False) 13 | self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False) 14 | 15 | def edge_attention(self, edges): 16 | z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1) 17 | a = self.attn_fc(z2) 18 | return 
class HyperCDgraph:
    """Hypergraph wrapper around an incidence matrix H (rows = nodes,
    columns = hyperedges) used by HyperCD."""

    def __init__(self, H: np.ndarray):
        self.H = H
        # +1 keeps every degree strictly positive so the reciprocals below are finite.
        self.Dv = np.count_nonzero(H, axis=1) + 1
        self.De = np.count_nonzero(H, axis=0) + 1

    def to_tensor_nadj(self):
        """Return the normalized adjacency H·De⁻¹·Hᵀ·Dv⁻¹ as a coalesced
        float64 sparse COO tensor."""
        inv_edge_deg = np.diag(1.0 / self.De)
        inv_node_deg = np.diag(1.0 / self.Dv)
        normalized = self.H @ inv_edge_deg @ self.H.T @ inv_node_deg
        coo = sp.coo_matrix(normalized)
        idx = torch.from_numpy(np.asarray([coo.row, coo.col]))
        return torch.sparse_coo_tensor(idx, coo.data, coo.shape, dtype=torch.float64).coalesce()
default=r"qDKT@@pykt_setting@@assist2009_train@@seed_0@@2025-07-18@20-22-14") 16 | parser.add_argument("--model_file_name", type=str, help="文件名", default="saved.ckt") 17 | parser.add_argument("--model_name_in_ckt", type=str, help="文件名", default="best_valid") 18 | # 掌握阈值 19 | parser.add_argument("--master_threshold", type=float, default=0.6) 20 | # 评价指标选择 21 | parser.add_argument("--target_steps", type=str, default="[5,10,20]") 22 | parser.add_argument("--main_metric", type=str, default="NRPR") 23 | parser.add_argument("--use_multi_metrics", type=str2bool, default=False) 24 | parser.add_argument("--multi_metrics", type=str, default="[('NRPR', 1, 1), ('APR', 1, 1), ('RPR', 1, 1)]") 25 | # 其它配置 26 | parser.add_argument("--debug_mode", type=str2bool, default=False) 27 | parser.add_argument("--use_cpu", type=str2bool, default=False) 28 | parser.add_argument("--seed", type=int, default=0) 29 | return parser 30 | -------------------------------------------------------------------------------- /examples/roster/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import inspect 4 | 5 | from edmine.data.FileManager import FileManager 6 | from edmine.config.data import config_q_table 7 | from edmine.config.model import config_general_dl_model 8 | from edmine.model.load_model import load_dl_model 9 | 10 | 11 | current_file_name = inspect.getfile(inspect.currentframe()) 12 | current_dir = os.path.dirname(current_file_name) 13 | settings_path = os.path.join(current_dir, "../settings.json") 14 | with open(settings_path, "r") as f: 15 | settings = json.load(f) 16 | FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"] 17 | MODEL_DIR = settings["MODELS_DIR"] 18 | 19 | 20 | def config_roster(local_params): 21 | global_params = {} 22 | global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)} 23 | config_general_dl_model(local_params, global_params) 24 | if local_params.get("dataset_name", False): 25 | 
def get_model_info(model_dir_name):
    """Split a saved-model directory name into its leading fields.

    Directory names follow the convention
    "<model_name>@@<setting_name>@@<train_file_name>@@...".

    Returns:
        (model_name, setting_name, train_file_name)
    """
    parts = model_dir_name.split("@@")
    return parts[0], parts[1], parts[2]
user_data.items() 31 | } 32 | mode_seq = user_data["mode_seq"] 33 | for i, m in enumerate(mode_seq): 34 | if m == 0: 35 | continue 36 | for k, v in user_data.items(): 37 | if type(v) is list: 38 | term_data[k].append(user_data[k][i]) 39 | term_data["seq_len"] = len(term_data["correctness_seq"]) 40 | if term_data["seq_len"] > 1: 41 | kt_data.append(term_data) 42 | else: 43 | kt_data = kt_data_ 44 | cd_data = kt_data2cd_data(kt_data) 45 | n_fold_split(params["dataset_name"], cd_data, setting, file_manager, write_cd_file, "cd") 46 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/prepare_dataset/pykt_setting.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | 4 | import config 5 | 6 | from edmine.data.FileManager import FileManager 7 | from edmine.dataset.split_seq import truncate2multi_seq 8 | from edmine.dataset.split_dataset import n_fold_split 9 | from edmine.utils.data_io import write_kt_file, read_kt_file 10 | from edmine.utils.parse import get_kt_data_statics 11 | 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--dataset_name", type=str, default="assist2009") 16 | args = parser.parse_args() 17 | params = vars(args) 18 | 19 | setting = { 20 | "name": "pykt_setting", 21 | "max_seq_len": 200, 22 | "min_seq_len": 2, 23 | "n_fold": 5, 24 | "test_radio": 0.2, 25 | } 26 | 27 | file_manager = FileManager(config.FILE_MANAGER_ROOT) 28 | file_manager.add_new_setting(setting["name"], setting) 29 | data = read_kt_file(file_manager.get_preprocessed_path(params["dataset_name"])) 30 | if params["dataset_name"] in ["junyi2015", "edi2020-task1"]: 31 | # 只取长度最长的5000条序列 32 | seq_lens = list(map(lambda x: x["seq_len"], data)) 33 | max_indices = np.argpartition(np.array(seq_lens), -5000)[-5000:] 34 | data_ = [] 35 | for i in max_indices: 36 | data_.append(data[i]) 37 | data = data_ 38 | 
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
# NOTE(review): config_rcd is imported from config.ncd rather than a config.rcd
# module — presumably RCD's config function lives alongside NCD's; confirm.
from config.ncd import config_rcd
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.cognitive_diagnosis_model.RCD import RCD


if __name__ == "__main__":
    # Hyper-parameter search script for the RCD cognitive-diagnosis model.
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=1024)
    parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.00001)
    parser.add_argument("--momentum", type=float, default=0.9)

    # hyper-parameter search space
    parameters_space = {
        "weight_decay": [0.00001, 0],
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # Scale the number of hyperopt evaluations with the size of the grid.
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # BUG FIX: fmin was nested inside another fmin call, so the outer call
    # received the inner call's result (a dict of best params) as its objective
    # function and crashed. A single fmin over the objective matches the sibling
    # search scripts (mirt/hier_cdf/irt). The stray "current_best_performance = 0"
    # was also removed: the running best lives in utils.get_objective_func's
    # module-level global, so that local assignment was never read.
    fmin(get_objective_func(parser, config_rcd, "RCD", RCD), space, algo=tpe.suggest, max_evals=max_evals)
select_dataset(model_name)({ 33 | "setting_name": setting_name, 34 | "file_name": params["test_file_name"], 35 | "device": global_params["device"] 36 | }, global_objects) 37 | dataloader_test = DataLoader(dataset_test, batch_size=params["evaluate_batch_size"], shuffle=False) 38 | global_objects["data_loaders"] = {"test_loader": dataloader_test} 39 | evaluator = SequentialDLKTEvaluator4FTAcc(global_params, global_objects) 40 | evaluator.evaluate() 41 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/prepare_dataset/sub_from_test_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import config 5 | 6 | from edmine.data.FileManager import FileManager 7 | from edmine.utils.data_io import write_kt_file, read_kt_file 8 | from edmine.utils.parse import q2c_from_q_table 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--setting_name", type=str, default="pykt_setting") 14 | parser.add_argument("--dataset_name", type=str, default="xes3g5m") 15 | parser.add_argument("--test_file_name", type=str, default="xes3g5m_test.txt") 16 | parser.add_argument("--num_data", type=int, default=100) 17 | args = parser.parse_args() 18 | params = vars(args) 19 | 20 | file_manager = FileManager(config.FILE_MANAGER_ROOT) 21 | setting_dir = file_manager.get_setting_dir(params["setting_name"]) 22 | q_table = file_manager.get_q_table(params["dataset_name"]) 23 | q2c = q2c_from_q_table(q_table) 24 | test_data = read_kt_file(os.path.join(setting_dir, params["test_file_name"])) 25 | 26 | candidate = [] 27 | for user_data in test_data: 28 | seq_len = user_data["seq_len"] 29 | if seq_len < 200: 30 | continue 31 | num_correct = sum(user_data["correctness_seq"]) 32 | if (num_correct < 50) or (num_correct > 150): 33 | continue 34 | concept_exercised = set() 35 | for q_id in user_data["question_seq"]: 36 | c_ids 
def extract_shortest_paths(input_file):
    """Read comma-separated concept paths and keep the shortest one per (start, end) pair.

    Args:
        input_file: path to a text file with one path per line, e.g. "3,7,12".
            Blank lines are skipped.

    Returns:
        dict mapping (start_concept_id, end_concept_id) -> shortest path seen,
        as a list of ints.
    """
    shortest_paths = {}
    with open(input_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # skip blank lines
            path = [int(c_id) for c_id in line.split(",")]
            key = (path[0], path[-1])
            # Keep only the shortest path observed for this (start, end) pair.
            if key not in shortest_paths or len(path) < len(shortest_paths[key]):
                shortest_paths[key] = path
    # Cleanup: removed the commented-out file-writing code that used to follow
    # the return — callers persist the result themselves.
    return shortest_paths
def setup_common_args():
    """Return an argparse parent parser with options shared by all
    exercise-recommendation training scripts (use via parents=[...])."""
    parser = argparse.ArgumentParser(description="习题推荐模型的公共配置", add_help=False)
    # dataset
    parser.add_argument("--setting_name", type=str, default="ER_offline_setting")
    parser.add_argument("--dataset_name", type=str, default="assist2009")
    parser.add_argument("--user_data_file_name", type=str, default="assist2009_user_data.txt")
    parser.add_argument("--valid_mlkc_file_name", type=str, default="assist2009_dkt_mlkc_valid.txt")
    parser.add_argument("--train_file_name", type=str, default="assist2009_train_triples_dkt_0.2.txt")
    parser.add_argument("--valid_file_name", type=str, default="assist2009_valid_triples_dkt_0.2.txt")
    # evaluation metrics
    parser.add_argument("--top_ns", type=str, default="[5,10,20]")
    parser.add_argument("--main_metric", type=str, default="OFFLINE_NDCG")
    parser.add_argument("--use_multi_metrics", type=str2bool, default=False)
    parser.add_argument("--multi_metrics", type=str, default="[('KG4EX_ACC', 1, 1), ('KG4EX_NOV', 1, 1), ('OFFLINE_ACC', 1, 1)]")
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--weight_decay", type=float, default=0)
    parser.add_argument("--momentum", type=float, default=0.9)
    # training strategy (step-based with periodic evaluation)
    parser.add_argument("--max_step", type=int, default=50000)
    parser.add_argument("--use_early_stop", type=str2bool, default=True)
    parser.add_argument("--num_early_stop", type=int, default=5, help="num_early_stop * num_step2evaluate")
    parser.add_argument("--num_step2evaluate", type=int, default=1000)
    # misc
    parser.add_argument("--debug_mode", type=str2bool, default=False)
    parser.add_argument("--use_cpu", type=str2bool, default=False)
    parser.add_argument("--seed", type=int, default=0)
    return parser
"train_batch_size": [512, 1024, 2048], 29 | "learning_rate": [0.0001, 0.001], 30 | "weight_decay": [0.0001, 0.00001, 0], 31 | } 32 | space = { 33 | param_name: hp.choice(param_name, param_space) 34 | for param_name, param_space in parameters_space.items() 35 | } 36 | num = 1 37 | for parameter_space in parameters_space.values(): 38 | num *= len(parameter_space) 39 | if num > 100: 40 | max_evals = 20 + int(num * 0.2) 41 | elif num > 50: 42 | max_evals = 15 + int(num * 0.2) 43 | elif num > 20: 44 | max_evals = 10 + int(num * 0.2) 45 | elif num > 10: 46 | max_evals = 5 + int(num * 0.2) 47 | else: 48 | max_evals = num 49 | current_best_performance = 0 50 | fmin(get_objective_func(parser, config_mirt, "MIRT", MIRT), space, algo=tpe.suggest, max_evals=max_evals) 51 | 52 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/hier_cdf_search_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from hyperopt import fmin, tpe, hp 3 | 4 | from set_params import * 5 | from config.hier_cdf import config_hier_cdf 6 | from utils import get_objective_func 7 | 8 | from edmine.utils.parse import str2bool 9 | from edmine.model.cognitive_diagnosis_model.HierCDF import HierCDF 10 | 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 14 | add_help=False) 15 | # batch size 16 | parser.add_argument("--train_batch_size", type=int, default=256) 17 | parser.add_argument("--evaluate_batch_size", type=int, default=1024) 18 | # 优化器 19 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 20 | parser.add_argument("--learning_rate", type=float, default=0.001) 21 | parser.add_argument("--weight_decay", type=float, default=0.0001) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 模型参数 24 | 
# Running best across hyperopt trials; module-level on purpose — the objective
# closure below updates it via `global`.
current_best_performance = -100

def get_objective_func(parser, config_func, agent_name, agent_class):
    """Build a hyperopt objective for searching LPR agent hyper-parameters.

    Args:
        parser: argparse parser whose parsed args form the base config.
        config_func: turns the params dict into (global_params, global_objects).
        agent_name: registry key for the agent (also selects the trainer type).
        agent_class: agent class to instantiate.

    Returns:
        objective(parameters) -> float: negated validation performance
        (hyperopt minimizes).
    """
    def objective(parameters):
        global current_best_performance
        args = parser.parse_args()
        params = vars(args)
        set_seed(params["seed"])

        # Force non-interactive search settings regardless of CLI flags.
        params["search_params"] = True
        params["save_model"] = False
        params["debug_mode"] = False
        params["use_cpu"] = False
        global_params, global_objects = config_func(params)

        setting_dir = global_objects["file_manager"].get_setting_dir(params["setting_name"])
        # NOTE(review): `os` and `read_kt_file` are not imported explicitly in
        # this module — presumably re-exported by the wildcard
        # SequentialKTDataset import; confirm.
        global_objects["data"] = {
            "train": read_kt_file(os.path.join(setting_dir, params["train_file_name"])),
            "valid": read_kt_file(os.path.join(setting_dir, params["valid_file_name"]))
        }
        global_objects["agents"] = {
            agent_name: agent_class(global_params, global_objects)
        }

        # D3QN trains offline; all other agents train online against the simulator.
        if agent_name in ["D3QN"]:
            trainer = LPROfflineDRLTrainer(global_params, global_objects)
        else:
            trainer = LPROnlineDRLTrainer(global_params, global_objects)
        trainer.train()
        if agent_name in ["D3QN"]:
            performance_this = trainer.train_record.get_evaluate_result()["main_metric"]
        else:
            performance_this = trainer.best_valid_main_metric

        # Report a new best only on a meaningful (>= 0.001) improvement.
        if (performance_this - current_best_performance) >= 0.001:
            current_best_performance = performance_this
            print(f"current best params (performance is {performance_this}):\n " +
                  ", ".join(list(map(lambda s: f"{s}: {parameters[s]}", parameters.keys()))))
        # hyperopt minimizes, so return the negated score.
        return -performance_this
    return objective
not in c_id_new: 31 | c_id_new[c_ids_str] = C + len(c_id_new) 32 | qc_map[q_id] = c_id_new[c_ids_str] 33 | else: 34 | qc_map[q_id] = c_ids[0] 35 | 36 | T_new = np.zeros((Q, C + len(c_id_new)), dtype=int) 37 | for q_id in range(Q): 38 | T_new[q_id][qc_map[q_id]] = 1 39 | 40 | return T_new[:, ~np.all(T_new == 0, axis=0)] 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | parser.add_argument("--dataset_name", type=str, default="assist2009") 46 | args = parser.parse_args() 47 | params = vars(args) 48 | 49 | file_manager = FileManager(FILE_MANAGER_ROOT) 50 | q_table = file_manager.get_q_table(params["dataset_name"]) 51 | if q_table.sum() > q_table.shape[0]: 52 | q_table_ = transform_T(q_table) 53 | new_dir_name = params["dataset_name"] + "-single-concept" 54 | root_dir = file_manager.get_root_dir() 55 | new_dir = os.path.join(root_dir, "dataset", "dataset_preprocessed", new_dir_name) 56 | if not os.path.exists(new_dir): 57 | os.mkdir(new_dir) 58 | q_table_path = os.path.join(new_dir, "Q_table.npy") 59 | np.save(q_table_path, q_table_) 60 | -------------------------------------------------------------------------------- /examples/learning_path_recommendation/train/reinforce_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from hyperopt import fmin, tpe, hp 3 | 4 | from set_params import * 5 | from config.reinforce import config_reinforce 6 | from utils import get_objective_func 7 | 8 | from edmine.model.learning_path_recommendation_agent.Reinforce import Reinforce 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_step_trainer_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 13 | add_help=False) 14 | # 优化器 15 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 16 | parser.add_argument("--learning_rate", type=float, default=0.0001) 17 | 
"""Hyperparameter search (hyperopt/TPE) for the IRT cognitive-diagnosis model."""
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.irt import config_irt
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.cognitive_diagnosis_model.IRT import IRT


if __name__ == "__main__":
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=256)
    parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # IRT model parameters (D is the logistic scaling constant)
    parser.add_argument("--value_range", type=float, default=1)
    parser.add_argument("--a_range", type=float, default=1)
    parser.add_argument("--D", type=float, default=1.702)

    # search space: one hp.choice per tunable hyperparameter
    parameters_space = {
        "train_batch_size": [512, 1024, 2048],
        "learning_rate": [0.0001, 0.001],
        "weight_decay": [0.0001, 0.00001, 0],
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # evaluation budget: full grid for tiny spaces, otherwise ~20% of the grid plus a floor
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # NOTE(review): this assignment appears dead — the best-performance tracker is a
    # module-level global in utils.py; this local name does not affect it. Confirm.
    current_best_performance = 0
    fmin(get_objective_func(parser, config_irt, "IRT", IRT), space, algo=tpe.suggest, max_evals=max_evals)
def tf_idf_from_q_table(q_table):
    """Compute a TF-IDF-style weighting of a question-concept Q-table.

    tf is the raw 0/1 Q-table itself; idf of a concept is log(N / df) where N
    is the number of questions and df is the number of questions covering the
    concept.

    :param q_table: 2D array of shape (num_question, num_concept) with 0/1 entries.
    :return: array of the same shape, tf * idf per (question, concept) cell.
    """
    num_question = q_table.shape[0]
    tf = q_table
    df = q_table.sum(axis=0)
    # Guard concepts that no question covers: N/0 -> inf and then 0 * inf -> NaN
    # would poison those columns. Their tf is all-zero anyway, so define idf = 0.
    safe_df = np.where(df > 0, df, 1)
    idf = np.where(df > 0, np.log(num_question / safe_df), 0.0)
    return tf * np.expand_dims(idf, axis=0)
def cosine_similarity_matrix(arr, axis=0):
    """Pairwise cosine similarity between the columns (axis=0) or rows (axis=1) of arr.

    :param arr: 2D array of vectors.
    :param axis: 0 to compare columns, 1 to compare rows.
    :return: square cosine-similarity matrix.
    """
    vectors = arr.T if axis == 1 else arr
    # L2-normalise each column; the epsilon avoids division by zero for null vectors
    lengths = np.linalg.norm(vectors, axis=0, keepdims=True)
    unit = vectors / (lengths + 1e-8)
    return unit.T @ unit


def cosine_similarity(A, B):
    """Cosine similarity between every row of A and every row of B.

    :param A: array of shape (m, d).
    :param B: array of shape (n, d).
    :return: array of shape (m, n).
    """
    unit_a = A / (np.linalg.norm(A, axis=1, keepdims=True) + 1e-8)
    unit_b = B / (np.linalg.norm(B, axis=1, keepdims=True) + 1e-8)
    return unit_a @ unit_b.T


def pearson_similarity(scores_i, scores_j):
    """Pearson correlation over entries rated (>= 0) in both score vectors.

    Returns 0.0 when there is no commonly-rated entry or when either centred
    vector has zero variance.
    """
    both_rated = np.where((scores_i >= 0) & (scores_j >= 0))[0]
    if both_rated.size == 0:
        return 0.0

    x = scores_i[both_rated]
    y = scores_j[both_rated]
    dx = x - np.mean(x)
    dy = y - np.mean(y)

    denominator = np.sqrt(np.sum(dx ** 2)) * np.sqrt(np.sum(dy ** 2))
    if denominator == 0:
        return 0.0
    return np.sum(dx * dy) / denominator
"""Build and save the symmetrically normalized question-adjacency graph used by ABQR.

Two questions are adjacent when they share a concept; self-loops are added and the
matrix is normalized as D^{-1/2} (A + I) D^{-1/2}, then stored as a torch sparse tensor.
"""
import argparse
import os
import torch
import numpy as np
from scipy.sparse import lil_matrix, diags

from config import FILE_MANAGER_ROOT
from edmine.data.FileManager import FileManager
from edmine.utils.parse import c2q_from_q_table

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--setting_name", type=str, default="pykt_setting")
    parser.add_argument("--dataset_name", type=str, default="assist2009-single-concept")
    args = parser.parse_args()
    params = vars(args)

    file_manager = FileManager(FILE_MANAGER_ROOT)
    setting_dir = file_manager.get_setting_dir(params["setting_name"])
    abqr_dir = os.path.join(setting_dir, "ABQR")
    if not os.path.exists(abqr_dir):
        os.mkdir(abqr_dir)
    dataset_name = params["dataset_name"]
    save_path = os.path.join(abqr_dir, f"abqr_graph_{dataset_name}.pt")

    # load the Q-table and the concept -> questions mapping
    q_table = file_manager.get_q_table(dataset_name)
    c2q = c2q_from_q_table(q_table)
    num_question = q_table.shape[0]

    # LIL sparse format: efficient for incremental filling, memory-friendly
    A = lil_matrix((num_question, num_question), dtype=np.float32)

    # connect every pair of questions sharing a concept (only non-zeros stored)
    for q_ids in c2q.values():
        for i, q_i in enumerate(q_ids):
            for q_j in q_ids[i:]:
                A[q_i, q_j] = 1
                A[q_j, q_i] = 1

    # add self-loops on the diagonal (also guarantees degree >= 1 below)
    A.setdiag(1)

    # degree vector and symmetric normalization D^{-1/2} A D^{-1/2}
    degrees = np.array(A.sum(axis=1)).flatten()
    D_inv_sqrt = diags(1 / np.sqrt(degrees), offsets=0, format="csr")

    # sparse-sparse products, avoiding any dense intermediate
    A_normalized = D_inv_sqrt @ A @ D_inv_sqrt

    # convert to a PyTorch sparse COO tensor and persist
    A_normalized = A_normalized.tocoo()
    indices = torch.stack([torch.tensor(A_normalized.row), torch.tensor(A_normalized.col)])
    values = torch.tensor(A_normalized.data)
    graph = torch.sparse_coo_tensor(indices, values, A_normalized.shape)

    torch.save(graph, save_path)
def config_dler(local_params):
    """Build (global_params, global_objects) for evaluating a DL exercise-recommendation model.

    :param local_params: dict of CLI parameters; must contain "model_dir_name",
        "model_file_name", "model_name_in_ckt" and "top_ns"; may contain
        "save_log" and "evaluate_batch_size".
    :return: (global_params, global_objects) with the loaded model registered
        under global_objects["models"][model_name].
    """
    model_name, setting_name, train_file_name = get_model_info(local_params["model_dir_name"])
    global_params = {}
    global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)}
    # timestamped log file next to the checkpoint, only when save_log is requested
    if local_params.get("save_log", False):
        log_path = os.path.join(MODEL_DIR, local_params["model_dir_name"],
                                f"evaluate_log@{get_now_time().replace(' ', '@').replace(':', '-')}.txt")
    else:
        log_path = None
    config_logger(local_params, global_objects, log_path)
    config_general_dl_model(local_params, global_params)
    global_params["dler"] = {
        "kg4ex": {
            "batch_size": local_params.get("evaluate_batch_size", 1)
        },
        # SECURITY: eval() on a CLI-supplied string; acceptable for trusted local
        # use, but ast.literal_eval would be safer for parsing a list literal.
        "top_ns": eval(local_params["top_ns"])
    }
    config_q_table(local_params, global_params, global_objects)
    # KG4EX needs extra model-specific setup (embeddings/triples from the setting dir)
    if model_name == "KG4EX":
        config_kg4ex(local_params, global_objects, setting_name)
    model_dir = os.path.join(MODEL_DIR, local_params["model_dir_name"])
    model = load_dl_model(global_params, global_objects,
                          model_dir, local_params["model_file_name"], local_params["model_name_in_ckt"])
    global_params["evaluator_config"] = {"model_name": model_name}
    global_objects["models"] = {model_name: model}

    return global_params, global_objects
"""Evaluate a trained deep-learning cognitive-diagnosis model on a test split."""
import argparse
from torch.utils.data import DataLoader

from config import config_dlcd
from utils import get_model_info

from edmine.utils.parse import str2bool
from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset
from edmine.evaluator.DLCDEvaluator import DLCDEvaluator


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # which trained checkpoint to load
    parser.add_argument("--model_dir_name", type=str, help="",
                        default="NCD@@ncd_setting@@assist2009_train_fold_0@@seed_0@@2025-03-11@01-46-58")
    parser.add_argument("--model_file_name", type=str, help="文件名", default="saved.ckt")
    parser.add_argument("--model_name_in_ckt", type=str, help="文件名", default="best_valid")
    # evaluation data
    parser.add_argument("--dataset_name", type=str, default="assist2009")
    parser.add_argument("--test_file_name", type=str, help="文件名", default="assist2009_test.txt")
    parser.add_argument("--evaluate_batch_size", type=int, default=2048)
    # cold-start evaluation: restrict to users/questions rarely seen in training
    parser.add_argument("--evaluate_overall", type=str2bool, default=True)
    parser.add_argument("--user_cold_start", type=int, default=-1,
                        help="大于等于0则开启冷启动评估,即评估在训练数据集中练习记录数量小于等于k个的用户预测结果")
    parser.add_argument("--question_cold_start", type=int, default=-1,
                        help="大于等于0则开启冷启动评估,即评估在训练数据集中出现次数小于等于k个的习题的预测结果")
    # whether to persist evaluation results to a log file
    parser.add_argument("--save_log", type=str2bool, default=True)

    args = parser.parse_args()
    params = vars(args)

    global_params, global_objects = config_dlcd(params)

    # the setting name is encoded in the model dir name (second "@@" field)
    dataset_test = BasicCognitiveDiagnosisDataset({
        "setting_name": get_model_info(params["model_dir_name"])[1],
        "file_name": params["test_file_name"],
        "device": global_params["device"]
    }, global_objects)
    dataloader_test = DataLoader(dataset_test, batch_size=params["evaluate_batch_size"], shuffle=False)
    global_objects["data_loaders"] = {"test_loader": dataloader_test}
    evaluator = DLCDEvaluator(global_params, global_objects)
    evaluator.evaluate()
"""Hyperparameter search (hyperopt/TPE) for the HawkesKT knowledge-tracing model."""
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.hawkes_kt import config_hawkes_kt
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.sequential_kt_model.HawkesKT import HawkesKT


if __name__ == "__main__":
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=64)
    parser.add_argument("--evaluate_batch_size", type=int, default=256)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # model parameters
    parser.add_argument("--dim_emb", type=int, default=64)
    parser.add_argument("--time_log", type=int, default=5)
    # whether to automatically clip batch sequences
    parser.add_argument("--auto_clip_seq", type=str2bool, default=False)

    # search space: one hp.choice per tunable hyperparameter
    parameters_space = {
        "weight_decay": [0.0001, 0.00001, 0.000001, 0],
        "dim_emb": [64, 128, 256],
        "time_log": [2, 5, 10]
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # evaluation budget: full grid for tiny spaces, otherwise ~20% of the grid plus a floor
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # NOTE(review): likely dead — the tracker is a module-level global in utils.py.
    current_best_performance = 0
    fmin(get_objective_func(parser, config_hawkes_kt, "HawkesKT", HawkesKT), space, algo=tpe.suggest, max_evals=max_evals)
"""Hyperparameter search (hyperopt/TPE) for the HyperCD cognitive-diagnosis model."""
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.hyper_cd import config_hyper_cd
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.cognitive_diagnosis_model.HyperCD import HyperCD


if __name__ == "__main__":
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=256)
    parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # model parameters
    parser.add_argument("--num_layer", type=int, default=3)
    parser.add_argument("--dim_feature", type=int, default=512)
    parser.add_argument("--dim_emb", type=int, default=16)
    parser.add_argument("--leaky", type=float, default=0.8)

    # search space: one hp.choice per tunable hyperparameter
    parameters_space = {
        "num_layer": [3, 4],
        "dim_feature": [512, 1024],
        "dim_emb": [8, 16],
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # evaluation budget: full grid for tiny spaces, otherwise ~20% of the grid plus a floor
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # NOTE(review): likely dead — the tracker is a module-level global in utils.py.
    current_best_performance = 0
    fmin(get_objective_func(parser, config_hyper_cd, "HyperCD", HyperCD), space, algo=tpe.suggest, max_evals=max_evals)
def promotion_report(initial_scores, final_scores, path_lengths, weights=None):
    """Aggregate promotion metrics for learning-path recommendation.

    Parameters
    ----------
    initial_scores: list or array
        Learner scores before following the recommended path (full score is 1).
    final_scores: list or array
        Learner scores after following the path.
    path_lengths: list or array
        Number of recommended steps per learner.
    weights: list or array, optional
        Per-learner weights applied to the absolute promotion.

    Returns
    -------
    report: dict
        AP:   absolute promotion = final_score - initial_score
        APR:  absolute promotion rate = AP / path_length
        RP:   relative promotion = AP / full_score
        RPR:  relative promotion rate = RP / path_length
        NRP:  normalized relative promotion = AP / (full_score - initial_score)
        NRPR: normalized relative promotion rate = NRP / path_length
        Each value averaged over learners; all metrics are -1 for empty input.
    """
    if len(initial_scores) == 0 or len(final_scores) == 0 or len(path_lengths) == 0:
        return {
            "AP": -1,
            "APR": -1,
            "RP": -1,
            "RPR": -1,
            "NRP": -1,
            "NRPR": -1
        }

    initial_scores = np.asarray(initial_scores, dtype=float)
    final_scores = np.asarray(final_scores, dtype=float)
    path_lengths = np.asarray(path_lengths, dtype=float)

    absp = final_scores - initial_scores
    if weights is not None:
        absp = absp * np.asarray(weights)

    ret = {"AP": absp}

    with np.errstate(divide="ignore", invalid="ignore"):
        ret["APR"] = _zero_non_finite(absp / path_lengths)

        full_score = np.ones_like(initial_scores)
        ret["RP"] = absp / full_score
        ret["RPR"] = _zero_non_finite(absp / (full_score * path_lengths))

        # Bug fix: the original left NRP unguarded, so a learner whose initial
        # score already equals the full score produced inf/NaN and poisoned the
        # average; it also checked only +inf, missing -inf and NaN everywhere.
        ret["NRP"] = _zero_non_finite(absp / (full_score - initial_scores))
        ret["NRPR"] = _zero_non_finite(absp / ((full_score - initial_scores) * path_lengths))

    return {k: np.average(v) for k, v in ret.items()}


def _zero_non_finite(arr):
    """Replace inf/-inf/NaN entries (from zero denominators) with 0, in place."""
    arr[~np.isfinite(arr)] = 0
    return arr
from torch.utils.data import DataLoader

from edmine.utils.use_torch import set_seed
from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset
from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer

# best validation metric seen across all hyperopt trials in this process
current_best_performance = -100

def get_objective_func(parser, config_func, model_name, model_class):
    """Build a hyperopt objective that trains a cognitive-diagnosis model once per trial.

    :param parser: argparse parser providing the base CLI parameters.
    :param config_func: model-specific config function -> (global_params, global_objects).
    :param model_name: key under which the model is registered in global_objects["models"].
    :param model_class: model class, instantiated as model_class(global_params, global_objects).
    :return: objective(parameters) -> negative validation main metric (hyperopt minimizes).
    """
    def objective(parameters):
        global current_best_performance
        args = parser.parse_args()
        params = vars(args)

        # override CLI defaults with this trial's sampled hyperparameters
        params["search_params"] = True
        params["save_model"] = False
        params["debug_mode"] = False
        params["use_cpu"] = False
        for param_name in parameters:
            params[param_name] = parameters[param_name]
        set_seed(params["seed"])
        global_params, global_objects = config_func(params)

        dataset_train = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["train"], global_objects)
        dataloader_train = DataLoader(dataset_train, batch_size=params["train_batch_size"], shuffle=True)
        dataset_valid = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["valid"], global_objects)
        # NOTE(review): the valid loader uses train_batch_size although an
        # evaluate_batch_size parameter exists — confirm this is intentional.
        dataloader_valid = DataLoader(dataset_valid, batch_size=params["train_batch_size"], shuffle=False)

        global_objects["data_loaders"] = {
            "train_loader": dataloader_train,
            "valid_loader": dataloader_valid
        }
        global_objects["models"] = {
            model_name: model_class(global_params, global_objects).to(global_params["device"])
        }
        trainer = DLCognitiveDiagnosisTrainer(global_params, global_objects)
        trainer.train()
        performance_this = trainer.train_record.get_evaluate_result("valid", "valid")["main_metric"]

        # report (with a small tolerance) whenever a trial beats the best so far
        if (performance_this - current_best_performance) >= 0.001:
            current_best_performance = performance_this
            print(f"current best params (performance is {performance_this}):\n    " +
                  ", ".join(list(map(lambda s: f"{s}: {parameters[s]}", parameters.keys()))))
        # hyperopt minimizes, so return the negated metric
        return -performance_this
    return objective
import torch.nn as nn


class PredictorLayer(nn.Module):
    """Configurable prediction head built from a config dict.

    predictor_config["type"]:
        "direct": an MLP ending in Sigmoid whose output is rescaled by
            predictor_config["last_layer_max_value"].
        "dot": no layers are built. NOTE(review): forward() still calls
            self.predict_layer, which is never created for this type, so it
            would raise AttributeError — confirm how "dot" is meant to be used.
    """
    def __init__(self, predictor_config):
        super(PredictorLayer, self).__init__()
        self.predictor_config = predictor_config
        if predictor_config["type"] == "direct":
            dropout = predictor_config["dropout"]
            num_predict_layer = predictor_config["num_predict_layer"]
            dim_predict_in = predictor_config["dim_predict_in"]
            dim_predict_mid = predictor_config["dim_predict_mid"]
            activate_type = predictor_config["activate_type"]

            # hidden activation; anything other than tanh/relu falls back to sigmoid
            if activate_type == "tanh":
                act_func = nn.Tanh
            elif activate_type == "relu":
                act_func = nn.ReLU
            else:
                act_func = nn.Sigmoid

            dim_predict_out = predictor_config["dim_predict_out"]
            self.predict_layer = []
            if num_predict_layer == 1:
                # single layer: dropout -> linear -> sigmoid
                self.predict_layer.append(nn.Dropout(dropout))
                self.predict_layer.append(nn.Linear(dim_predict_in, dim_predict_out))
                self.predict_layer.append(nn.Sigmoid())
            else:
                # input projection, then (num_predict_layer - 1) blocks of
                # activation -> dropout -> linear(mid, mid), then
                # dropout -> output linear -> sigmoid
                self.predict_layer.append(nn.Linear(dim_predict_in, dim_predict_mid))
                for _ in range(num_predict_layer - 1):
                    self.predict_layer.append(act_func())
                    self.predict_layer.append(nn.Dropout(dropout))
                    self.predict_layer.append(nn.Linear(dim_predict_mid, dim_predict_mid))
                self.predict_layer.append(nn.Dropout(dropout))
                self.predict_layer.append(nn.Linear(dim_predict_mid, dim_predict_out))
                self.predict_layer.append(nn.Sigmoid())
            self.predict_layer = nn.Sequential(*self.predict_layer)
        elif predictor_config["type"] == "dot":
            pass
        else:
            raise NotImplementedError()

    def forward(self, batch):
        # NOTE(review): self.predict_layer only exists when type == "direct".
        y = self.predict_layer(batch)
        if self.predictor_config["type"] == "direct":
            # rescale the sigmoid output from (0, 1) to (0, last_layer_max_value)
            last_layer_max_value = self.predictor_config["last_layer_max_value"]
            y = y * last_layer_max_value
        return y
"""Hyperparameter search (hyperopt/TPE) for the NCD cognitive-diagnosis model."""
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.ncd import config_ncd
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.cognitive_diagnosis_model.NCD import NCD


if __name__ == "__main__":
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=256)
    parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # model parameters (prediction-head MLP)
    parser.add_argument("--dropout", type=float, default=0.3)
    parser.add_argument("--num_predict_layer", type=int, default=2)
    parser.add_argument("--dim_predict_mid", type=int, default=64)
    parser.add_argument("--activate_type", type=str, default="sigmoid")

    # search space: one hp.choice per tunable hyperparameter
    parameters_space = {
        "train_batch_size": [512, 1024, 2048],
        "learning_rate": [0.0001, 0.001],
        "weight_decay": [0.0001, 0.00001, 0],
        "dropout": [0.1, 0.2, 0.3, 0.4, 0.5],
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # evaluation budget: full grid for tiny spaces, otherwise ~20% of the grid plus a floor
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # NOTE(review): likely dead — the tracker is a module-level global in utils.py.
    current_best_performance = 0
    fmin(get_objective_func(parser, config_ncd, "NCD", NCD), space, algo=tpe.suggest, max_evals=max_evals)
"""Hyperparameter search (hyperopt/TPE) for the DINA cognitive-diagnosis model."""
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.dina import config_dina
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.cognitive_diagnosis_model.DINA import DINA


if __name__ == "__main__":
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=256)
    parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # model parameters (slip/guess bounds, annealing steps, straight-through estimator)
    parser.add_argument("--max_slip", type=float, default=0.3)
    parser.add_argument("--max_guess", type=float, default=0.3)
    parser.add_argument("--max_step", type=int, default=500)
    parser.add_argument("--use_ste", type=str2bool, default=True)

    # search space: one hp.choice per tunable hyperparameter
    parameters_space = {
        "train_batch_size": [512, 1024, 2048],
        "learning_rate": [0.0001, 0.001],
        "weight_decay": [0.0001, 0.00001, 0],
        "max_slip": [0.1, 0.2, 0.3, 0.4],
        "max_guess": [0.1, 0.2, 0.3, 0.4]
    }
    space = {
        param_name: hp.choice(param_name, param_space)
        for param_name, param_space in parameters_space.items()
    }
    # evaluation budget: full grid for tiny spaces, otherwise ~20% of the grid plus a floor
    num = 1
    for parameter_space in parameters_space.values():
        num *= len(parameter_space)
    if num > 100:
        max_evals = 20 + int(num * 0.2)
    elif num > 50:
        max_evals = 15 + int(num * 0.2)
    elif num > 20:
        max_evals = 10 + int(num * 0.2)
    elif num > 10:
        max_evals = 5 + int(num * 0.2)
    else:
        max_evals = num
    # NOTE(review): likely dead — the tracker is a module-level global in utils.py.
    current_best_performance = 0
    fmin(get_objective_func(parser, config_dina, "DINA", DINA), space, algo=tpe.suggest, max_evals=max_evals)
44 | if num > 100: 45 | max_evals = 20 + int(num * 0.2) 46 | elif num > 50: 47 | max_evals = 15 + int(num * 0.2) 48 | elif num > 20: 49 | max_evals = 10 + int(num * 0.2) 50 | elif num > 10: 51 | max_evals = 5 + int(num * 0.2) 52 | else: 53 | max_evals = num 54 | current_best_performance = 0 55 | fmin(get_objective_func(parser, config_dina, "DINA", DINA), space, algo=tpe.suggest, max_evals=max_evals) 56 | 57 | -------------------------------------------------------------------------------- /edmine/utils/use_torch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | 5 | from edmine.utils.check import check_q_table 6 | 7 | 8 | def parse_q_table(q_table: np.ndarray, device: str): 9 | """ 10 | Processes a question-concept relationship matrix (q_table) to generate: 11 | A table mapping each question to its associated concepts. 12 | A mask table to handle padding for questions with fewer concepts than the maximum. 13 | :param q_table: A 2D NumPy array representing the question-concept relationship, where rows correspond to questions and columns correspond to concepts. A value of 1 indicates a relationship between a question and a concept. 14 | :param device: The device (e.g., CPU or GPU) where the output tensors should be allocated. 15 | :return: q2c_table, : A tensor of shape (num_questions, num_max_c_in_q) where each row contains the concept IDs associated with a question. Padding is used for questions with fewer concepts than num_max_c_in_q. || q2c_mask_table, A tensor of shape (num_questions, num_max_c_in_q) where each row contains a mask indicating valid concept IDs (1) and padding (0). 
def set_seed(seed):
    """Seed torch (CPU and, when available, CUDA/cuDNN), NumPy and the
    built-in ``random`` module so experiment runs are reproducible.

    Torch seeding is best-effort: any failure is reported to stdout but does
    not prevent the NumPy / ``random`` seeding below.
    """
    try:
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)
            # force cuDNN onto deterministic kernels and disable autotuning,
            # which would otherwise pick kernels nondeterministically
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
    except Exception as e:
        print("Set seed failed, details are ", e)
    random.seed(seed)
    np.random.seed(seed)
setup_clip_args(), setup_grad_acc_args()], 17 | add_help=False) 18 | # 优化器 19 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 20 | parser.add_argument("--learning_rate", type=float, default=0.0001) 21 | parser.add_argument("--weight_decay", type=float, default=0) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 折扣因子 24 | parser.add_argument("--gamma", type=float, default=0.9, help="discount factor") 25 | # 模型参数 26 | parser.add_argument("--max_question_attempt", type=int, default=20) 27 | parser.add_argument("--num_layer_action_model", type=int, default=2) 28 | parser.add_argument("--num_layer_state_model", type=int, default=2) 29 | # 其它 30 | parser.add_argument("--save_model", type=str2bool, default=False) 31 | parser.add_argument("--use_wandb", type=str2bool, default=False) 32 | 33 | args = parser.parse_args() 34 | params = vars(args) 35 | set_seed(params["seed"]) 36 | global_params, global_objects = config_reinforce(params) 37 | 38 | global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset") 39 | setting_dir = global_objects["file_manager"].get_setting_dir(params["setting_name"]) 40 | global_objects["data"] = { 41 | "train": read_kt_file(os.path.join(setting_dir, params["train_file_name"])), 42 | "valid": read_kt_file(os.path.join(setting_dir, params["valid_file_name"])) 43 | } 44 | 45 | global_objects["agents"] = { 46 | "Reinforce": Reinforce(global_params, global_objects) 47 | } 48 | 49 | trainer = LPROnlineDRLTrainer(global_params, global_objects) 50 | trainer.train() 51 | -------------------------------------------------------------------------------- /edmine/model/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from edmine.model.module.calculation import wasserstein_distance_matmul 6 | 7 | 8 | def binary_cross_entropy(predict_score, 
def wasserstein_distance(mean1, cov1, mean2, cov2):
    """Squared 2-Wasserstein distance between two diagonal Gaussians.

    Computed along the last dimension as
    ``||mean1 - mean2||^2 + ||sqrt(cov1) - sqrt(cov2)||^2``.
    Covariances are clamped to a tiny positive floor before the square root
    so the gradient stays finite at zero.
    """
    mean_gap = mean1 - mean2
    std1 = torch.clamp(cov1, min=1e-24).sqrt()
    std2 = torch.clamp(cov2, min=1e-24).sqrt()
    std_gap = std1 - std2
    return (mean_gap * mean_gap).sum(-1) + (std_gap * std_gap).sum(-1)
import json
import os
import inspect

from edmine.config.data import config_q_table, config_sequential_kt_dataset
from edmine.config.basic import config_logger
from edmine.config.model import config_general_dl_model
from edmine.config.train import config_epoch_trainer, config_optimizer
from edmine.config.train import config_wandb
from edmine.data.FileManager import FileManager
from edmine.utils.log import get_now_time
from edmine.utils.data_io import save_params

# Resolve settings.json relative to this config module so the script works
# regardless of the current working directory.
current_file_name = inspect.getfile(inspect.currentframe())
current_dir = os.path.dirname(current_file_name)
settings_path = os.path.join(current_dir, "../../../settings.json")
with open(settings_path, "r") as f:
    settings = json.load(f)
FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"]
MODELS_DIR = settings["MODELS_DIR"]


def config_rekt(local_params):
    """Build the training configuration for the ReKT knowledge-tracing model.

    Takes the parsed command-line arguments (`local_params`, a dict) and
    returns `(global_params, global_objects)`: the nested config dict consumed
    by the trainer, and runtime objects (file manager, logger, q-table, ...).

    NOTE(review): the sequence of `config_*` helper calls below populates
    `global_params`/`global_objects` in place; their ordering presumably
    matters (e.g. trainer config before optimizer config) — confirm before
    reordering.
    """
    model_name = "ReKT"

    global_params = {}
    global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)}
    config_logger(local_params, global_objects)
    config_general_dl_model(local_params, global_params)
    # ReKT uses no auxiliary losses, so the loss config stays empty.
    global_params["loss_config"] = {}
    config_epoch_trainer(local_params, global_params, model_name)
    config_sequential_kt_dataset(local_params, global_params)
    config_optimizer(local_params, global_params, model_name)
    config_q_table(local_params, global_params, global_objects)

    # model hyper-parameters
    global_params["models_config"] = {
        model_name: {
            "num_concept": local_params["num_concept"],
            "num_question": local_params["num_question"],
            "dim_emb": local_params["dim_emb"],
            "dropout": local_params["dropout"]
        }
    }

    if local_params["save_model"]:
        setting_name = local_params["setting_name"]
        train_file_name = local_params["train_file_name"]

        # Directory name encodes model, setting, train split, seed and a
        # filesystem-safe timestamp, joined by the "@@" separator convention.
        global_params["trainer_config"]["save_model_dir_name"] = (
            f"{model_name}@@{setting_name}@@{train_file_name.replace('.txt', '')}@@seed_{local_params['seed']}@@"
            f"{get_now_time().replace(' ', '@').replace(':', '-')}")
        save_params(global_params, MODELS_DIR, global_objects["logger"])
    config_wandb(local_params, global_params, model_name)

    return global_params, global_objects
import argparse
import os
import inspect

from edmine.data.FileManager import FileManager
from edmine.data.KTDataProcessor import KTDataProcessor
from edmine.utils.data_io import write_kt_file, read_json


# Resolve settings.json relative to this script so it runs from any cwd.
current_file_name = inspect.getfile(inspect.currentframe())
current_dir = os.path.dirname(current_file_name)
settings_path = os.path.join(current_dir, "../settings.json")
settings = read_json(settings_path)
FILE_MANAGER_ROOT = settings["FILE_MANAGER_ROOT"]


if __name__ == "__main__":
    # Preprocess one raw knowledge-tracing dataset into the uniform format
    # and persist the data, statistics, Q-table and id maps.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_name", type=str, default="DBE-KT22",
                        choices=("assist2009", "assist2009-full", "assist2012", "assist2015", "assist2017",
                                 "algebra2005", "algebra2006", "algebra2008",
                                 "bridge2algebra2006", "bridge2algebra2008",
                                 "edi2020-task1", "edi2020-task34",
                                 "SLP-bio", "SLP-chi", "SLP-eng", "SLP-geo", "SLP-his", "SLP-mat", "SLP-phy",
                                 "ednet-kt1", "slepemapy-anatomy", "xes3g5m", "statics2011", "junyi2015", "poj",
                                 "DBE-KT22"))

    args = parser.parse_args()
    params = vars(args)
    file_manager = FileManager(FILE_MANAGER_ROOT)
    # bind once and use consistently below (the original mixed
    # params["dataset_name"] and a late-bound local)
    dataset_name = params["dataset_name"]

    params["data_path"] = file_manager.get_dataset_raw_path(dataset_name)
    print(f"processing {dataset_name} ...")
    data_processor = KTDataProcessor(params, file_manager)
    data_uniformed = data_processor.preprocess_data()
    Q_table = data_processor.Q_table
    data_statics_raw = data_processor.statics_raw
    data_statics_preprocessed = data_processor.statics_preprocessed

    print(f"saving data of {dataset_name} ...")
    data_path = file_manager.get_preprocessed_path(dataset_name)
    write_kt_file(data_uniformed, data_path)
    file_manager.save_data_statics_raw(data_statics_raw, dataset_name)
    file_manager.save_data_statics_processed(data_statics_preprocessed, dataset_name)
    file_manager.save_q_table(Q_table, dataset_name)
    file_manager.save_data_id_map(data_processor.get_all_id_maps(), dataset_name)
    # fixed typo: "finsh" -> "finish"
    print("finish processing and saving successfully")
"__main__": 13 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 14 | add_help=False) 15 | # batch size 16 | parser.add_argument("--train_batch_size", type=int, default=64) 17 | parser.add_argument("--evaluate_batch_size", type=int, default=256) 18 | # 优化器 19 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 20 | parser.add_argument("--learning_rate", type=float, default=0.001) 21 | parser.add_argument("--weight_decay", type=float, default=0.0001) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 模型参数 24 | parser.add_argument("--dim_key", type=int, default=64) 25 | parser.add_argument("--dim_value", type=int, default=64) 26 | parser.add_argument("--dropout", type=float, default=0.2) 27 | parser.add_argument("--num_predict_layer", type=int, default=1) 28 | parser.add_argument("--dim_predict_mid", type=int, default=64) 29 | parser.add_argument("--activate_type", type=str, default="sigmoid") 30 | # 是否自动裁剪batch序列 31 | parser.add_argument("--auto_clip_seq", type=str2bool, default=False) 32 | 33 | # 设置参数空间 34 | parameters_space = { 35 | "weight_decay": [0.0001, 0.00001, 0], 36 | "dim_key": [32, 64], 37 | "dim_value": [32, 64], 38 | "dropout": [0.1, 0.2, 0.3], 39 | } 40 | space = { 41 | param_name: hp.choice(param_name, param_space) 42 | for param_name, param_space in parameters_space.items() 43 | } 44 | num = 1 45 | for parameter_space in parameters_space.values(): 46 | num *= len(parameter_space) 47 | if num > 100: 48 | max_evals = 20 + int(num * 0.2) 49 | elif num > 50: 50 | max_evals = 15 + int(num * 0.2) 51 | elif num > 20: 52 | max_evals = 10 + int(num * 0.2) 53 | elif num > 10: 54 | max_evals = 5 + int(num * 0.2) 55 | else: 56 | max_evals = num 57 | fmin(get_objective_func(parser, config_dkvmn, "DKVMN", DKVMN), space, algo=tpe.suggest, max_evals=max_evals) 58 | 
import argparse

from torch.utils.data import DataLoader

from set_params import *
from config.rcd import config_rcd

from edmine.utils.parse import str2bool
from edmine.utils.use_torch import set_seed
from edmine.utils.log import get_now_time
from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset
from edmine.model.cognitive_diagnosis_model.RCD import RCD
from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer


if __name__ == "__main__":
    # Training entry point for the RCD cognitive-diagnosis model.
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=1024)
    parser.add_argument("--evaluate_batch_size", type=int, default=2048)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.0001)
    parser.add_argument("--weight_decay", type=float, default=0)
    parser.add_argument("--momentum", type=float, default=0.9)
    # misc
    parser.add_argument("--save_model", type=str2bool, default=False)
    parser.add_argument("--use_wandb", type=str2bool, default=False)

    args = parser.parse_args()
    params = vars(args)
    set_seed(params["seed"])
    global_params, global_objects = config_rcd(params)

    global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset")
    dataset_train = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["train"], global_objects)
    dataloader_train = DataLoader(dataset_train, batch_size=params["train_batch_size"], shuffle=True)
    dataset_valid = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["valid"], global_objects)
    # BUG FIX: the valid loader previously used train_batch_size, leaving
    # --evaluate_batch_size parsed but unused.
    dataloader_valid = DataLoader(dataset_valid, batch_size=params["evaluate_batch_size"], shuffle=False)

    global_objects["data_loaders"] = {
        "train_loader": dataloader_train,
        "valid_loader": dataloader_valid
    }
    global_objects["models"] = {
        "RCD": RCD(global_params, global_objects).to(global_params["device"])
    }
    trainer = DLCognitiveDiagnosisTrainer(global_params, global_objects)
    trainer.train()
config_epoch_trainer(local_params, global_params, model_name) 32 | config_sequential_kt_dataset(local_params, global_params) 33 | config_optimizer(local_params, global_params, model_name) 34 | config_q_table(local_params, global_params, global_objects) 35 | 36 | # 模型参数 37 | global_params["models_config"] = { 38 | model_name: { 39 | "num_concept": local_params["num_concept"], 40 | "num_question": local_params["num_question"], 41 | "dim_emb": local_params["dim_emb"], 42 | "dim_state": local_params["dim_state"], 43 | "dropout": local_params["dropout"], 44 | "seq_len": local_params["seq_len"], 45 | } 46 | } 47 | 48 | if local_params["save_model"]: 49 | setting_name = local_params["setting_name"] 50 | train_file_name = local_params["train_file_name"] 51 | 52 | global_params["trainer_config"]["save_model_dir_name"] = ( 53 | f"{model_name}@@{setting_name}@@{train_file_name.replace('.txt', '')}@@seed_{local_params['seed']}@@" 54 | f"{get_now_time().replace(' ', '@').replace(':', '-')}") 55 | save_params(global_params, MODELS_DIR, global_objects["logger"]) 56 | config_wandb(local_params, global_params, model_name) 57 | 58 | return global_params, global_objects 59 | -------------------------------------------------------------------------------- /examples/exercise_recommendation/user_exercise_based_CF/rec_strategy.py: -------------------------------------------------------------------------------- 1 | def rec_method_based_on_que_sim(users_history, similar_questions, top_n): 2 | rec_ques = {x["user_id"]: [] for x in users_history} 3 | for item_data in users_history: 4 | seq_len = item_data["seq_len"] 5 | question_seq = item_data["question_seq"][:seq_len] 6 | correct_seq = item_data["correctness_seq"][:seq_len] 7 | answered_ques = set(question_seq) 8 | target_question = question_seq[-1] 9 | if sum(correct_seq) != seq_len: 10 | for q_id, correctness in zip(question_seq[::-1], correct_seq[::-1]): 11 | if correctness == 0: 12 | target_question = q_id 13 | break 14 | 15 | 
def rec_method_based_on_user_sim(users_history, similar_users, question_diff, th, top_n):
    """Recommend up to `top_n` unseen questions per user via similar users.

    For each user, candidate questions are those answered by similar users but
    not by the user. A candidate is accepted when its difficulty is within
    `th` of the user's average error rate; `th` is relaxed by 0.05 per pass
    until `top_n` questions are found or no further candidates can qualify.

    :param users_history: list of dicts with "user_id", "seq_len",
        "question_seq", "correctness_seq".
    :param similar_users: dict user_id -> ordered list of similar user ids.
    :param question_diff: dict question_id -> difficulty, assumed in [0, 1].
    :param th: initial difficulty-gap threshold.
    :param top_n: maximum number of recommendations per user.
    :return: dict user_id -> list of recommended question ids (may be shorter
        than `top_n` when candidates run out).
    """
    users_answered_ques = {}
    for item_data in users_history:
        users_answered_ques[item_data["user_id"]] = set(item_data["question_seq"][:item_data["seq_len"]])

    rec_ques = {x["user_id"]: [] for x in users_history}
    for item_data in users_history:
        user_id = item_data["user_id"]
        seq_len = item_data["seq_len"]
        question_seq = item_data["question_seq"][:seq_len]
        correct_seq = item_data["correctness_seq"][:seq_len]
        answered_ques = set(question_seq)
        # BUG FIX: track what was already recommended so repeated relaxation
        # passes cannot append the same question twice.
        recommended = set()
        average_diff = 1 - sum(correct_seq) / seq_len
        # BUG FIX: use a per-user copy of the threshold so one user's
        # relaxation does not leak into the next user's search.
        user_th = th
        while len(rec_ques[user_id]) < top_n:
            # the threshold may be too small to yield top_n questions; relax it
            user_th += 0.05
            for sim_user_id in similar_users[user_id]:
                if sim_user_id not in users_answered_ques:
                    continue
                for q_id in (users_answered_ques[sim_user_id] - answered_ques):
                    if q_id in recommended:
                        continue
                    q_diff = question_diff[q_id]
                    # BUG FIX: compare against the (relaxed) threshold instead
                    # of the hard-coded 0.1 the relaxation loop never affected.
                    if abs(average_diff - q_diff) < user_th:
                        rec_ques[user_id].append(q_id)
                        recommended.add(q_id)
                        if len(rec_ques[user_id]) >= top_n:
                            break
                if len(rec_ques[user_id]) >= top_n:
                    break
            if user_th > 1:
                # difficulties live in [0, 1], so once the threshold exceeds 1
                # every remaining candidate already qualified; stop rather
                # than loop forever when fewer than top_n candidates exist.
                break

    return rec_ques
argparse.ArgumentParser(parents=[setup_common_args(), setup_epoch_trainer_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 13 | add_help=False) 14 | # batch size 15 | parser.add_argument("--buffer_size", type=int, default=5000) 16 | parser.add_argument("--train_batch_size", type=int, default=64) 17 | # 优化器 18 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 19 | parser.add_argument("--learning_rate", type=float, default=0.0001) 20 | parser.add_argument("--weight_decay", type=float, default=0) 21 | parser.add_argument("--momentum", type=float, default=0.9) 22 | # 折扣因子和探索概率 23 | parser.add_argument("--gamma", type=float, default=0.9, help="discount factor") 24 | parser.add_argument("--epsilon", type=float, default=0.1, help="ε-greedy") 25 | # 模型参数 26 | parser.add_argument("--max_question_attempt", type=int, default=20) 27 | parser.add_argument("--dim_c_feature", type=int, default=64) 28 | parser.add_argument("--dim_q_feature", type=int, default=128) 29 | parser.add_argument("--num_layer_c_rec_model", type=int, default=2) 30 | parser.add_argument("--num_layer_q_rec_model", type=int, default=2) 31 | 32 | # 设置参数空间 33 | parameters_space = { 34 | "buffer_size": [500, 1000], 35 | "learning_rate": [0.00001, 0.0001, 0.001], 36 | "gamma": [0.9, 0.95, 0.99], 37 | "epsilon": [0.05, 0.1, 0.2], 38 | } 39 | space = { 40 | param_name: hp.choice(param_name, param_space) 41 | for param_name, param_space in parameters_space.items() 42 | } 43 | num = 1 44 | for parameter_space in parameters_space.values(): 45 | num *= len(parameter_space) 46 | if num > 100: 47 | max_evals = 20 + int(num * 0.2) 48 | elif num > 50: 49 | max_evals = 15 + int(num * 0.2) 50 | elif num > 20: 51 | max_evals = 10 + int(num * 0.2) 52 | elif num > 10: 53 | max_evals = 5 + int(num * 0.2) 54 | else: 55 | max_evals = num 56 | current_best_performance = 0 57 | fmin(get_objective_func(parser, config_d3qn, "D3QN", D3QN), space, algo=tpe.suggest, 
import argparse

from torch.utils.data import DataLoader

from set_params import *
from config.mirt import config_mirt

from edmine.utils.parse import str2bool
from edmine.utils.use_torch import set_seed
from edmine.utils.log import get_now_time
from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset
from edmine.model.cognitive_diagnosis_model.MIRT import MIRT
from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer


if __name__ == "__main__":
    # Training entry point for the MIRT cognitive-diagnosis model.
    parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
                                     add_help=False)
    # batch size
    parser.add_argument("--train_batch_size", type=int, default=256)
    parser.add_argument("--evaluate_batch_size", type=int, default=512)
    # optimizer
    parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--weight_decay", type=float, default=0.0001)
    parser.add_argument("--momentum", type=float, default=0.9)
    # model hyper-parameters
    parser.add_argument("--a_range", type=float, default=1)
    # misc
    parser.add_argument("--save_model", type=str2bool, default=False)
    parser.add_argument("--use_wandb", type=str2bool, default=False)

    args = parser.parse_args()
    params = vars(args)
    set_seed(params["seed"])
    global_params, global_objects = config_mirt(params)

    global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset")
    dataset_train = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["train"], global_objects)
    dataloader_train = DataLoader(dataset_train, batch_size=params["train_batch_size"], shuffle=True)
    dataset_valid = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["valid"], global_objects)
    # BUG FIX: the valid loader previously used train_batch_size, leaving
    # --evaluate_batch_size parsed but unused.
    dataloader_valid = DataLoader(dataset_valid, batch_size=params["evaluate_batch_size"], shuffle=False)

    global_objects["data_loaders"] = {
        "train_loader": dataloader_train,
        "valid_loader": dataloader_valid
    }
    global_objects["models"] = {
        "MIRT": MIRT(global_params, global_objects).to(global_params["device"])
    }
    trainer = DLCognitiveDiagnosisTrainer(global_params, global_objects)
    trainer.train()
def gen_gikt_graph(question2concept, concept2question, q_neighbor_size, c_neighbor_size):
    """Build fixed-size neighbor tables for GIKT's question-concept graph.

    Every node's neighbor list is resampled to exactly the requested width:
    downsampled without replacement when it has enough neighbors, padded by
    sampling with replacement otherwise.

    :param question2concept: dict question_id -> list of concept ids.
    :param concept2question: dict concept_id -> list of question ids.
    :param q_neighbor_size: number of concept neighbors kept per question.
    :param c_neighbor_size: number of question neighbors kept per concept.
    :return: (q_neighbors, c_neighbors) int32 arrays of shapes
        (num_question, q_neighbor_size) and (num_concept, c_neighbor_size).
    """
    def sample_table(node2neighbors, width):
        # One row per node id; replacement is enabled only when there are
        # fewer neighbors than slots to fill.
        table = np.zeros([len(node2neighbors), width], dtype=np.int32)
        for node_id, neighbors in node2neighbors.items():
            allow_repeat = len(neighbors) < width
            table[node_id] = np.random.choice(neighbors, width, replace=allow_repeat)
        return table

    q_neighbors = sample_table(question2concept, q_neighbor_size)
    c_neighbors = sample_table(concept2question, c_neighbor_size)
    return q_neighbors, c_neighbors
/examples/knowledge_tracing/train/atdkt_search_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from hyperopt import fmin, tpe, hp 3 | 4 | from set_params import * 5 | from config.atdkt import config_atdkt 6 | from utils import get_objective_func 7 | 8 | from edmine.utils.parse import str2bool 9 | from edmine.model.sequential_kt_model.ATDKT import ATDKT 10 | 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 14 | add_help=False) 15 | # batch size 16 | parser.add_argument("--train_batch_size", type=int, default=64) 17 | parser.add_argument("--evaluate_batch_size", type=int, default=256) 18 | # 优化器 19 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 20 | parser.add_argument("--learning_rate", type=float, default=0.001) 21 | parser.add_argument("--weight_decay", type=float, default=0.0001) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 模型参数 24 | parser.add_argument("--dim_emb", type=int, default=64) 25 | parser.add_argument("--dim_latent", type=int, default=256) 26 | parser.add_argument("--num_rnn_layer", type=int, default=1) 27 | parser.add_argument("--dropout", type=float, default=0.2) 28 | parser.add_argument("--IK_start", type=int, default=30) 29 | parser.add_argument("--w_QT_loss", type=float, default=1) 30 | parser.add_argument("--w_IK_loss", type=float, default=1) 31 | # 是否自动裁剪batch序列 32 | parser.add_argument("--auto_clip_seq", type=str2bool, default=False) 33 | 34 | # 设置参数空间 35 | parameters_space = { 36 | "weight_decay": [0.0001, 0.00001, 0], 37 | "dim_emb": [64, 256], 38 | "dropout": [0.1, 0.2, 0.3], 39 | "w_QT_loss": [0.1, 0.5, 1], 40 | "w_IK_loss": [0.1, 0.5, 1], 41 | } 42 | space = { 43 | param_name: hp.choice(param_name, param_space) 44 | for param_name, param_space in parameters_space.items() 45 | } 46 
class SequentialDLKTEvaluator4FTAcc(DLEvaluator):
    """Evaluator that scores only each student's *first* attempt on every concept.

    For each test sequence it walks one-step predictions from ``seq_start`` onward
    and keeps a (prediction, label) pair only the first time a concept appears in
    that sequence, then computes the usual KT metrics over those samples.
    """

    def __init__(self, params, objects):
        super().__init__(params, objects)

    def inference(self, model, data_loader):
        """Run one-step inference and return KT metrics on first-attempt samples.

        Args:
            model: sequential KT model exposing ``get_predict_score(batch)`` whose
                result contains a ``predict_score_batch`` tensor.
            data_loader: yields batches with ``question_seq``, ``correctness_seq``
                and ``seq_len`` tensors.

        Returns:
            Metric dict from ``get_kt_metric`` (AUC/ACC/RMSE/MAE, ...).
        """
        seq_start = self.params["sequential_dlkt"]["seq_start"]
        # q2c is dataset-wide and loop-invariant: hoist it out of the batch loop
        # (the original re-read it for every batch).
        q2c = self.objects["dataset"]["q2c"]
        predict_score_all = []
        ground_truth_all = []
        for batch in tqdm(data_loader, desc="one step inference"):
            question_seqs = batch["question_seq"][:, 1:].detach().cpu().numpy()
            correctness_seqs = batch["correctness_seq"][:, 1:].detach().cpu().numpy()
            predict_lens = (batch["seq_len"] - 1).detach().cpu().numpy()
            predict_score_seqs = model.get_predict_score(batch)["predict_score_batch"].detach().cpu().numpy()
            for q_seq, c_seq, predict_len, ps_seq in zip(question_seqs, correctness_seqs, predict_lens, predict_score_seqs):
                if predict_len < (seq_start - 1):
                    continue
                # per-sequence count of how often each concept has been seen so far
                # (a parallel per-concept correctness accumulator existed before but
                # was never read, so it has been removed)
                history_count = defaultdict(int)
                for q_id, correctness, ps in zip(q_seq[seq_start-2:predict_len], c_seq[seq_start-2:predict_len], ps_seq[seq_start-2:predict_len]):
                    for c_id in q2c[q_id]:
                        # keep the sample once per previously-unseen concept; a
                        # question with several new concepts contributes several pairs
                        if history_count[c_id] == 0:
                            predict_score_all.append(ps)
                            ground_truth_all.append(correctness)
                        history_count[c_id] += 1
        return get_kt_metric(ground_truth_all, predict_score_all)

    def log_inference_results(self):
        """Log first-transition AUC/ACC/RMSE/MAE for every evaluated data loader."""
        for data_loader_name, inference_result in self.inference_results.items():
            self.objects["logger"].info(f"evaluate result of {data_loader_name}")
            performance = inference_result
            self.objects["logger"].info(
                f" first trans performances are AUC: "
                f"{performance['AUC']:<9.5}, ACC: {performance['ACC']:<9.5}, "
                f"RMSE: {performance['RMSE']:<9.5}, MAE: {performance['MAE']:<9.5}, ")
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        parents=[setup_common_args(), setup_step_trainer_args(), setup_scheduler_args(),
                 setup_clip_args(), setup_grad_acc_args()],
        add_help=False)
    # optimizer settings
    arg_parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    arg_parser.add_argument("--learning_rate", type=float, default=0.0001)
    arg_parser.add_argument("--weight_decay", type=float, default=0)
    arg_parser.add_argument("--momentum", type=float, default=0.9)
    # reward discount
    arg_parser.add_argument("--gamma", type=float, default=0.9, help="discount factor")
    # agent hyper-parameters
    arg_parser.add_argument("--max_question_attempt", type=int, default=20)
    arg_parser.add_argument("--num_layer_action_model", type=int, default=2)
    arg_parser.add_argument("--num_layer_state_model", type=int, default=2)
    arg_parser.add_argument("--interval_step", type=int, default=100)
    # misc
    arg_parser.add_argument("--save_model", type=str2bool, default=False)
    arg_parser.add_argument("--use_wandb", type=str2bool, default=False)

    local_params = vars(arg_parser.parse_args())
    set_seed(local_params["seed"])
    global_params, global_objects = config_a2c(local_params)

    global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset")
    setting_dir = global_objects["file_manager"].get_setting_dir(local_params["setting_name"])
    global_objects["data"] = {
        split: read_kt_file(os.path.join(setting_dir, local_params[f"{split}_file_name"]))
        for split in ("train", "valid")
    }

    a2c_agent = A2C(global_params, global_objects)
    global_objects["agents"] = {
        "A2C": a2c_agent
    }

    # run copy_state_model once every `interval_step` training steps
    global_params["trainer_config"]["interval_execute_config"] = {
        local_params["interval_step"]: [a2c_agent.copy_state_model]
    }

    trainer = LPROnlineDRLTrainer(global_params, global_objects)
    trainer.train()
config_dler(params) 33 | 34 | setting_name = get_model_info(params["model_dir_name"])[1] 35 | setting_dir = global_objects["file_manager"].get_setting_dir(setting_name) 36 | kg4ex_dir = os.path.join(setting_dir, "KG4EX") 37 | users_data = read_kt_file(os.path.join(setting_dir, params['user_data_file_name'])) 38 | users_data_dict = {} 39 | for user_data in users_data: 40 | users_data_dict[user_data["user_id"]] = user_data 41 | global_objects["data_loaders"] = { 42 | # users_data_dict和mlkc是计算指标时需要的数据,所有推荐模型都要,第3个元素则是各个模型推理时需要的数据 43 | "test_loader": (users_data_dict, 44 | read_mlkc_data(os.path.join(kg4ex_dir, params["test_mlkc_file_name"])), 45 | (read_mlkc_data(os.path.join(kg4ex_dir, params["test_pkc_file_name"])), 46 | read_mlkc_data(os.path.join(kg4ex_dir, params["test_efr_file_name"])))) 47 | } 48 | evaluator = DLEREvaluator(global_params, global_objects) 49 | evaluator.evaluate() 50 | 51 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/hier_cdf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from set_params import * 6 | from config.hier_cdf import config_hier_cdf 7 | 8 | from edmine.utils.parse import str2bool 9 | from edmine.utils.use_torch import set_seed 10 | from edmine.utils.log import get_now_time 11 | from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset 12 | from edmine.model.cognitive_diagnosis_model.HierCDF import HierCDF 13 | from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer 14 | 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 18 | add_help=False) 19 | # batch size 20 | parser.add_argument("--train_batch_size", type=int, default=64) 21 | parser.add_argument("--evaluate_batch_size", 
MODEL_NAME = "MIRT"


@register_model(MODEL_NAME)
class MIRT(nn.Module, DLCognitiveDiagnosisModel):
    """Multidimensional IRT cognitive-diagnosis model.

    Predicts P(correct) = sigmoid(a . theta - b), where theta is the user
    ability vector, a the question discrimination vector, and b the question
    difficulty scalar, all looked up from the embedding layer.
    """
    model_name = MODEL_NAME

    def __init__(self, params, objects):
        super(MIRT, self).__init__()
        self.params = params
        self.objects = objects

        self.embed_layer = EmbedLayer(self.params["models_config"][MODEL_NAME]["embed_config"])

    def forward(self, batch):
        """Return the predicted correctness probability for each (user, question) pair.

        Args:
            batch: dict with "user_id" and "question_id" index tensors.
        Raises:
            ValueError: if any of theta/a/b contains NaN (typically a_range too large).
        """
        user_id = batch["user_id"]
        question_id = batch["question_id"]

        model_config = self.params["models_config"][MODEL_NAME]
        a_range = model_config["a_range"]

        theta = torch.squeeze(self.embed_layer.get_emb("theta", user_id), dim=-1)
        a = torch.squeeze(self.embed_layer.get_emb("a", question_id), dim=-1)
        # keep discrimination positive: bounded by a_range when given, else softplus
        if a_range > 0:
            a = a_range * torch.sigmoid(a)
        else:
            a = F.softplus(a)
        b = torch.squeeze(self.embed_layer.get_emb("b", question_id), dim=-1)
        # explicit NaN check (the original used the `x != x` trick)
        if torch.isnan(theta).any() or torch.isnan(a).any() or torch.isnan(b).any():  # pragma: no cover
            raise ValueError('ValueError:theta,a,b may contains nan! The a_range is too large.')

        return 1 / (1 + torch.exp(- torch.sum(torch.multiply(a, theta), dim=-1) + b))

    def get_predict_loss(self, batch):
        """Compute BCE loss plus the per-loss bookkeeping the trainer expects.

        Returns:
            dict with "total_loss" (scalar tensor), "losses_value" (value summed
            over the batch plus sample count, for logging) and "predict_score".
        """
        predict_score = self.forward(batch)
        ground_truth = batch["correctness"]
        # the mps backend lacks float64 kernels, so fall back to float32 there
        if self.params["device"] == "mps":
            loss = torch.nn.functional.binary_cross_entropy(predict_score.float(), ground_truth.float())
        else:
            loss = torch.nn.functional.binary_cross_entropy(predict_score.double(), ground_truth.double())
        num_sample = batch["correctness"].shape[0]
        return {
            "total_loss": loss,
            "losses_value": {
                "predict loss": {
                    "value": loss.detach().cpu().item() * num_sample,
                    "num_sample": num_sample
                },
            },
            "predict_score": predict_score
        }

    def get_predict_score(self, batch):
        """Return only the predicted scores (inference path, no loss)."""
        predict_score = self.forward(batch)
        return {
            "predict_score": predict_score,
        }
parser.add_argument("--weight_decay", type=float, default=0.0001) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 模型参数 24 | parser.add_argument("--dim_emb", type=int, default=64) 25 | parser.add_argument("--dim_latent", type=int, default=256) 26 | parser.add_argument("--rnn_type", type=str, default="gru") 27 | parser.add_argument("--num_rnn_layer", type=int, default=1) 28 | parser.add_argument("--dropout", type=float, default=0.3) 29 | parser.add_argument("--num_predict_layer", type=int, default=2) 30 | parser.add_argument("--dim_predict_mid", type=int, default=256) 31 | parser.add_argument("--activate_type", type=str, default="sigmoid") 32 | # 是否自动裁剪batch序列 33 | parser.add_argument("--auto_clip_seq", type=str2bool, default=False) 34 | 35 | # 设置参数空间 36 | parameters_space = { 37 | "weight_decay": [0.0001, 0.00001, 0], 38 | "dim_emb": [64, 128], 39 | "dim_latent": [64, 256], 40 | "dropout": [0.1, 0.2, 0.3], 41 | } 42 | space = { 43 | param_name: hp.choice(param_name, param_space) 44 | for param_name, param_space in parameters_space.items() 45 | } 46 | num = 1 47 | for parameter_space in parameters_space.values(): 48 | num *= len(parameter_space) 49 | if num > 100: 50 | max_evals = 20 + int(num * 0.2) 51 | elif num > 50: 52 | max_evals = 15 + int(num * 0.2) 53 | elif num > 20: 54 | max_evals = 10 + int(num * 0.2) 55 | elif num > 10: 56 | max_evals = 5 + int(num * 0.2) 57 | else: 58 | max_evals = num 59 | current_best_performance = 0 60 | fmin(get_objective_func(parser, config_dkt_forget, "DKTForget", DKTForget), space, algo=tpe.suggest, max_evals=max_evals) 61 | -------------------------------------------------------------------------------- /examples/cognitive_diagnosis/train/irt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from set_params import * 6 | from config.irt import config_irt 7 | 8 | from edmine.utils.parse import 
str2bool 9 | from edmine.utils.use_torch import set_seed 10 | from edmine.utils.log import get_now_time 11 | from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset 12 | from edmine.model.cognitive_diagnosis_model.IRT import IRT 13 | from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer 14 | 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser(parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()], 18 | add_help=False) 19 | # batch size 20 | parser.add_argument("--train_batch_size", type=int, default=256) 21 | parser.add_argument("--evaluate_batch_size", type=int, default=512) 22 | # 优化器 23 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 24 | parser.add_argument("--learning_rate", type=float, default=0.001) 25 | parser.add_argument("--weight_decay", type=float, default=0.0001) 26 | parser.add_argument("--momentum", type=float, default=0.9) 27 | # 模型参数 28 | parser.add_argument("--value_range", type=float, default=1) 29 | parser.add_argument("--a_range", type=float, default=1) 30 | parser.add_argument("--D", type=float, default=1.702) 31 | # 其它 32 | parser.add_argument("--save_model", type=str2bool, default=False) 33 | parser.add_argument("--use_wandb", type=str2bool, default=False) 34 | 35 | args = parser.parse_args() 36 | params = vars(args) 37 | set_seed(params["seed"]) 38 | global_params, global_objects = config_irt(params) 39 | 40 | global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset") 41 | dataset_train = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["train"], global_objects) 42 | dataloader_train = DataLoader(dataset_train, batch_size=params["train_batch_size"], shuffle=True) 43 | dataset_valid = BasicCognitiveDiagnosisDataset(global_params["datasets_config"]["valid"], global_objects) 44 | dataloader_valid = DataLoader(dataset_valid, 
def config_lpr_env(local_params, global_params, global_objects, model_dir):
    """Configure the trained KT model that acts as the LPR environment simulator.

    Loads the model named by ``local_params["model_dir_name"]`` from ``model_dir``
    and registers it (plus its config) in global_params/global_objects.
    """
    global_params["kt_model_config"] = {
        "seq_data_keys": ["question_seq", "correctness_seq", "mask_seq"],
        "id_data_keys": ["seq_len"]
    }
    config_general_dl_model(local_params, global_params)
    if local_params.get("dataset_name", False):
        config_q_table(local_params, global_params, global_objects)
    model_dir_name = local_params["model_dir_name"]
    model_name, setting_name, train_file_name = get_model_info(model_dir_name)
    if model_dir_name.startswith("DIMKT@@"):
        # DIMKT additionally needs pre-computed difficulty tables from the KT setting
        config_dimkt(local_params, global_params, global_objects,
                     local_params["kt_setting_name"], train_file_name)
    model = load_dl_model(global_params, global_objects,
                          os.path.join(model_dir, model_dir_name),
                          local_params["model_file_name"], local_params["model_name_in_ckt"])
    global_params["env_config"] = {"model_name": model_name}
    global_objects["models"] = {model_name: model}


def get_model_info(model_dir_name):
    """Split a 'model@@setting@@train_file@@...' directory name into its first three parts."""
    parts = model_dir_name.split("@@")
    return parts[0], parts[1], parts[2]


def config_dimkt(local_params, global_params, global_objects, setting_name, train_file_name):
    """Load DIMKT question/concept difficulty data and expose it via global_objects."""
    setting_dir = global_objects["file_manager"].get_setting_dir(setting_name)
    dimkt_dir = os.path.join(setting_dir, "DIMKT")
    diff = read_json(os.path.join(dimkt_dir, train_file_name + "_dimkt_diff.json"))
    # JSON keys are strings; restore the integer ids
    question_difficulty = {int(k): v for k, v in diff["question_difficulty"].items()}
    concept_difficulty = {int(k): v for k, v in diff["concept_difficulty"].items()}
    global_objects["dimkt"] = {
        "question_difficulty": question_difficulty,
        "concept_difficulty": concept_difficulty
    }
    # dense concept-id -> difficulty-id lookup table on the training device
    diff_table = [0] * local_params["num_concept"]
    for c_id, c_diff_id in concept_difficulty.items():
        diff_table[c_id] = c_diff_id
    global_objects["dimkt"]["q2c_diff_table"] = torch.LongTensor(diff_table).to(global_params["device"])
default=256) 18 | # 优化器 19 | parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd")) 20 | parser.add_argument("--learning_rate", type=float, default=0.001) 21 | parser.add_argument("--weight_decay", type=float, default=0.0001) 22 | parser.add_argument("--momentum", type=float, default=0.9) 23 | # 模型参数 24 | parser.add_argument("--dim_concept", type=int, default=256) 25 | parser.add_argument("--dim_correctness", type=int, default=256) 26 | parser.add_argument("--dim_latent", type=int, default=256) 27 | parser.add_argument("--rnn_type", type=str, default="gru") 28 | parser.add_argument("--num_rnn_layer", type=int, default=1) 29 | parser.add_argument("--dropout", type=float, default=0.3) 30 | parser.add_argument("--num_predict_layer", type=int, default=2) 31 | parser.add_argument("--dim_predict_mid", type=int, default=256) 32 | parser.add_argument("--activate_type", type=str, default="sigmoid") 33 | # 是否自动裁剪batch序列 34 | parser.add_argument("--auto_clip_seq", type=str2bool, default=False) 35 | 36 | # 设置参数空间 37 | parameters_space = { 38 | "weight_decay": [0.0001, 0.00001, 0], 39 | "dim_concept": [64, 128], 40 | "dim_correctness": [64, 128], 41 | "dim_latent": [64, 128, 256], 42 | "dropout": [0.1, 0.2, 0.3], 43 | } 44 | space = { 45 | param_name: hp.choice(param_name, param_space) 46 | for param_name, param_space in parameters_space.items() 47 | } 48 | num = 1 49 | for parameter_space in parameters_space.values(): 50 | num *= len(parameter_space) 51 | if num > 100: 52 | max_evals = 20 + int(num * 0.2) 53 | elif num > 50: 54 | max_evals = 15 + int(num * 0.2) 55 | elif num > 20: 56 | max_evals = 10 + int(num * 0.2) 57 | elif num > 10: 58 | max_evals = 5 + int(num * 0.2) 59 | else: 60 | max_evals = num 61 | current_best_performance = 0 62 | fmin(get_objective_func(parser, config_dkt, "DKT", DKT), space, algo=tpe.suggest, max_evals=max_evals) 63 | -------------------------------------------------------------------------------- 
def config_abqr(local_params):
    """Build (global_params, global_objects) for training ABQR, including its GCN adjacency."""
    model_name = "ABQR"

    global_params = {}
    global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)}
    config_logger(local_params, global_objects)
    config_general_dl_model(local_params, global_params)
    # ABQR adds a graph-contrastive loss on top of the prediction loss
    global_params["loss_config"] = {
        "gcl loss": 1
    }
    config_epoch_trainer(local_params, global_params, model_name)
    config_sequential_kt_dataset(local_params, global_params)
    config_optimizer(local_params, global_params, model_name)
    config_q_table(local_params, global_params, global_objects)

    # pre-built adjacency produced by examples/knowledge_tracing/abqr/get_graph.py
    setting_dir = global_objects["file_manager"].get_setting_dir(local_params["setting_name"])
    graph_path = os.path.join(setting_dir, "ABQR", f"abqr_graph_{local_params['dataset_name']}.pt")
    global_objects["ABQR"] = {
        "gcn_adj": torch.load(graph_path).to(global_params["device"])
    }

    # model hyper-parameters
    global_params["models_config"] = {
        model_name: {
            "dim_emb": local_params["dim_emb"],
            "dropout": local_params["dropout"],
        }
    }

    if local_params["save_model"]:
        # e.g. ABQR@@pykt_setting@@assist2009_train@@seed_0@@2025-01-01@12-00-00
        timestamp = get_now_time().replace(' ', '@').replace(':', '-')
        global_params["trainer_config"]["save_model_dir_name"] = (
            f"{model_name}@@{local_params['setting_name']}@@"
            f"{local_params['train_file_name'].replace('.txt', '')}@@"
            f"seed_{local_params['seed']}@@{timestamp}")
        save_params(global_params, MODELS_DIR, global_objects["logger"])
    config_wandb(local_params, global_params, model_name)

    return global_params, global_objects
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
        add_help=False)
    # batch sizes
    arg_parser.add_argument("--train_batch_size", type=int, default=1024)
    arg_parser.add_argument("--evaluate_batch_size", type=int, default=2048)
    # optimizer settings
    arg_parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    arg_parser.add_argument("--learning_rate", type=float, default=0.001)
    arg_parser.add_argument("--weight_decay", type=float, default=0.00001)
    arg_parser.add_argument("--momentum", type=float, default=0.9)
    # model hyper-parameters
    arg_parser.add_argument("--dropout", type=float, default=0.5)
    arg_parser.add_argument("--num_predict_layer", type=int, default=2)
    arg_parser.add_argument("--dim_predict_mid", type=int, default=64)
    arg_parser.add_argument("--activate_type", type=str, default="sigmoid")
    # misc
    arg_parser.add_argument("--save_model", type=str2bool, default=False)
    arg_parser.add_argument("--use_wandb", type=str2bool, default=False)

    local_params = vars(arg_parser.parse_args())
    set_seed(local_params["seed"])
    global_params, global_objects = config_ncd(local_params)

    global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset")

    def _make_loader(split, shuffle):
        # NOTE(review): the valid loader also uses train_batch_size, matching the
        # original script (and its sibling CD scripts) — evaluate_batch_size is
        # presumably consumed elsewhere; confirm before changing.
        split_dataset = BasicCognitiveDiagnosisDataset(global_params["datasets_config"][split], global_objects)
        return DataLoader(split_dataset, batch_size=local_params["train_batch_size"], shuffle=shuffle)

    global_objects["data_loaders"] = {
        "train_loader": _make_loader("train", True),
        "valid_loader": _make_loader("valid", False)
    }
    global_objects["models"] = {
        "NCD": NCD(global_params, global_objects).to(global_params["device"])
    }
    trainer = DLCognitiveDiagnosisTrainer(global_params, global_objects)
    trainer.train()
import argparse

from torch.utils.data import DataLoader

from set_params import *
from config.hyper_cd import config_hyper_cd

from edmine.utils.parse import str2bool
from edmine.utils.use_torch import set_seed
from edmine.utils.log import get_now_time
from edmine.dataset.CognitiveDiagnosisDataset import BasicCognitiveDiagnosisDataset
from edmine.model.cognitive_diagnosis_model.HyperCD import HyperCD
from edmine.trainer.DLCognitiveDiagnosisTrainer import DLCognitiveDiagnosisTrainer


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        parents=[setup_common_args(), setup_scheduler_args(), setup_clip_args(), setup_grad_acc_args()],
        add_help=False)
    # Batch sizes
    arg_parser.add_argument("--train_batch_size", type=int, default=256)
    arg_parser.add_argument("--evaluate_batch_size", type=int, default=1024)
    # Optimizer
    arg_parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    arg_parser.add_argument("--learning_rate", type=float, default=0.0001)
    arg_parser.add_argument("--weight_decay", type=float, default=0.0005)
    arg_parser.add_argument("--momentum", type=float, default=0.9)
    # Model hyper-parameters
    arg_parser.add_argument("--num_layer", type=int, default=3)
    arg_parser.add_argument("--dim_feature", type=int, default=512)
    arg_parser.add_argument("--dim_emb", type=int, default=16)
    arg_parser.add_argument("--leaky", type=float, default=0.8)
    # Miscellaneous
    arg_parser.add_argument("--save_model", type=str2bool, default=False)
    arg_parser.add_argument("--use_wandb", type=str2bool, default=False)

    cli_params = vars(arg_parser.parse_args())
    set_seed(cli_params["seed"])
    global_params, global_objects = config_hyper_cd(cli_params)

    global_objects["logger"].info(f"{get_now_time()} start loading and processing dataset")
    datasets_config = global_params["datasets_config"]
    train_dataset = BasicCognitiveDiagnosisDataset(datasets_config["train"], global_objects)
    valid_dataset = BasicCognitiveDiagnosisDataset(datasets_config["valid"], global_objects)
    # NOTE(review): both loaders use train_batch_size; evaluate_batch_size is
    # presumably consumed elsewhere (e.g. by the evaluator) -- confirm.
    train_loader = DataLoader(train_dataset, batch_size=cli_params["train_batch_size"], shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=cli_params["train_batch_size"], shuffle=False)

    global_objects["data_loaders"] = {
        "train_loader": train_loader,
        "valid_loader": valid_loader,
    }
    hyper_cd_model = HyperCD(global_params, global_objects).to(global_params["device"])
    global_objects["models"] = {"HyperCD": hyper_cd_model}
    DLCognitiveDiagnosisTrainer(global_params, global_objects).train()
global_objects).to(global_params["device"]) 53 | } 54 | trainer = DLCognitiveDiagnosisTrainer(global_params, global_objects) 55 | trainer.train() 56 | -------------------------------------------------------------------------------- /examples/knowledge_tracing/qdckt/get_difficulty.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from collections import defaultdict 4 | 5 | from config import FILE_MANAGER_ROOT 6 | 7 | from edmine.utils.data_io import read_kt_file, write_json 8 | from edmine.data.FileManager import FileManager 9 | from edmine.utils.parse import q2c_from_q_table 10 | 11 | 12 | def parse_difficulty(kt_data_, num_question_diff, num_question): 13 | questions_frequency, questions_accuracy = defaultdict(int), defaultdict(int) 14 | n_sum = 0 15 | n_correct = 0 16 | for item_data in kt_data_: 17 | seq_len = item_data["seq_len"] 18 | question_seq = item_data["question_seq"] 19 | correctness_seq = item_data["correctness_seq"] 20 | n_sum += seq_len 21 | n_correct += sum(correctness_seq) 22 | for i in range(seq_len): 23 | q_id = question_seq[i] 24 | questions_frequency[q_id] += 1 25 | questions_accuracy[q_id] += correctness_seq[i] 26 | 27 | ave_acc = n_correct / n_sum 28 | for q_id in range(num_question): 29 | if questions_frequency[q_id] == 0: 30 | diff = 1 - ave_acc 31 | else: 32 | diff = 1 - (questions_accuracy[q_id] + 5 * ave_acc) / (questions_frequency[q_id] + 5) 33 | questions_accuracy[q_id] = int((num_question_diff - 1) * diff) 34 | return questions_accuracy 35 | 36 | 37 | if __name__ == "__main__": 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument("--setting_name", type=str, default="pykt_setting") 40 | parser.add_argument("--dataset_name", type=str, default="assist2009") 41 | parser.add_argument("--train_file_name", type=str, default="assist2009_train_fold_0.txt") 42 | parser.add_argument("--num_question_diff", type=int, default=100) 43 | args = parser.parse_args() 44 
import argparse
from hyperopt import fmin, tpe, hp

from set_params import *
from config.dimkt import config_dimkt
from utils import get_objective_func

from edmine.utils.parse import str2bool
from edmine.model.sequential_kt_model.DIMKT import DIMKT


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        parents=[setup_common_args(), setup_clip_args(), setup_grad_acc_args()],
        add_help=False)
    # Batch sizes
    arg_parser.add_argument("--train_batch_size", type=int, default=64)
    arg_parser.add_argument("--evaluate_batch_size", type=int, default=256)
    # Optimizer
    arg_parser.add_argument("--optimizer_type", type=str, default="adam", choices=("adam", "sgd"))
    arg_parser.add_argument("--learning_rate", type=float, default=0.001)
    arg_parser.add_argument("--weight_decay", type=float, default=0.001)
    arg_parser.add_argument("--momentum", type=float, default=0.9)
    # LR scheduler
    arg_parser.add_argument("--enable_scheduler", type=str2bool, default=True)
    arg_parser.add_argument("--scheduler_type", type=str, default="StepLR",
                            choices=("StepLR", "MultiStepLR", "CosineAnnealingLR"))
    arg_parser.add_argument("--scheduler_step", type=int, default=10)
    arg_parser.add_argument("--scheduler_milestones", type=str, default="[5, 10]")
    arg_parser.add_argument("--scheduler_gamma", type=float, default=0.5)
    arg_parser.add_argument("--scheduler_T_max", type=int, default=10)
    arg_parser.add_argument("--scheduler_eta_min", type=float, default=0.0001)
    # Model hyper-parameters
    arg_parser.add_argument("--dim_emb", type=int, default=128)
    arg_parser.add_argument("--dropout", type=float, default=0.1)
    # Whether to automatically clip batch sequences
    arg_parser.add_argument("--auto_clip_seq", type=str2bool, default=False)

    # Hyper-parameter search space
    parameters_space = {
        "weight_decay": [0.0001, 0.00001, 0],
        "dim_emb": [64, 128, 256],
        "dropout": [0.1, 0.2, 0.3],
    }
    space = {name: hp.choice(name, choices) for name, choices in parameters_space.items()}

    # Scale the number of TPE trials with the size of the full grid:
    # small grids are enumerated exhaustively, large ones get a flat base
    # budget plus 20% of the grid size.
    grid_size = 1
    for choices in parameters_space.values():
        grid_size *= len(choices)
    for threshold, base_evals in ((100, 20), (50, 15), (20, 10), (10, 5)):
        if grid_size > threshold:
            max_evals = base_evals + int(grid_size * 0.2)
            break
    else:
        max_evals = grid_size

    current_best_performance = 0
    fmin(get_objective_func(arg_parser, config_dimkt, "DIMKT", DIMKT), space, algo=tpe.suggest, max_evals=max_evals)
__init__(self, params, objects): 7 | super().__init__() 8 | self.params = params 9 | self.objects = objects 10 | 11 | def _batch(self, history_data, next_rec_data=None): 12 | has_next_rec_data = int(next_rec_data is not None) 13 | if type(history_data) is dict: 14 | history_data = [history_data] 15 | if type(next_rec_data) is dict: 16 | next_rec_data = [next_rec_data] 17 | if next_rec_data is None: 18 | next_rec_data = [None] * len(history_data) 19 | if type(history_data) is list and type(next_rec_data) is list: 20 | max_seq_len = max(list(map(lambda x: len(x["correctness_seq"]), history_data))) + has_next_rec_data 21 | batch = {k: [] for k in history_data[0].keys()} 22 | for item_data, next_rec in zip(history_data, next_rec_data): 23 | seq_len = len(item_data["correctness_seq"]) + has_next_rec_data 24 | batch["seq_len"].append(seq_len) 25 | for k, v in item_data.items(): 26 | if type(v) is list: 27 | if next_rec is None: 28 | seq = v + [0] * (max_seq_len - seq_len) 29 | else: 30 | seq = v + [next_rec[k]] + [0] * (max_seq_len - seq_len) 31 | batch[k].append(seq) 32 | else: 33 | if k != "seq_len": 34 | batch[k].append(v) 35 | for k in batch.keys(): 36 | if k not in ["weight_seq", "hint_factor_seq", "attempt_factor_seq", "time_factor_seq", "correct_float"]: 37 | batch[k] = torch.tensor(batch[k]).long().to(self.params["device"]) 38 | else: 39 | batch[k] = torch.tensor(batch[k]).float().to(self.params["device"]) 40 | return batch 41 | else: 42 | raise NotImplemented() 43 | 44 | def step(self, data): 45 | history_data = data["history_data"] 46 | next_rec_data = data.get("next_rec_data", None) 47 | batch = self._batch(history_data, next_rec_data) 48 | model_name = self.params["env_config"]["model_name"] 49 | model = self.objects["models"][model_name] 50 | model.eval() 51 | with torch.no_grad(): 52 | state = model.get_knowledge_state(batch) 53 | batch_size = batch["correctness_seq"].shape[0] 54 | predict_score_batch = model.get_predict_score(batch)["predict_score_batch"] 
def root_mean_squared_error(y_true, y_pred):
    """Return the root of sklearn's mean squared error."""
    return mean_squared_error(y_true, y_pred) ** 0.5


def get_kt_metric(y_true, y_score):
    """Compute standard knowledge-tracing metrics.

    Args:
        y_true: ground-truth binary labels.
        y_score: predicted probabilities in [0, 1].

    Returns:
        dict with "AUC", "ACC", "MAE", "RMSE". All values are -1. for empty
        input; AUC alone is -1. when undefined (NaN, e.g. single-class input).
    """
    assert len(y_true) == len(y_score), "len of y_true and len of y_score must be equal"
    # Early return: the original checked emptiness twice and computed AUC
    # only to discard it on the second check.
    if len(y_true) == 0:
        return {"AUC": -1., "ACC": -1., "MAE": -1., "RMSE": -1.}
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        AUC = roc_auc_score(y_true, y_score)
        if math.isnan(AUC):
            AUC = -1.
        # Hard predictions at the conventional 0.5 threshold.
        y_pred = [1 if p >= 0.5 else 0 for p in y_score]
        return {
            "AUC": AUC,
            "ACC": accuracy_score(y_true=y_true, y_pred=y_pred),
            "MAE": mean_absolute_error(y_true=y_true, y_pred=y_score),
            "RMSE": root_mean_squared_error(y_true=y_true, y_pred=y_score)
        }


def core_metric(predict_score, ground_truth, question_ids, allow_replace=True):
    """Class-balanced ("core") variant of the KT metrics.

    For every question with both correct and incorrect responses, an equal
    number of positive and negative samples is drawn (with or without
    replacement) and metrics are computed over the pooled balanced sample.
    Questions with a single label are skipped. Uses np.random, so results
    vary between calls unless the global seed is fixed.

    Args:
        predict_score: np.ndarray of predicted probabilities.
        ground_truth: np.ndarray of 0/1 labels, aligned with predict_score.
        question_ids: np.ndarray of question ids, aligned with predict_score.
        allow_replace: sample with replacement (as in the official code).
    """
    question_ids_ = np.unique(question_ids)
    predict_score_balanced = []
    ground_truth_balanced = []

    for q_id in tqdm(question_ids_, desc=f"calculate core metric, {'repeated' if allow_replace else 'non-repeated'}"):
        predict_score4q_id = predict_score[question_ids == q_id]
        ground_truth4q_id = ground_truth[question_ids == q_id]
        num_right = np.sum(ground_truth4q_id == 1)
        num_wrong = np.sum(ground_truth4q_id == 0)

        # A question with only one label cannot be balanced; skip it.
        if num_right == 0 or num_wrong == 0:
            continue

        # Draw the same number of label-1 and label-0 samples (the official
        # implementation samples with replacement).
        if allow_replace:
            num_balance = (num_wrong + num_right) // 2
        else:
            num_balance = min(num_wrong, num_right)
        index_right = np.random.choice(np.where(ground_truth4q_id == 1)[0], num_balance, replace=allow_replace)
        index_wrong = np.random.choice(np.where(ground_truth4q_id == 0)[0], num_balance, replace=allow_replace)
        index_balanced = list(index_right) + list(index_wrong)
        predict_score_balanced.append(predict_score4q_id[index_balanced])
        ground_truth_balanced.append(ground_truth4q_id[index_balanced])

    # BUG FIX: np.concatenate raises ValueError on an empty list; return the
    # sentinel metrics when no question was balanced-eligible.
    if not predict_score_balanced:
        return get_kt_metric([], [])

    predict_score_balanced = np.concatenate(predict_score_balanced)
    ground_truth_balanced = np.concatenate(ground_truth_balanced)

    return get_kt_metric(ground_truth_balanced, predict_score_balanced)
def config_mirt(local_params):
    """Build the (global_params, global_objects) pair for training MIRT.

    Wires up logging, trainer, dataset, optimizer and Q-table configuration
    (order preserved -- later steps rely on earlier ones, e.g. the logger),
    then fills in the MIRT-specific embedding configuration.
    """
    model_name = "MIRT"

    global_params = {}
    global_objects = {"file_manager": FileManager(FILE_MANAGER_ROOT)}
    config_logger(local_params, global_objects)
    config_general_dl_model(local_params, global_params)
    global_params["loss_config"] = {}
    config_epoch_trainer(local_params, global_params, model_name)
    config_cd_dataset(local_params, global_params, global_objects)
    config_optimizer(local_params, global_params, model_name)
    config_q_table(local_params, global_params, global_objects)

    # Model hyper-parameters: per-user ability (theta), per-question
    # discrimination (a) and difficulty (b) embeddings.
    num_question = local_params["num_question"]
    num_concept = local_params["num_concept"]
    global_params["models_config"] = {
        model_name: {
            "embed_config": {
                "theta": {"num_item": local_params["num_user"], "dim_item": num_concept},
                "a": {"num_item": num_question, "dim_item": num_concept},
                "b": {"num_item": num_question, "dim_item": 1},
            },
            "a_range": local_params["a_range"],
        }
    }

    if local_params["save_model"]:
        # Directory name encodes model, setting, train file, seed and timestamp.
        timestamp = get_now_time().replace(' ', '@').replace(':', '-')
        train_name = local_params["train_file_name"].replace('.txt', '')
        global_params["trainer_config"]["save_model_dir_name"] = (
            f"{model_name}@@{local_params['setting_name']}@@{train_name}@@"
            f"seed_{local_params['seed']}@@{timestamp}")
        save_params(global_params, MODELS_DIR, global_objects["logger"])
    config_wandb(local_params, global_params, model_name)

    return global_params, global_objects
type=float, default=0.3) 31 | parser.add_argument("--num_predict_layer", type=int, default=3) 32 | parser.add_argument("--dim_predict_mid", type=int, default=128) 33 | parser.add_argument("--activate_type", type=str, default="relu") 34 | # 是否自动裁剪batch序列 35 | parser.add_argument("--auto_clip_seq", type=str2bool, default=False) 36 | 37 | # 设置参数空间 38 | parameters_space = { 39 | "weight_decay": [0.0001, 0.00001, 0], 40 | "dim_question": [64, 128], 41 | "dim_concept": [64, 128], 42 | "dim_correctness": [64, 128], 43 | "dim_latent": [64, 128, 256], 44 | "dropout": [0.1, 0.2, 0.3], 45 | } 46 | space = { 47 | param_name: hp.choice(param_name, param_space) 48 | for param_name, param_space in parameters_space.items() 49 | } 50 | num = 1 51 | for parameter_space in parameters_space.values(): 52 | num *= len(parameter_space) 53 | if num > 100: 54 | max_evals = 20 + int(num * 0.2) 55 | elif num > 50: 56 | max_evals = 15 + int(num * 0.2) 57 | elif num > 20: 58 | max_evals = 10 + int(num * 0.2) 59 | elif num > 10: 60 | max_evals = 5 + int(num * 0.2) 61 | else: 62 | max_evals = num 63 | current_best_performance = 0 64 | fmin(get_objective_func(parser, config_qdkt, "qDKT", qDKT), space, algo=tpe.suggest, max_evals=max_evals) 65 | --------------------------------------------------------------------------------