├── .gitattributes ├── .idea ├── .gitignore ├── vcs.xml ├── misc.xml ├── inspectionProfiles │ ├── profiles_settings.xml │ └── Project_Default.xml ├── modules.xml └── kgc-rec.iml ├── __pycache__ ├── m_inits.cpython-36.pyc ├── m_layers.cpython-36.pyc ├── m_models.cpython-36.pyc ├── m_utils.cpython-36.pyc ├── m_utils.cpython-37.pyc └── metrics.cpython-36.pyc ├── requirements.txt ├── data ├── negative.p ├── user_course.p ├── user_video.p ├── adjacency_matrix.p ├── concept_course.p ├── concept_embedding.p ├── rate_matrix.p ├── user_action.p └── user_course_teacher.p ├── output └── m_rating_pred_bestmrr.p ├── .gitignore ├── m_inits.py ├── metrics.py ├── README.md ├── data_utils.py ├── m_utils.py ├── m_models.py ├── m_train.py ├── m_layers.py └── data_utils.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | *.p filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /__pycache__/m_inits.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/m_inits.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/m_layers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/m_layers.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/m_models.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/m_models.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/m_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/m_utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/m_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/m_utils.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parklize/kgc-rec/HEAD/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.13.1 2 | tabulate==0.8.7 3 | numpy==1.19.5 4 | scipy==1.5.4 5 | scikit_learn==0.24.2 6 | -------------------------------------------------------------------------------- /data/negative.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ae89c7d5985f5c6b7c0764fdccc16a85799fce02ddfade16ecdf84a273ae46ac 3 | size 1604161 4 | 
-------------------------------------------------------------------------------- /data/user_course.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:84f6cf03774ffcea41723cb29d0970c273dc6a3db332431089c5dd2877b4b340 3 | size 1203159 4 | -------------------------------------------------------------------------------- /data/user_video.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1c900c7eb03aaf56173a880e56982113971fb50ee562fd13d971662d2e0c42cd 3 | size 44918174 4 | -------------------------------------------------------------------------------- /data/adjacency_matrix.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:62ea2a542ac1c645f6030d88967b610770b4afcde987cf3831d6b39f4f0299c7 3 | size 42179344 4 | -------------------------------------------------------------------------------- /data/concept_course.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4fc9ddd0464a05998434e9bc08dd0d44d9252d3ae06fed65498a0cb13ac3830 3 | size 100977759 4 | -------------------------------------------------------------------------------- /data/concept_embedding.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ef2464fc2aa665fb8173686ba3897f6d6608c020e9553cd4d3523fb590e90b4d 3 | size 8414959 4 | -------------------------------------------------------------------------------- /data/rate_matrix.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:71239cabde0172311a9ddcad88270702a6a2ad337417395f4cb28b6ccf1a1529 3 | size 337433639 4 | -------------------------------------------------------------------------------- /data/user_action.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e9c3b705d39987e44404fb0d6a6268cc52cdcb9c8b0e5bd1798430a6823aa2d0 3 | size 337433639 4 | -------------------------------------------------------------------------------- /data/user_course_teacher.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31c52a96761dd7fab82390a34824b34e7b0e685a9e4b0fd180b6fc5176d31d1d 3 | size 2777084 4 | -------------------------------------------------------------------------------- /output/m_rating_pred_bestmrr.p: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4727ccf3fc2c79d3cbf1e9bb96e51ed35ab425a526768909ea787e836f1b54cb 3 | size 168716900 4 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build and Release Folders 2 | bin-debug/ 3 | bin-release/ 4 | [Oo]bj/ 5 | [Bb]in/ 6 | 7 | # Other files and folders 8 | .settings/ 9 | 10 | # Executables 11 | *.swf 12 | *.air 13 | *.ipa 14 | *.apk 15 | 16 | # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties` 17 | # should NOT be excluded as they contain compiler settings and other important 18 | # information for Eclipse / Flash Builder. 19 | -------------------------------------------------------------------------------- /.idea/kgc-rec.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /m_inits.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def init_variable(size, dim, name=None): 6 | std = np.sqrt(2/dim) 7 | return tf.Variable(tf.random_uniform([size, dim], -std, std), name=name) 8 | 9 | 10 | def glorot(shape, name=None): 11 | """Glorot & Bengio (AISTATS 2010) init.""" 12 | # init_range = np.sqrt(6.0/(shape[0]+shape[1])) 13 | # initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) 14 | # initial = tf.random_uniform(shape, stddev=0.1) 15 | 16 | # return tf.Variable(tf.random_normal(shape, stddev=1.0)) 17 | return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1)) 18 | # return tf.Variable(tf.truncated_normal(shape=shape, stddev=np.sqrt(2/shape[0]))) 19 | 20 | 21 | def zeros(shape, name=None): 22 | """All zeros.""" 23 | initial = tf.zeros(shape, dtype=tf.float32) 24 | return tf.Variable(initial, name=name) -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def loss(rating,rate): 4 | err = tf.square(tf.subtract(rating,rate)) 5 | los = tf.reduce_sum(err) 6 | return los 7 | 8 | def auc(rate, negative, length): 9 | test = tf.gather_nd(rate, negative) 10 | topk = tf.nn.top_k(test, 100).indices 11 | where = tf.where(tf.equal(topk, tf.constant(99,shape=[length,100]))) 12 | auc = tf.split(where,num_or_size_splits=2,axis=1)[1] 13 | ran_auc = tf.Variable(tf.random_uniform(shape=[length, 1], minval=0, maxval=100, dtype=tf.int64)) 14 | auc = tf.reduce_mean(tf.cast(tf.less(auc - ran_auc, 0), dtype=tf.float32)) 15 | return auc 16 | 17 | 18 | def hr(rate, negative, length, k=5): 19 | test = tf.gather_nd(rate, negative) 20 | topk = tf.nn.top_k(test, k).indices 21 | isIn = tf.cast(tf.equal(topk, 99), dtype=tf.float32) 22 | row = tf.reduce_sum(isIn, axis=1) 23 | all = tf.reduce_sum(row) 24 | return all/length 25 | 26 | 27 | def mrr(rate, negative, length): 28 | test = 
tf.gather_nd(rate, negative) 29 | topk = tf.nn.top_k(test, 100).indices
30 | mrr_ = tf.reduce_sum(1 / tf.add(tf.split(value=tf.where(tf.equal(topk, tf.constant(99, shape=[length, 100]))),
31 | num_or_size_splits=2, axis=1)[1], 1))
32 | mrr = mrr_/length
33 | return mrr
34 |
35 |
36 | def ndcg(rate, negative, length, k=5):
37 | test = tf.gather_nd(rate, negative)
38 | topk = tf.nn.top_k(test, k).indices
39 | n = tf.split(value=tf.where(tf.equal(topk, tf.constant(99, shape=[length, k]))), num_or_size_splits=2, axis=1)[1]
40 | ndcg = tf.reduce_sum(tf.log(2.0) / tf.log(tf.cast(tf.add(n, tf.constant(2, dtype=tf.int64)),
41 | dtype=tf.float32)))/length
42 | return ndcg
43 |
44 |
45 | def env(rate, negative, length):
46 | hrat1 = hr(rate,negative,length,k=1)
47 | hrat5 = hr(rate,negative,length,k=5)
48 | hrat10 = hr(rate,negative,length,k=10)
49 | hrat20 = hr(rate,negative,length,k=20)
50 | ndcg5 = ndcg(rate,negative,length,k=5)
51 | ndcg10 = ndcg(rate,negative,length,k=10)
52 | ndcg20 = ndcg(rate,negative,length,k=20)
53 | mr = mrr(rate,negative,length)
54 | au = auc(rate,negative,length)
55 | return hrat1, hrat5, hrat10, hrat20, ndcg5, ndcg10, ndcg20, mr, au
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Python](https://img.shields.io/badge/python-3.6-blue?logo=python&logoColor=gold)
2 | ![Tensorflow](https://img.shields.io/badge/tensorflow-1.13.1-blue?logo=tensorflow)
3 | ![GitHub](https://img.shields.io/github/stars/parklize/kgc-rec)
4 | ![GitHub](https://img.shields.io/github/forks/parklize/kgc-rec)
5 |
6 |
7 | # Recommending Knowledge Concepts on MOOC Platforms with Meta-path-based Representation Learning
8 |
9 | This repository contains the implementation for the [EDM2021](https://educationaldatamining.org/edm2021/) paper - "Recommending Knowledge Concepts on MOOC Platforms with Meta-path-based Representation Learning". This work is inspired by and built on top of [ACKRec](https://github.com/JockWang/ACKRec).
10 |
11 |
12 |
13 | ## Abstract
14 |
15 | Massive Open Online Courses (MOOCs), which enable large-scale open online learning for massive users, have been playing an important role in modern education for both students and professionals. To keep users' interest in MOOCs, recommender systems have been studied and deployed to recommend courses or videos that a user might be interested in. However, recommending courses and videos which usually cover a wide range of knowledge concepts does not consider user interests or learning needs regarding some specific concepts. This paper focuses on the task of recommending knowledge concepts of interest to users, which is challenging due to the sparsity of user-concept interactions given a large number of concepts.
16 |
17 | In this paper, we propose an approach by modeling information on MOOC platforms (e.g., teacher, video, course, and school) as a Heterogeneous Information Network (HIN) to learn user and concept representations using Graph Convolutional Networks based on user-user and concept-concept relationships via meta-paths in the HIN. We incorporate those learned user and concept representations into an extended matrix factorization framework to predict the preference of concepts for each user. Our experiments on a real-world MOOC dataset show that the proposed approach outperforms several baselines and state-of-the-art methods for predicting and recommending concepts of interest to users.
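To make the meta-path construction described above concrete, below is a minimal NumPy sketch with toy shapes (the actual construction for the MOOCCube matrices lives in ```m_utils.load_data``` and ```m_utils.preprocess_adjacency```):

```python
import numpy as np

# Toy user-concept interaction matrix (3 users x 4 concepts);
# the real one is loaded from data/adjacency_matrix.p.
uk = np.array([[1, 0, 1, 0],
               [0, 1, 1, 0],
               [0, 0, 1, 1]], dtype=np.float32)

# Meta-path user-concept-user (uku): connect users who interacted with
# shared concepts, plus self-loops, mirroring uk.dot(ku) + np.eye(...)
# in m_utils.load_data.
uku = uk.dot(uk.T) + np.eye(uk.shape[0], dtype=np.float32)

# Normalize with the inverse degree matrix (D^-1 A), as in
# m_utils.preprocess_adjacency (which additionally scales by 1e2).
d_inv = np.diag(1.0 / uku.sum(axis=0))
uku_normalized = d_inv.dot(uku)
print(uku_normalized)  # rows sum to 1 since uku is symmetric
```

The same pattern (a matrix product with its own transpose plus self-loops, then degree normalization) is used for the ```ucu```, ```uvu```, and ```uctcu``` user graphs and the ```kuk```/```kck``` concept graphs in ```m_utils.py```.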
18 |
19 |
20 |
21 | ## Main environments
22 |
23 | **Laptop used for experiments**: Intel(R) Core(TM) i5-8365U processor laptop with 16GB RAM
24 |
25 | **Main packages:** Python 3.6; TensorFlow 1.13.1
26 |
27 |
28 |
29 |
30 |
31 | ## Folder structure
32 |
33 | ```python
34 | ├── data # folder containing the MOOCCube data (input) used for experiments
35 | ├── output # output folder
36 | requirements.txt # packages to be installed using pip install -r requirements.txt
37 | data_utils.py # for evaluation of predicted results using the trained model
38 | data_utils.ipynb # for data preprocessing
39 | m_train.py # for training the model
40 | m_inits.py
41 | m_layers.py
42 | m_models.py
43 | m_utils.py
44 | metrics.py
45 | ```
46 |
47 |
48 |
49 | ## Usage
50 |
51 | **Data**: Alternatively, you can download the big files in the data folder [here](https://drive.google.com/drive/folders/1B_syoPiqZiFROreCRFlJTqRmcqK5KOd-?usp=sharing); the original dataset can be found at [moocdata](http://moocdata.cn/data/MOOCCube).
52 |
53 | Use the following command to train our method $MOOCIR_{a1}$ on the MOOCCube dataset. The output includes the ```m_rating_pred_bestmrr.p``` file, which stores the predicted item score matrix for all users.
54 |
55 | ```bash
56 | $ python m_train.py
57 | ```
58 |
59 | After the above step, you can use ```data_utils.py``` to get the results regarding evaluation metrics on the test set. By default, the ```m_rating_pred_bestmrr.p``` file from the step above is used for the variable ```pred_matrix_f``` in ```data_utils.py```.
60 |
61 | ```bash
62 | $ python data_utils.py
63 | ```
64 |
65 |
66 |
67 | ## Citation
68 |
69 | Guangyuan Piao, "Recommending Knowledge Concepts on MOOC Platforms with Meta-path-based Representation Learning", Educational Data Mining, Paris, France, 2021. [[PDF](https://parklize.github.io/publications/EDM2021.pdf)] [[BibTex](https://parklize.github.io/bib/EDM2021.bib)]
70 |
71 |
--------------------------------------------------------------------------------
/data_utils.py:
--------------------------------------------------------------------------------
1 | import pickle as pkl
2 | import numpy as np
3 | from sklearn.metrics import ndcg_score
4 | np.random.seed(0)
5 |
6 |
7 | def evaluate(pred_rating_matrix, training_matrix, ground_truth_matrix, user_specific=False, for_csv=False):
8 | """
9 | Prepare negative item indices to sample 99 items for each positive in testing
10 | For each user, get testing items
11 | For each testing item, pair it with 99 negative samples
12 | Calculate the relevant eval metrics
13 | Average the evaluation score over all users as defined in the paper
14 |
15 | Parameters
16 | -----------------------
17 | user_specific: mean of user-specific average scores (scores for multiple items of a user are averaged)
18 | for_csv: only print CSV lines if True for dataframe analysis
19 | """
20 | user_size = pred_rating_matrix.shape[0]
21 | item_size = pred_rating_matrix.shape[1]
22 | item_indices = list(range(item_size))
23 | # print("user size:{}, item size:{}".format(user_size, item_size))
24 |
25 | # Get negative item indices
26 | # Get testing items (ground truth - training)
27 | hr5 = 0.
28 | hr10 = 0.
29 | hr20 = 0.
30 | mrr = 0.
31 | ndcg5 = 0.
32 | ndcg10 = 0.
33 | ndcg20 = 0.
34 | valid_testing_size = 0
35 | valid_testing_pairs = list() # how many 100 (one pos, 99 neg) pairs tested
36 | for i in range(user_size):
37 | # per user
38 | hr5_ = 0.
39 | hr10_ = 0.
40 | hr20_ = 0.
41 | mrr_ = 0.
42 | ndcg5_ = 0.
43 | ndcg10_ = 0.
44 | ndcg20_ = 0.
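        # Protocol for the loop below: for each held-out positive concept of
        # user i, 99 concepts the user never interacted with are sampled and
        # the positive is ranked among those 100 candidates. The positive is
        # appended last, so its index within `indices` is 99 (hence
        # ground_truth_indices = [99]). HR@K checks whether the positive
        # appears in the top-K scores, MRR adds 1/rank of the positive, and
        # NDCG@K is computed with sklearn's ndcg_score over the 100 scores.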
45 | # if i % 50 == 0: 46 | # print("{}-th user".format(i)) 47 | # negative indices for the current user 48 | # neg_indices = list(set(item_indices)-set(ground_truth_matrix[i])) 49 | # neg_indices = [_ for _ in item_indices if _ not in np.where(ground_truth_matrix[i]>0)[0]] 50 | neg_indices = np.where(ground_truth_matrix[i] == 0)[0] 51 | 52 | # testing indices for the current user 53 | training_indices = np.where(training_matrix[i] > 0)[0] 54 | testing_indices = [x for x in np.where(ground_truth_matrix[i] > 0)[0] if x not in training_indices] 55 | # print("testing indices:{}".format(testing_indices)) 56 | if len(testing_indices) > 0: 57 | for ti in testing_indices: 58 | # get testing size*99 items from neg_indices 59 | selected_neg_indices = np.random.choice(np.array(neg_indices), 99, replace=False) 60 | # print(selected_neg_indices) 61 | indices = np.array(list(selected_neg_indices)+[ti]) 62 | # valid_testing_pairs.append(np.stack((np.array([i] * 100), indices), axis=1)) 63 | indices_sorted = np.argsort(pred_rating_matrix[i][indices]) 64 | # print(valid_testing_size, pred_rating_matrix[i][indices]) 65 | # print(indices_sorted) 66 | ground_truth_indices = [99] 67 | # ground_truth_indices = list(range(len(indices)-1, len(indices)-1-len(testing_indices), -1)) 68 | hr5_ex = 1. if len(intersection(ground_truth_indices, indices_sorted[-5:])) > 0 else 0. 69 | hr5_ += hr5_ex 70 | hr10_ex = 1. if len(intersection(ground_truth_indices, indices_sorted[-10:])) > 0 else 0. 71 | hr10_ += hr10_ex 72 | hr20_ex = 1. if len(intersection(ground_truth_indices, indices_sorted[-20:])) > 0 else 0. 73 | hr20_ += hr20_ex 74 | index = np.max([np.where(indices_sorted == ind) for ind in ground_truth_indices][0][0]) 75 | # sorted is reversed rank for mrr 76 | rank = len(indices_sorted) - index 77 | mrr_ex = (1./rank) 78 | mrr_ += mrr_ex 79 | # NDCG@K 80 | y_true = np.asarray([[0.]*len(selected_neg_indices)+[1]]) 81 | y_pred = np.asarray([pred_rating_matrix[i][indices]]) 82 | # print(y_true.shape, y_pred.shape) 83 | ndcg5_ex = ndcg_score(y_true, y_pred, k=5) 84 | ndcg5_ += ndcg5_ex 85 | ndcg10_ex = ndcg_score(y_true, y_pred, k=10) 86 | ndcg10_ += ndcg10_ex 87 | ndcg20_ex = ndcg_score(y_true, y_pred, k=20) 88 | ndcg20_ += ndcg20_ex 89 | 90 | valid_testing_size += 1 91 | 92 | print(i, len(training_indices), len(testing_indices), 93 | hr5_ex, hr10_ex, hr20_ex, ndcg5_ex, ndcg10_ex, ndcg20_ex, mrr_ex) 94 | 95 | if user_specific: 96 | num_examples = float(len(testing_indices)) 97 | hr5 += hr5_/num_examples 98 | hr10 += hr10_/num_examples 99 | hr20 += hr20_/num_examples 100 | ndcg5 += ndcg5_/num_examples 101 | ndcg10 += ndcg10_/num_examples 102 | ndcg20 += ndcg20_/num_examples 103 | mrr += mrr_/num_examples 104 | else: 105 | hr5 += hr5_ 106 | hr10 += hr10_ 107 | hr20 += hr20_ 108 | ndcg5 += ndcg5_ 109 | ndcg10 += ndcg10_ 110 | ndcg20 += ndcg20_ 111 | mrr += mrr_ 112 | 113 | # Store testing pairs 114 | # np.save("testing_pairs", np.array(valid_testing_pairs), allow_pickle=False) 115 | if not for_csv: 116 | if user_specific: 117 | print("valid testing size:{}".format(user_size)) 118 | print( 119 | "hr@5:{:7.4f} hr@10:{:7.4f} hr@20:{:7.4f} mrr:{:7.4f} ndcg@5:{:7.4f} ndcg@10:{:7.4f} ndcg@20:{:7.4f}".format( 120 | hr5 / user_size, 121 | hr10 / user_size, 122 | hr20 / user_size, 123 | mrr / user_size, 124 | ndcg5 / user_size, 125 | ndcg10 / user_size, 126 | ndcg20 / user_size 127 | )) 128 | else: 129 | print("valid testing size:{}".format(valid_testing_size)) 130 | print("hr@5:{:7.4f} hr@10:{:7.4f} hr@20:{:7.4f} mrr:{:7.4f} 
ndcg@5:{:7.4f} ndcg@10:{:7.4f} ndcg@20:{:7.4f}".format( 131 | hr5/valid_testing_size, 132 | hr10/valid_testing_size, 133 | hr20/valid_testing_size, 134 | mrr/valid_testing_size, 135 | ndcg5/valid_testing_size, 136 | ndcg10/valid_testing_size, 137 | ndcg20/valid_testing_size 138 | )) 139 | 140 | 141 | def intersection(lst1, lst2): 142 | lst3 = [value for value in lst1 if value in lst2] 143 | return lst3 144 | 145 | 146 | if __name__ == "__main__": 147 | # Ground truth 148 | training_matrix_f = './data/adjacency_matrix.p' 149 | ground_truth_matrix_f = './data/user_action.p' 150 | # Prediction file 151 | pred_matrix_f = './output/m_rating_pred_bestmrr.p' 152 | 153 | # ------------------------------------------- 154 | # Load matrix 155 | with open(training_matrix_f, 'rb') as f: 156 | training_matrix = pkl.load(f) 157 | if not isinstance(training_matrix, np.matrix): 158 | training_matrix = training_matrix.todense() 159 | else: 160 | training_matrix = np.array(training_matrix) 161 | # np.save("training_matrix", training_matrix, allow_pickle=False) 162 | # with open("./MOOCCube/data-for-kgcrec/negative.p", 'rb') as f: 163 | # negative = pkl.load(f) 164 | # np.save("negative", negative, allow_pickle=False) 165 | with open(ground_truth_matrix_f, 'rb') as f: 166 | ground_truth_matrix = pkl.load(f) 167 | if not isinstance(ground_truth_matrix, np.matrix): 168 | ground_truth_matrix = ground_truth_matrix.todense() 169 | else: 170 | ground_truth_matrix = np.array(ground_truth_matrix) 171 | with open(pred_matrix_f, 'rb') as f: 172 | pred_matrix = pkl.load(f) 173 | # print(pred_matrix) 174 | 175 | # Evaluation 176 | evaluate(pred_matrix, training_matrix, ground_truth_matrix, user_specific=False, for_csv=False) 177 | -------------------------------------------------------------------------------- /m_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | import random 4 | import scipy.sparse as sp 5 | import json 6 | import tabulate 7 | import time 8 | 9 | DATA_FOLDER = "./data" # my prepared dataset from MOOCCube 10 | 11 | 12 | def load_data(user, item): 13 | support_user = [] 14 | support_item = [] 15 | # rating matrix 16 | with open(DATA_FOLDER+'/rate_matrix.p', 'rb') as source: 17 | rating = pkl.load(source) 18 | if not isinstance(rating, np.matrix): 19 | rating = rating.todense() 20 | rating = np.log1p(rating) 21 | print("rating", rating) 22 | # concept w2v features (dim=100) 23 | with open(DATA_FOLDER+'/concept_embedding.p', 'rb') as source: 24 | concept_w2v = np.array(pkl.load(source)) 25 | print("concept_w2v", concept_w2v) 26 | # concept bow features (dim=616) 27 | # with open(DATA_FOLDER+'/concept_feature_bow.p', 'rb') as source: 28 | # concept_bow = pkl.load(source).todense() 29 | # concept = np.hstack((concept_w2v, concept_bow)) 30 | concept = concept_w2v 31 | # dim=100+616=716 32 | # features_item = preprocess_features(concept.astype(np.float32)) 33 | features_item = concept 34 | 35 | # user features (2136, 2844) total sum = 2136 - user-course relationship? 36 | # with open(DATA_FOLDER+'/UC.p', 'rb') as source: 37 | # features = pkl.load(source) 38 | # if not isinstance(features, np.matrix): 39 | # features = features.todense() 40 | # features_user = preprocess_features(features.astype(np.float32)) 41 | # I will change it to avg. 
of learned concept embeddings for each user 42 | features = np.matmul(rating, concept) 43 | features = features / rating.sum(axis=1) # avg 44 | features_user = preprocess_features(features.astype(np.float32)) 45 | print("features_user", features_user) 46 | 47 | # uku - user-concept-user relationship 48 | if 'uku' in user or 'kuk' in item: 49 | with open(DATA_FOLDER+'/adjacency_matrix.p', 'rb') as source: 50 | adjacency_matrix = pkl.load(source) 51 | if not isinstance(adjacency_matrix, np.matrix): 52 | adjacency_matrix = adjacency_matrix.todense() 53 | 54 | # to float will speed up a lot 55 | uk = adjacency_matrix.astype(np.float32) 56 | # stime = time.time() 57 | ku = uk.T 58 | # print("transposed uk in {}s".format(time.time()-stime)) 59 | if 'uku' in user: # user-user via concept 60 | stime = time.time() 61 | # uk_user = uk.dot(uk.T) + np.eye(uk.shape[0]) 62 | uk_user = uk.dot(ku) + np.eye(uk.shape[0]) 63 | uku = preprocess_adjacency(uk_user, name="uku") 64 | support_user.append(uku) 65 | print("added uku in {}s".format(time.time()-stime)) 66 | print(uku) 67 | if 'kuk' in item: # concept-concept via user 68 | stime = time.time() 69 | # ku_item = uk.T.dot(uk) + np.eye(uk.T.shape[0]) 70 | ku_item = ku.dot(uk) + np.eye(ku.shape[0]) 71 | kuk = preprocess_adjacency(ku_item, name="kuk") 72 | support_item.append(kuk) 73 | print("added kuk in {}s".format(time.time()-stime)) 74 | print(kuk) 75 | 76 | # ucu # user-user via course 77 | stime = time.time() 78 | if 'ucu' in user: 79 | with open(DATA_FOLDER+'/user_course.p', 'rb') as source: 80 | uc = pkl.load(source) 81 | if not isinstance(uc, np.matrix): 82 | uc = uc.todense() 83 | uc = uc.dot(uc.T) + np.eye(uc.shape[0]) 84 | ucu = preprocess_adjacency(uc, name="ucu") 85 | support_user.append(ucu) 86 | print("added ucu in {}s".format(time.time()-stime)) 87 | print(ucu) 88 | 89 | # uctcu # user-user via teacher 90 | stime = time.time() 91 | if 'uctcu' in user: 92 | with open(DATA_FOLDER+'/user_course_teacher.p', 'rb') as source: 93 | uct = pkl.load(source) 94 | if not isinstance(uct, np.matrix): 95 | uct = uct.todense() 96 | uct = uct.dot(uct.T) + np.eye(uct.shape[0]) 97 | uctcu = preprocess_adjacency(uct, name="uctcu") 98 | support_user.append(uctcu) 99 | print("added uctcu in {}s".format(time.time()-stime)) 100 | print(uctcu) 101 | 102 | # uvu # user-user via video 103 | stime = time.time() 104 | if 'uvu' in user: 105 | with open(DATA_FOLDER+'/user_video.p', 'rb') as source: 106 | uv = pkl.load(source) 107 | if not isinstance(uv, np.matrix): 108 | uv = uv.todense() 109 | uv = uv.dot(uv.T) + np.eye(uv.shape[0]) 110 | uvu = preprocess_adjacency(uv, name="uvu") 111 | support_user.append(uvu) 112 | print('added uvu in {}s'.format(time.time()-stime)) 113 | print(uvu) 114 | 115 | # kck # concept-course-concept relationship 116 | stime = time.time() 117 | if 'kck' in item: 118 | with open(DATA_FOLDER+'/concept_course.p', 'rb') as source: 119 | kc = pkl.load(source) 120 | if not isinstance(kc, np.matrix): 121 | kc = kc.todense() 122 | kc = kc.astype(np.float32) # to speed up 123 | kc = kc.dot(kc.T) + np.eye(kc.shape[0]) 124 | kck = preprocess_adjacency(kc, name="kck") 125 | support_item.append(kck) 126 | print("added kck in {}s".format(time.time()-stime)) 127 | print(kck) 128 | 129 | support_user = np.array(support_user) 130 | support_item = np.array(support_item) 131 | print(support_user.shape) 132 | print(support_item.shape) 133 | # assert len(item) == support_item.shape[0] 134 | # assert len(user) == support_user.shape[0] 135 | 136 | # todo dummy input 
when testing MF only 137 | # support_item = np.array([np.zeros((21037, 21037))]) 138 | # print(support_item.shape) 139 | 140 | # print("support_user", support_user) 141 | # print("support_item", support_item) 142 | 143 | # negative sample 144 | with open(DATA_FOLDER+'/negative.p', 'rb') as source: 145 | negative = pkl.load(source) 146 | # print("negative", negative) 147 | 148 | return rating, adjacency_matrix, features_item, features_user, support_user, support_item, negative 149 | 150 | 151 | # def get_user_emb(concepts, concept_emb): 152 | # 153 | 154 | def preprocess_features(features): 155 | """ Preprocess to make row sum as prob. i.e., sum=1 """ 156 | rowsum = np.array(features.sum(1)) 157 | r_inv = np.power(rowsum, -1).flatten() 158 | r_inv[np.isinf(r_inv)] = 0. 159 | r_mat_inv = np.diag(r_inv) 160 | features = r_mat_inv.dot(features) 161 | return features 162 | 163 | 164 | def preprocess_adj(adjacency, name=""): 165 | rowsum = np.array(adjacency.sum(1)) 166 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 167 | print("Degree matrix-0.5 power for {} - {}".format(name, d_inv_sqrt)) 168 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 169 | d_mat_inv_sqrt = np.diag(d_inv_sqrt) 170 | return adjacency.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt)*1e2 171 | 172 | 173 | def preprocess_adjacency(A, name=""): 174 | """ 175 | https://towardsdatascience.com/how-to-do-deep-learning-on-graphs-with-graph-convolutional-networks-7d2250723780 176 | 2 different normalization where one is preprocess_adj - https://tkipf.github.io/graph-convolutional-networks/ 177 | """ 178 | D = np.array(np.sum(A, axis=0)).flatten() 179 | print("Degree matrix for {} - {}".format(name, D)) 180 | D = np.matrix(np.diag(D)) 181 | return D**-1 * A * 1e2 182 | 183 | 184 | def construct_feed_dict(placeholders, features_user, features_item, rating, biases_list_user, 185 | biases_list_item, negative): 186 | feed_dict = dict() 187 | feed_dict.update({placeholders['rating']: rating}) 188 | feed_dict.update({placeholders['features_user']: features_user}) 189 | feed_dict.update({placeholders['features_item']: features_item}) 190 | feed_dict.update({placeholders['support_user'][i]: biases_list_user[i] for i in range(len(biases_list_user))}) 191 | feed_dict.update({placeholders['support_item'][i]: biases_list_item[i] for i in range(len(biases_list_item))}) 192 | feed_dict.update({placeholders['negative']: negative}) 193 | return feed_dict 194 | 195 | 196 | def radom_negative_sample(user_action, item_size): 197 | negative_sample = [] 198 | for u in user_action: 199 | sample = [] 200 | i = 0 201 | while i < 99: 202 | t = random.randint(0, item_size-1) 203 | if t not in user_action[u]: 204 | sample.append([u, t]) 205 | i += 1 206 | sample.append([u, user_action[u][-1]]) 207 | negative_sample.append(sample) 208 | return np.array(negative_sample) 209 | 210 | 211 | def getRateMatrix(user_action, item_size): 212 | """ Get rate matrix where rate = # of clicks of a concept """ 213 | row = [] 214 | col = [] 215 | dat = [] 216 | for u in user_action: 217 | ls = set(user_action[u]) 218 | for k in ls: 219 | row.append(u) 220 | col.append(k) 221 | dat.append(user_action[u].count(k)) 222 | coo_matrix = sp.coo_matrix((dat, (row, col)), shape=(len(user_action), item_size)) 223 | with open('./data/rate_matrix_new.p', 'wb') as source: 224 | pkl.dump(coo_matrix.toarray(), source) 225 | 226 | 227 | def adj_to_bias(adj, sizes, nhood=1): 228 | nb_graphs = adj.shape[0] 229 | mt = np.empty(adj.shape) 230 | for g in range(nb_graphs): 231 | mt[g] = 
np.eye(adj.shape[1]) 232 | for _ in range(nhood): 233 | 234 | mt[g] = np.matmul(mt[g], (adj[g] + np.eye(adj.shape[1]))) 235 | for i in range(sizes[g]): 236 | for j in range(sizes[g]): 237 | if mt[g][i][j] > 0.0: 238 | mt[g][i][j] = 1.0 -------------------------------------------------------------------------------- /m_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from m_layers import * 3 | from metrics import * 4 | 5 | flags = tf.app.flags 6 | FLAGS = flags.FLAGS 7 | 8 | 9 | class Model(object): 10 | def __init__(self, **kwargs): 11 | allowed_kwargs = {'name', 'logging'} 12 | for kwarg in kwargs.keys(): 13 | assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg 14 | name = kwargs.get('name') 15 | if not name: 16 | name = self.__class__.__name__.lower() 17 | self.name = name 18 | 19 | logging = kwargs.get('logging', False) 20 | self.logging = logging 21 | 22 | self.vars = {} 23 | self.placeholders = {} 24 | 25 | self.layers = [] 26 | self.activations = [] 27 | 28 | self.inputs = None 29 | self.outputs = None 30 | 31 | self.loss = 0 32 | self.test = None 33 | self.alphas = None 34 | 35 | def _build(self): 36 | raise NotImplementedError 37 | 38 | def build(self): 39 | """ Wrapper for _build() """ 40 | with tf.variable_scope(self.name): 41 | self._build() 42 | 43 | # Build sequential layer model 44 | self.activations.append(self.inputs) 45 | for i in range(len(self.layers)): 46 | print("Processing GCN-{}-{}th layer".format(self.name, i)) 47 | hidden = self.layers[i](self.activations[-1]) 48 | if i == 3: 49 | # self.test = self.layers[i].test 50 | self.test = hidden 51 | self.activations.append(hidden) 52 | self.outputs = self.activations[-1] 53 | self._loss() 54 | 55 | def _loss(self): 56 | raise NotImplementedError 57 | 58 | def save(self, sess=None): 59 | if not sess: 60 | raise AttributeError("TensorFlow session not provided.") 61 | saver = tf.train.Saver(self.vars) 62 | save_path = saver.save(sess, "./output/%s.ckpt" % self.name) 63 | print("Model saved in file: %s" % save_path) 64 | 65 | def load(self, sess=None): 66 | if not sess: 67 | raise AttributeError("TensorFlow session not provided.") 68 | saver = tf.train.Saver(self.vars) 69 | save_path = "./output/%s.ckpt" % self.name 70 | saver.restore(sess, save_path) 71 | print("Model restored from file: %s" % save_path) 72 | 73 | 74 | class GCN(Model): 75 | def __init__(self, placeholders, input_dim, tag, length, parentvars, **kwargs): 76 | """ 77 | Parameters 78 | ---------------- 79 | tag: "user" or "item" 80 | """ 81 | super(GCN, self).__init__(**kwargs) 82 | 83 | self.inputs = placeholders['features_'+tag] 84 | self.input_dim = input_dim 85 | self.output_dim = FLAGS.output_dim 86 | self.placeholders = placeholders 87 | self.tag = tag 88 | self.length = length 89 | 90 | # todo weaving 91 | self.parentvars = parentvars 92 | 93 | self.build() 94 | 95 | def _loss(self): 96 | # Weight decay loss 97 | for i in range(len(self.layers)): 98 | for var in self.layers[i].vars.values(): 99 | self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var) 100 | 101 | def _build(self): 102 | 103 | self.layers.append(GraphConvolution(input_dim=self.input_dim, 104 | # output_dim=FLAGS.hidden1, 105 | output_dim=self.output_dim, 106 | length=self.length, 107 | placeholders=self.placeholders, 108 | tag=self.tag, 109 | act=tf.nn.relu, 110 | dropout=True, 111 | sparse_inputs=False, 112 | logging=self.logging, 113 | name='first' + self.tag, 114 | featureless=True)) 115 | 116 | # 
self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1, 117 | # # output_dim=FLAGS.hidden1, 118 | # output_dim=self.output_dim, 119 | # length=self.length, 120 | # placeholders=self.placeholders, 121 | # tag=self.tag, 122 | # act=tf.nn.relu, 123 | # dropout=True, 124 | # sparse_inputs=False, 125 | # logging=self.logging, 126 | # # name='first'+self.tag, 127 | # featureless=False)) 128 | # 129 | # self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1, 130 | # output_dim=FLAGS.hidden2, 131 | # length=self.length, 132 | # placeholders=self.placeholders, 133 | # tag=self.tag, 134 | # act=tf.nn.relu, 135 | # dropout=True, 136 | # logging=self.logging)) 137 | # 138 | # self.layers.append(GraphConvolution(input_dim=FLAGS.hidden2, 139 | # output_dim=self.output_dim, 140 | # length=self.length, 141 | # placeholders=self.placeholders, 142 | # tag=self.tag, 143 | # act=tf.nn.relu, 144 | # dropout=True, 145 | # logging=self.logging)) 146 | 147 | self.layers.append(SimpleAttLayer(attention_size=32, 148 | tag=self.tag, 149 | parentvars=self.parentvars, 150 | time_major=False)) 151 | 152 | 153 | class MOOCUM(): 154 | def __init__(self, placeholders, input_dim_user, input_dim_item, user_dim, item_dim, learning_rate): 155 | """ 156 | Parameters 157 | ----------------- 158 | input_dim_user: user feature dim 159 | input_dim_item: item feature dim 160 | user dim: size of users 161 | item dim: size of items 162 | """ 163 | self.name = "MOOCUM" 164 | self.placeholders = placeholders 165 | self.negative = placeholders['negative'] 166 | self.length = user_dim 167 | self.user_dim = user_dim 168 | self.item_dim = item_dim 169 | 170 | self.vars = {} 171 | self.userModel = GCN(placeholders=self.placeholders, input_dim=input_dim_user, tag='user', length=user_dim, 172 | parentvars=self.vars) 173 | self.itemModel = GCN(placeholders=self.placeholders, input_dim=input_dim_item, tag='item', length=item_dim, 174 | parentvars=self.vars) 175 | self.user = self.userModel.outputs 176 | self.item = self.itemModel.outputs 177 | self.layers = [] 178 | self.rate_matrix = None 179 | self.xuij = None 180 | self.result = None 181 | self.l2_loss = 0 182 | self.los = 0 183 | self.hrat1 = 0 184 | self.hrat5 = 0 185 | self.hrat10 = 0 186 | self.hrat20 = 0 187 | self.ndcg5 = 0 188 | self.ndcg10 = 0 189 | self.ndcg20 = 0 190 | self.mrr = 0 191 | self.err = None 192 | self.auc = 0 193 | # self.mse = 0 194 | self.optimizer = tf.train.AdamOptimizer(learning_rate) 195 | self.train_op = None 196 | 197 | self.build() 198 | 199 | def build(self): 200 | self.layers.append(RateLayer(self.placeholders, 201 | self.user, self.item, 202 | user_dim=self.user_dim, 203 | item_dim=self.item_dim, 204 | parentvars=self.vars 205 | )) 206 | output = None 207 | for i in range(len(self.layers)): 208 | print("Using {} layer{}".format(self.name, i)) 209 | output = self.layers[i]() 210 | self.rate_matrix, self.xuij, self.rate1, self.rate2, self.bias = output 211 | self.loss() 212 | self.train() 213 | self.env() 214 | 215 | def train(self): 216 | self.train_op = self.optimizer.minimize(self.los) 217 | 218 | def env(self): 219 | self.result = tf.nn.top_k(self.rate_matrix, 10).indices 220 | self.hrat() 221 | self.ndcg() 222 | self.mr() 223 | self.au() 224 | # self.ms() 225 | 226 | def loss(self): 227 | rating_matrix = self.placeholders['rating'] 228 | # regularization in the paper 229 | self.l2_loss += self.userModel.loss # l2 loss from Using 230 | self.l2_loss += self.itemModel.loss # l2 loss from itemModel 231 | for i in range(len(self.layers)): 232 | for 
var in self.layers[i].vars.values(): 233 | self.l2_loss += FLAGS.weight_decay * tf.nn.l2_loss(var) 234 | # add self vars 235 | for var in self.vars.values(): 236 | self.l2_loss += FLAGS.weight_decay * tf.nn.l2_loss(var) 237 | 238 | # Debug Inf, NAN values # Clip to get rid of Inf, NAN 239 | sigmoid_val = tf.sigmoid(self.xuij) 240 | # print("sigmoid val max", tf.reduce_max(sigmoid_val), "min", tf.reduce_min(sigmoid_val)) 241 | self.los = -tf.reduce_mean(tf.log(tf.clip_by_value(sigmoid_val, 1e-10, 1.0))) + self.l2_loss 242 | 243 | def hrat(self): 244 | self.hrat1 = hr(self.rate_matrix, self.negative, self.length, k=1) 245 | self.hrat5 = hr(self.rate_matrix, self.negative, self.length, k=5) 246 | self.hrat10 = hr(self.rate_matrix, self.negative, self.length, k=10) 247 | self.hrat20 = hr(self.rate_matrix, self.negative, self.length, k=20) 248 | 249 | def ndcg(self): 250 | self.ndcg5 = ndcg(self.rate_matrix, self.negative, self.length, k=5) 251 | self.ndcg10 = ndcg(self.rate_matrix, self.negative, self.length, k=10) 252 | self.ndcg20 = ndcg(self.rate_matrix, self.negative, self.length, k=20) 253 | 254 | def mr(self): 255 | self.mrr = mrr(self.rate_matrix, self.negative, self.length) 256 | 257 | def au(self): 258 | self.auc = auc(self.rate_matrix, self.negative, self.length) 259 | 260 | def save(self, sess=None, info=""): 261 | if not sess: 262 | raise AttributeError("TensorFlow session not provided.") 263 | saver = tf.train.Saver() 264 | save_path = saver.save(sess, "./output/{}-{}.ckpt".format(self.name, info)) 265 | print("Model saved in file: %s" % save_path) 266 | 267 | def load(self, sess=None): 268 | if not sess: 269 | raise AttributeError("TensorFlow session not provided.") 270 | # saver = tf.train.Saver(self.vars) 271 | saver = tf.train.Saver() 272 | save_path = "./output/Mv.3/%s-besthr5.ckpt" % self.name 273 | saver.restore(sess, save_path) 274 | print("Model restored from file: %s" % save_path) -------------------------------------------------------------------------------- /m_train.py: -------------------------------------------------------------------------------- 1 | from m_utils import * 2 | import tensorflow as tf 3 | print(tf.__version__) 4 | from m_models import * 5 | import time 6 | import numpy as np 7 | from scipy import sparse 8 | 9 | # ------------------------------------------ 10 | # Set random seed 11 | seed = 123 12 | np.random.seed(seed) 13 | tf.set_random_seed(seed) 14 | 15 | # ------------------------------------------ 16 | # Set params 17 | learning_rate = .01 18 | decay_rate = 1 19 | global_steps = 500 20 | decay_steps = 100 21 | samples = 1024 22 | batches = 30 # 856067/1024=837 23 | print("learning rate:", learning_rate) 24 | print("global steps:", global_steps) 25 | print("samples:", samples) 26 | print("batches", batches) 27 | 28 | # ------------------------------------------ 29 | # Settings 30 | flags = tf.app.flags 31 | FLAGS = flags.FLAGS 32 | flags.DEFINE_integer('hidden1', 256, 'Number of units in hidden layer 1.') 33 | flags.DEFINE_integer('hidden2', 128, 'Number of units in hidden layer 2.') 34 | flags.DEFINE_float('dropout', 0, 'Dropout rate (1 - keep probability).') # default .5 35 | flags.DEFINE_float('weight_decay', 1e-8, 'Weight for L2 loss on embedding matrix.') # default 5e-4 36 | flags.DEFINE_integer('output_dim', 100, 'Output_dim of user final embedding.') # default 64, in paper it seems 100 37 | flags.DEFINE_integer('latent_dim', 30, 'Latent_dim of user&item.') 38 | 39 | # ------------------------------------------ 40 | # Load data 41 | 
support_string_user = ['ucu', 'uvu', 'uctcu', 'uku'] 42 | support_string_item = ['kuk', 'kck'] 43 | rating, adjacency_matrix, features_item, features_user, support_user, support_item, negative = \ 44 | load_data(user=support_string_user, item=support_string_item) 45 | 46 | # User size item size 47 | user_dim = rating.shape[0] 48 | item_dim = rating.shape[1] 49 | 50 | # Get non-zero indicies 51 | straining_matrix = sparse.csr_matrix(rating) 52 | uids, iids = straining_matrix.nonzero() 53 | print("uids size", len(uids)) 54 | 55 | # user_support 56 | support_num_user = len(support_string_user) 57 | # item_support 58 | support_num_item = len(support_string_item) 59 | # Define placeholders 60 | placeholders = { 61 | 'rating': tf.placeholder(dtype=tf.float32, shape=rating.shape, name="rating"), 62 | 'features_user': tf.placeholder(dtype=tf.float32, shape=features_user.shape, name='features_user'), 63 | 'features_item': tf.placeholder(dtype=tf.float32, shape=features_item.shape, name="features_item"), 64 | 'support_user': [tf.placeholder(dtype=tf.float32, name='support'+str(_)) for _ in range(support_num_user)], 65 | 'support_item': [tf.placeholder(dtype=tf.float32, name='support'+str(_)) for _ in range(support_num_item)], 66 | 'dropout': tf.placeholder_with_default(0., shape=(), name='dropout'), 67 | 'negative': tf.placeholder(dtype=tf.int32, shape=negative.shape, name='negative'), 68 | 'batch_u': tf.placeholder(tf.int32, shape=(None, 1), name="user"), 69 | 'batch_i': tf.placeholder(tf.int32, shape=(None, 1), name="item_pos"), 70 | 'batch_j': tf.placeholder(tf.int32, shape=(None, 1), name="item_neg") 71 | } 72 | global_ = tf.Variable(tf.constant(0)) 73 | learning = tf.train.exponential_decay(learning_rate, global_, decay_steps, decay_rate, staircase=False) 74 | 75 | # Create Model 76 | model = MOOCUM(placeholders, 77 | input_dim_user=features_user.shape[1], 78 | input_dim_item=features_item.shape[1], 79 | user_dim=user_dim, 80 | item_dim=item_dim, 81 | learning_rate=learning) 82 | 83 | # Initialize session 84 | sess = tf.Session() 85 | 86 | # Init variables 87 | sess.run(tf.global_variables_initializer()) 88 | 89 | # Load from previous session 90 | # model.load(sess=sess) 91 | 92 | # ------------------------------------------ 93 | # Train model 94 | start_time = time.time() 95 | epoch = 0 96 | mrr_best = 0 97 | hrat5_best = 0 98 | hrat10_best = 0 99 | hrat20_best = 0 100 | ndcgat5_best = 0 101 | ndcgat10_best = 0 102 | ndcgat20_best = 0 103 | 104 | # Construct feed dictionary 105 | feed_dict = construct_feed_dict(placeholders, features_user, features_item, rating, support_user, 106 | support_item, negative) 107 | 108 | total_params = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]) 109 | print("Total params of current model: {}".format(total_params)) 110 | 111 | while epoch < global_steps: 112 | for _ in range(batches): 113 | features = np.matmul(adjacency_matrix, features_item) 114 | features_user = features / adjacency_matrix.sum(axis=1) # avg of emb 115 | 116 | # Prepare batches 117 | # First we sample [samples] uniform indices 118 | idx = np.random.randint(low=0, high=len(uids), size=samples) 119 | # print("random sample indices:", idx[:10]) 120 | # User batch matching idx 121 | batch_u = uids[idx].reshape(-1, 1) 122 | # Pos item 123 | batch_i = iids[idx].reshape(-1, 1) 124 | # Neg item 125 | batch_j = np.random.randint( 126 | low=0, 127 | high=item_dim, 128 | size=(samples, 1), 129 | dtype="int32" 130 | ) 131 | # To feed, need to change dtype 132 | batch_u = 
batch_u.astype("float32") 133 | batch_i = batch_i.astype("float32") 134 | batch_j = batch_j.astype("float32") 135 | 136 | # Update feed_dict 137 | feed_dict.update({placeholders['features_user']: features_user}) 138 | feed_dict.update({placeholders['features_item']: features_item}) 139 | feed_dict.update({placeholders['batch_u']: batch_u}) 140 | feed_dict.update({placeholders['batch_i']: batch_i}) 141 | feed_dict.update({placeholders['batch_j']: batch_j}) 142 | feed_dict.update({placeholders['dropout']: FLAGS.dropout}) 143 | feed_dict.update({global_: epoch}) 144 | 145 | # Train with batch 146 | _, los, l2_los, alpha1, alpha2, HR1, HR5, HR10, HR20, NDCG5, NDCG10, NDCG20, MRR, AUC, user, item, result, \ 147 | rate_matrix, rate1, rate2, bias, alphas_user, alphas_item = \ 148 | sess.run([model.train_op, model.los, model.l2_loss, 149 | model.layers[-1].vars['alpha1'], model.layers[-1].vars['alpha2'], model.hrat1, 150 | model.hrat5, model.hrat10, model.hrat20, 151 | model.ndcg5, model.ndcg10, model.ndcg20, 152 | model.mrr, model.auc, 153 | model.user, model.item, 154 | model.result, model.rate_matrix, 155 | model.rate1, model.rate2, model.bias, 156 | # model.layers[-1].vars['alphas_user'], model.layers[-1].vars['alphas_item']], # for mfAtt 157 | model.userModel.layers[-1].vars['alphas_user'], model.itemModel.layers[-1].vars['alphas_item']], 158 | feed_dict) 159 | 160 | if epoch % 1 == 0: 161 | aLine = time.ctime() + \ 162 | " {:10.2f}s passed".format(time.time()-start_time) + \ 163 | " Train" + str(epoch) + \ 164 | " Total-Loss:{:8.6f}".format(los) + \ 165 | " L2-Loss:{:8.6f}".format(l2_los) + \ 166 | " Model-Loss:{:8.6f}".format(los-l2_los) + \ 167 | " HR@1:{:8.6f}".format(HR1) + \ 168 | " HR@5:{:8.6f}".format(HR5) + \ 169 | " HR@10:{:8.6f}".format(HR10) + \ 170 | " HR@20:{:8.6f}".format(HR20) + \ 171 | " nDCG@5:{:8.6f}".format(NDCG5) + \ 172 | " nDCG@10:{:8.6f}".format(NDCG10) + \ 173 | " nDCG@20:{:8.6f}".format(NDCG20) + \ 174 | " MRR:{:8.6f}".format(MRR) + \ 175 | " AUC:{:8.6f}".format(AUC) + \ 176 | " Alpha1:{:8.5f}".format(alpha1) + \ 177 | " Alpha2:{:8.5f}".format(alpha2) + \ 178 | " rate:{}".format(rate_matrix[0][:5]) + \ 179 | " bias:{}".format(bias[0][:5]) + \ 180 | " rate1:{}".format(rate1[0][:5]) + \ 181 | " rate2:{}".format(rate2[0][:5]) + \ 182 | " alphas_user:{}".format(alphas_user[:, :5]) + \ 183 | " alphas_item:{}".format(alphas_item[:, :5]) 184 | print(aLine) 185 | epoch += 1 186 | 187 | # Save rating prediction with best performance 188 | if epoch > 100: 189 | # if HR5 > hrat5_best: 190 | # print("Best HR5-{} updated at epoch:{}".format(HR5, epoch)) 191 | # hrat5_best = HR5 192 | # with open('./output/m_rating_pred_besthr5.p', 'wb') as f: 193 | # pkl.dump(rate_matrix, f) 194 | # # Save 195 | # model.save(sess, info="besthr5") 196 | # np.save('./output/alphas_user_besthr5', alphas_user) 197 | # np.save('./output/alphas_item_besthr5', alphas_user) 198 | # if HR10 > hrat10_best: 199 | # print("Best HR10-{} updated at epoch:{}".format(HR10, epoch)) 200 | # hrat10_best = HR10 201 | # with open('./output/m_rating_pred_besthr10.p', 'wb') as f: 202 | # pkl.dump(rate_matrix, f) 203 | # # Save 204 | # model.save(sess, info="besthr10") 205 | # np.save('./output/alphas_user_besthr10', alphas_user) 206 | # np.save('./output/alphas_item_besthr10', alphas_user) 207 | # if HR20 > hrat20_best: 208 | # print("Best HR20-{} updated at epoch:{}".format(HR20, epoch)) 209 | # hrat20_best = HR20 210 | # with open('./output/m_rating_pred_besthr20.p', 'wb') as f: 211 | # pkl.dump(rate_matrix, f) 212 | 
# # Save
213 | # model.save(sess, info="besthr20")
214 | # np.save('./output/alphas_user_besthr20', alphas_user)
215 | # np.save('./output/alphas_item_besthr20', alphas_user)
216 | # if NDCG5 > ndcgat5_best:
217 | # print("Best NDCG5-{} updated at epoch:{}".format(NDCG5, epoch))
218 | # ndcgat5_best = NDCG5
219 | # with open('./output/m_rating_pred_bestndcg5.p', 'wb') as f:
220 | # pkl.dump(rate_matrix, f)
221 | # # Save
222 | # model.save(sess, info="bestndcg5")
223 | # np.save('./output/alphas_user_bestndcg5', alphas_user)
224 | # np.save('./output/alphas_item_bestndcg5', alphas_user)
225 | # if NDCG10 > ndcgat10_best:
226 | # print("Best NDCG10-{} updated at epoch:{}".format(NDCG10, epoch))
227 | # ndcgat10_best = NDCG10
228 | # with open('./output/m_rating_pred_bestndcg10.p', 'wb') as f:
229 | # pkl.dump(rate_matrix, f)
230 | # # Save
231 | # model.save(sess, info="bestndcg10")
232 | # np.save('./output/alphas_user_bestndcg10', alphas_user)
233 | # np.save('./output/alphas_item_bestndcg10', alphas_user)
234 | # if NDCG20 > ndcgat20_best:
235 | # print("Best NDCG20-{} updated at epoch:{}".format(NDCG20, epoch))
236 | # ndcgat20_best = NDCG20
237 | # with open('./output/m_rating_pred_bestndcg20.p', 'wb') as f:
238 | # pkl.dump(rate_matrix, f)
239 | # # Save
240 | # model.save(sess, info="bestndcg20")
241 | # np.save('./output/alphas_user_bestndcg20', alphas_user)
242 | # np.save('./output/alphas_item_bestndcg20', alphas_user)
243 | if MRR > mrr_best:
244 | print("Best MRR-{} updated at epoch:{}".format(MRR, epoch))
245 | mrr_best = MRR
246 | with open('./output/m_rating_pred_bestmrr.p', 'wb') as f:
247 | pkl.dump(rate_matrix, f)
248 | # Save
249 | model.save(sess, info="bestmrr")
250 | np.save('./output/alphas_user_mrr', alphas_user)
251 | np.save('./output/alphas_item_mrr', alphas_item)
252 |
253 | # Save rating prediction every 50 epochs
254 | # if (epoch) % 50 == 0:
255 | # with open('./output/m_rating_pred_ep{}.p'.format(epoch-1), 'wb') as f:
256 | # pkl.dump(rate_matrix, f)
257 | # # Save
258 | # model.save(sess, info="ep{}".format(epoch))
--------------------------------------------------------------------------------
/m_layers.py:
--------------------------------------------------------------------------------
1 | from m_inits import *
2 | # import tensorflow as tf
3 | import tensorflow.compat.v1 as tf
4 | import numpy as np
5 |
6 | flags = tf.app.flags
7 | FLAGS = flags.FLAGS
8 |
9 | # global unique layer ID dictionary for layer name assignment
10 | _LAYER_UIDS = {}
11 |
12 |
13 | def get_layer_uid(layer_name=''):
14 | """Helper function, assigns unique layer IDs."""
15 | if layer_name not in _LAYER_UIDS:
16 | _LAYER_UIDS[layer_name] = 1
17 | return 1
18 | else:
19 | _LAYER_UIDS[layer_name] += 1
20 | return _LAYER_UIDS[layer_name]
21 |
22 |
23 | def sparse_dropout(x, keep_prob, noise_shape):
24 | """Dropout for sparse tensors."""
25 | random_tensor = keep_prob
26 | random_tensor += tf.random_uniform(noise_shape)
27 | dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
28 | pre_out = tf.sparse_retain(x, dropout_mask)
29 | return pre_out * (1./keep_prob)
30 |
31 |
32 | def dot(x, y, sparse=False):
33 | """Wrapper for tf.matmul (sparse vs dense)."""
34 | if sparse:
35 | res = tf.sparse_tensor_dense_matmul(x, y)
36 | else:
37 | res = tf.matmul(x, y)
38 | return res
39 |
40 |
41 | class Layer(object):
42 | """Base layer class. Defines basic API for all layer objects.
43 | Implementation inspired by keras (http://keras.io).
44 |
45 | # Properties
46 | name: String, defines the variable scope of the layer.
47 | logging: Boolean, switches TensorFlow histogram logging on/off
48 |
49 | # Methods
50 | _call(inputs): Defines computation graph of layer
51 | (i.e. takes input, returns output)
52 | __call__(inputs): Wrapper for _call()
53 | _log_vars(): Log all variables
54 | """
55 |
56 | def __init__(self, **kwargs):
57 | allowed_kwargs = {'name', 'logging'}
58 | for kwarg in kwargs.keys():
59 | assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
60 | name = kwargs.get('name')
61 | if not name:
62 | layer = self.__class__.__name__.lower()
63 | name = layer + '_' + str(get_layer_uid(layer))
64 | self.name = name
65 | self.vars = {}
66 | logging = kwargs.get('logging', False)
67 | self.logging = logging
68 | self.sparse_inputs = False
69 | self.test = []
70 |
71 | def _call(self, inputs):
72 | return inputs
73 |
74 | def __call__(self, inputs):
75 | with tf.name_scope(self.name):
76 | if self.logging and not self.sparse_inputs:
77 | tf.summary.histogram(self.name + '/inputs', inputs)
78 | outputs = self._call(inputs)
79 | if self.logging:
80 | tf.summary.histogram(self.name + '/outputs', outputs)
81 | return outputs
82 |
83 | def _log_vars(self):
84 | for var in self.vars:
85 | tf.summary.histogram(self.name + '/vars/' + var, self.vars[var])
86 |
87 |
88 | class GraphConvolution(Layer):
89 | """Graph convolution layer."""
90 | def __init__(self, input_dim, output_dim, length, placeholders, tag, dropout=0.,
91 | sparse_inputs=False, act=tf.nn.relu, bias=False,
92 | featureless=False, **kwargs):
93 | super(GraphConvolution, self).__init__(**kwargs)
94 |
95 | if dropout:
96 | self.dropout = placeholders['dropout']
97 | else:
98 | self.dropout = 0.
99 |
100 | self.act = act
101 | self.support = placeholders['support_'+tag]
102 | self.sparse_inputs = sparse_inputs
103 | self.featureless = featureless
104 | self.bias = bias
105 | self.tag = tag
106 | self.length = length
107 |
108 | # helper variable for sparse dropout
109 | # self.num_features_nonzero = placeholders['num_features_nonzero']
110 |
111 | with tf.variable_scope(self.name + '_' + self.tag + '_vars'):
112 | for i in range(len(self.support)):
113 | if not self.featureless:
114 | self.vars['weights_' + str(i)] = glorot([input_dim, output_dim], name='weights_' + str(i))
115 | else:
116 | # print(placeholders["features_{}".format(self.tag)].shape[0])
117 | self.vars['weights_' + str(i)] = glorot([int(placeholders["features_{}".format(self.tag)].shape[0]),
118 | output_dim], name='weights_' + str(i))
119 | print("weights_{} dim:{},{}".format(i, self.vars['weights_' + str(i)].shape[0],
120 | self.vars['weights_' + str(i)].shape[1]))
121 | # self.vars['bias_'+str(i)] = zeros([output_dim,], name='bias_' + str(i))
122 | self.vars['bias_' + str(i)] = tf.zeros(shape=(self.length, 1), name='bias_' + str(i))
123 |
124 | if self.logging:
125 | self._log_vars()
126 |
127 | def _call(self, inputs):
128 | """
129 | inputs: H(l)
130 | outputs: H(l+1)
131 | """
132 | print("GCN _call inputs shape", inputs.shape)
133 | supports = list()
134 | for i in range(len(self.support)):
135 | print("Processing {}-th support_{}".format(i, self.tag))
136 | if self.name == 'first'+self.tag:  # toggle note: commented here
137 | print("Name including first{}, x=inputs".format(self.tag))
138 | x = inputs
139 | else:
140 | x = inputs[i]
141 | # x = inputs  # for the concat variant, change three places: this input, the add_n output, and remove the attention
142 |
143 | # dropout
144 | x = tf.nn.dropout(x, 1-self.dropout)
145 |
146 | print("x shape", x.shape)
147 |
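            # For each meta-path support below, the layer computes
            # act(support_i @ pre_sup + bias_i). With featureless=True (as
            # used in m_models.GCN._build), pre_sup is the weight matrix
            # itself, of shape (num_nodes, output_dim), acting as a free
            # per-node embedding that the normalized support aggregates.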
148 | # convolve
149 | # support = tf.matmul(self.support[i], x)
150 | if not self.featureless:
151 | """ Here if using content features """
152 | pre_sup = dot(x, self.vars['weights_' + str(i)])
153 | else:
154 | """ If not, use weights for training """
155 | pre_sup = self.vars['weights_' + str(i)]
156 | support = dot(self.support[i], pre_sup)
157 | # self.test.append(self.vars['bias_' + str(i)])
158 | support = support + self.vars['bias_' + str(i)]
159 | supports.append(self.act(support))
160 | # output = tf.add_n(supports)  # toggle note: uncommented here
161 | output = supports  # toggle note: commented here
162 | print("GCN output", len(output))
163 | # bias
164 | # return output
165 | return self.act(output) # support already had self.act(support) why again?
166 |
167 |
168 | class RatLayer():
169 | def __init__(self, user, item, act=tf.nn.relu):
170 | self.user = user
171 | self.item = item
172 | self.act = act
173 |
174 | def __call__(self):
175 | rate_matrix = tf.matmul(self.user, tf.transpose(self.item))
176 | return self.act(rate_matrix)
177 |
178 |
179 | class RateLayer():
180 | def __init__(self, placeholders, user, item, user_dim, item_dim, parentvars, ac=tf.nn.relu):
181 | self.user = user
182 | self.item = item
183 | self.batch_u = placeholders['batch_u']
184 | self.batch_i = placeholders['batch_i']
185 | self.batch_j = placeholders['batch_j']
186 | self.name = 'RateLayer'
187 | self.ac = ac
188 | self.vars = {}
189 | with tf.name_scope(self.name + '_vars'):
190 |
191 | self.vars["user_latent"] = init_variable(user_dim, int(FLAGS.latent_dim), name='user_latent_matrix')
192 | self.vars["item_latent"] = init_variable(item_dim, int(FLAGS.latent_dim), name='item_latent_matrix')
193 |
194 | # project user emb to item emb space
195 | self.vars['emb_projection'] = \
196 | init_variable(int(FLAGS.output_dim), int(FLAGS.output_dim), name='emb_projection_matrix')
197 | self.vars['item_bias'] = init_variable(item_dim, 1, "item_bias")
198 | self.vars['alpha1'] = tf.Variable(initial_value=0., name='alpha1')
199 | self.vars['alpha2'] = tf.Variable(initial_value=1., name='alpha2')
200 |
201 | def __call__(self):
202 | """
203 | Eq 10 in Attentional Graph Convolutional Networks for Knowledge Concept Recommendation
204 | in MOOCs in a Heterogeneous View
205 | """
206 | # MF
207 | u_factors = tf.nn.embedding_lookup(self.vars['user_latent'], self.batch_u)
208 | i_factors = tf.nn.embedding_lookup(self.vars['item_latent'], self.batch_i)
209 | j_factors = tf.nn.embedding_lookup(self.vars['item_latent'], self.batch_j)
210 | rate_matrix1_i = tf.reduce_sum(u_factors * i_factors, axis=2)
211 | rate_matrix1_j = tf.reduce_sum(u_factors * j_factors, axis=2)
212 | rate_matrix1 = tf.matmul(self.vars['user_latent'], tf.transpose(self.vars['item_latent']))
213 | print("rate_matrix1 shape:", rate_matrix1.shape)
214 | # Emb
215 | u_emb = tf.nn.embedding_lookup(self.user, self.batch_u)
216 | i_emb = tf.nn.embedding_lookup(self.item, self.batch_i)
217 | j_emb = tf.nn.embedding_lookup(self.item, self.batch_j)
218 | u_emb = tf.squeeze(u_emb, axis=1)
219 | i_emb = tf.squeeze(i_emb, axis=1)
220 | j_emb = tf.squeeze(j_emb, axis=1)
221 | u_emb = tf.matmul(u_emb, self.vars['emb_projection']) # project to item space
222 | rate_matrix2_i = tf.reduce_sum(u_emb * i_emb, axis=1)
223 | rate_matrix2_j = tf.reduce_sum(u_emb * j_emb, axis=1)
224 | projected_user = tf.matmul(self.user, self.vars["emb_projection"])
225 | rate_matrix2 = tf.matmul(projected_user, tf.transpose(self.item))
226 | print("rate_matrix2_i shape:", rate_matrix2_i.shape)
227 | print("rate_matrix2 shape:", rate_matrix2.shape)
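        # The full score assembled below combines three terms, following the
        # Eq 10 referenced in the docstring above: the MF term (rate_matrix1),
        # the GCN-embedding term (rate_matrix2) weighted by the learned
        # alpha2, and a per-item bias. xuij = score(u, i) - score(u, j) is the
        # pairwise difference fed to the BPR-style -log(sigmoid(xuij)) loss
        # in m_models.MOOCUM.loss().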
shape:", rate_matrix2.shape) 228 | # Bias 229 | i_bias = tf.nn.embedding_lookup(self.vars['item_bias'], self.batch_i) 230 | j_bias = tf.nn.embedding_lookup(self.vars['item_bias'], self.batch_j) 231 | i_bias = tf.reshape(i_bias, [-1, 1]) 232 | j_bias = tf.reshape(j_bias, [-1, 1]) 233 | # print("i_bias shape:", i_bias.shape) 234 | 235 | # full prediction 236 | rate_matrix_i = rate_matrix1_i+self.vars['alpha2']*rate_matrix2_i+i_bias 237 | rate_matrix_j = rate_matrix1_j+self.vars['alpha2']*rate_matrix2_j+j_bias 238 | rate_matrix = rate_matrix1+self.vars['alpha2']*rate_matrix2+tf.transpose(self.vars['item_bias']) 239 | 240 | # pos-neg diff. 241 | xuij = rate_matrix_i - rate_matrix_j 242 | 243 | return rate_matrix, xuij, rate_matrix2, rate_matrix2, tf.transpose(self.vars['item_bias']) 244 | 245 | 246 | class AttRateLayer(): 247 | """ Combine attention and rating together """ 248 | def __init__(self, placeholders, user, item, user_dim, item_dim, parentvars, ac=tf.nn.relu, time_major=False): 249 | print("Initializing AttRateLayer") 250 | self.user = user 251 | self.item = item 252 | self.time_major = time_major 253 | self.batch_u = placeholders['batch_u'] 254 | self.batch_i = placeholders['batch_i'] 255 | self.batch_j = placeholders['batch_j'] 256 | self.name = 'AttRateLayer' 257 | self.ac = ac 258 | self.vars = {} 259 | with tf.name_scope(self.name + '_vars'): 260 | self.vars["user_latent"] = init_variable(user_dim, int(FLAGS.latent_dim), name='user_latent_matrix') 261 | self.vars["item_latent"] = init_variable(item_dim, int(FLAGS.latent_dim), name='item_latent_matrix') 262 | 263 | # project user emb to item emb space 264 | self.vars['emb_projection'] = \ 265 | init_variable(int(FLAGS.output_dim), int(FLAGS.output_dim), name='emb_projection_matrix') 266 | self.vars['item_bias'] = init_variable(item_dim, 1, "item_bias") 267 | self.vars['alpha1'] = tf.Variable(initial_value=0., name='alpha1') 268 | self.vars['alpha2'] = tf.Variable(initial_value=1., name='alpha2') 269 | 270 | def __call__(self): 271 | print("Calling AttRatelayer") 272 | """ 273 | Eq 10 in Attentional Graph Convolutional Networks for Knowledge Concept Recommendation 274 | in MOOCs in a Heterogeneous View 275 | """ 276 | # Attention 277 | def attention(ratelayer, inputs, tag, attention_size=32): 278 | ratelayer.attention_size = attention_size 279 | ratelayer.tag = tag 280 | if isinstance(inputs, tuple): 281 | print("Attention layer - inputs is tuple, concat") 282 | # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. 
283 | inputs = tf.concat(inputs, 2) 284 | 285 | if ratelayer.time_major: 286 | # (T,B,D) => (B,T,D) 287 | inputs = tf.transpose(inputs, [1, 0, 2]) 288 | 289 | hidden_size = inputs.shape[2].value # D value - hidden size of the RNN layer 290 | print("hidden_size in attention layer", hidden_size) 291 | print("Att input shape", inputs.shape) 292 | 293 | # Trainable parameters 294 | with tf.variable_scope('v_' + ratelayer.tag): 295 | w_omega = tf.get_variable(initializer=tf.random_normal( 296 | [hidden_size+FLAGS.latent_dim, ratelayer.attention_size], stddev=0.1), name='w_omega') 297 | ratelayer.vars['w_omega'] = w_omega 298 | # b_omega = tf.get_variable(initializer=tf.random_normal( 299 | # [ratelayer.attention_size], stddev=0.1), name='b_omega') 300 | # ratelayer.vars['b_omega'] = b_omega 301 | u_omega = tf.get_variable(initializer=tf.random_normal( 302 | [ratelayer.attention_size], stddev=0.1), name='u_omega') 303 | ratelayer.vars['u_omega'] = u_omega 304 | b_v = tf.get_variable(initializer=tf.random_normal([1], stddev=0.1), name='b_v') 305 | ratelayer.vars['b_v'] = b_v 306 | # init for projection vars 307 | ratelayer.vars['project_'+ratelayer.tag] = tf.get_variable( 308 | initializer=tf.random_normal([FLAGS.latent_dim, FLAGS.latent_dim], stddev=0.1), 309 | name='project_' + ratelayer.tag+'_matrix') 310 | ratelayer.vars['project_bias_'+ratelayer.tag] = tf.get_variable( 311 | initializer=tf.random_normal([FLAGS.latent_dim], stddev=0.1), 312 | name='b_projection_'+ratelayer.tag) 313 | 314 | # transform and tile 315 | ratelayer.vars['projected_' + ratelayer.tag + '_latent'] = \ 316 | dot(ratelayer.vars[ratelayer.tag + '_latent'], ratelayer.vars['project_' + ratelayer.tag]) \ 317 | + ratelayer.vars['project_bias_' + ratelayer.tag] 318 | ratelayer.vars['projected_' + ratelayer.tag + '_latent'] = \ 319 | tf.nn.sigmoid(ratelayer.vars['projected_'+ratelayer.tag+'_latent']) 320 | projected_latent = tf.tile( 321 | tf.expand_dims(ratelayer.vars['projected_' + ratelayer.tag + '_latent'], axis=0), 322 | [inputs.shape[0], 1, 1]) 323 | 324 | # concat, then non-linear additive attention like the one 325 | # in https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html 326 | v1 = tf.concat([inputs, projected_latent], axis=2) 327 | v = tf.tanh(tf.tensordot(v1, w_omega, axes=1)) 328 | vu = tf.tensordot(v, u_omega, axes=1, name='vu') 329 | 330 | # For each timestamp, the size-A vector from `v` is reduced with the `u` vector 331 | print("vu shape", vu.shape) # vu shape (4, 2005) 332 | alphas = tf.nn.softmax(vu, name='alphas', axis=0) # (B,T) shape 333 | 334 | # The per-support inputs are reduced with the attention weights over axis 0; the result has (T,D) shape 335 | output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 0) 336 | return output, alphas 337 | 338 | self.user, self.vars['alphas_user'] = attention(self, self.user, 'user') 339 | self.item, self.vars['alphas_item'] = attention(self, self.item, 'item') 340 | 341 | # MF 342 | u_factors = tf.nn.embedding_lookup(self.vars['user_latent'], self.batch_u) 343 | i_factors = tf.nn.embedding_lookup(self.vars['item_latent'], self.batch_i) 344 | j_factors = tf.nn.embedding_lookup(self.vars['item_latent'], self.batch_j) 345 | rate_matrix1_i = tf.reduce_sum(u_factors * i_factors, axis=2) 346 | rate_matrix1_j = tf.reduce_sum(u_factors * j_factors, axis=2) 347 | rate_matrix1 = tf.matmul(self.vars['user_latent'], tf.transpose(self.vars['item_latent'])) 348 | print("rate_matrix1 shape:", rate_matrix1.shape) 349 | # Emb 350 | u_emb = tf.nn.embedding_lookup(self.user, 
self.batch_u) 351 | i_emb = tf.nn.embedding_lookup(self.item, self.batch_i) 352 | j_emb = tf.nn.embedding_lookup(self.item, self.batch_j) 353 | u_emb = tf.squeeze(u_emb, axis=1) 354 | i_emb = tf.squeeze(i_emb, axis=1) 355 | j_emb = tf.squeeze(j_emb, axis=1) 356 | u_emb = tf.matmul(u_emb, self.vars['emb_projection']) # project to item space 357 | rate_matrix2_i = tf.reduce_sum(u_emb * i_emb, axis=1) 358 | rate_matrix2_j = tf.reduce_sum(u_emb * j_emb, axis=1) 359 | projected_user = tf.matmul(self.user, self.vars["emb_projection"]) 360 | rate_matrix2 = tf.matmul(projected_user, tf.transpose(self.item)) 361 | print("rate_matrix2_i shape:", rate_matrix2_i.shape) 362 | print("rate_matrix2 shape:", rate_matrix2.shape) 363 | # Bias 364 | i_bias = tf.nn.embedding_lookup(self.vars['item_bias'], self.batch_i) 365 | j_bias = tf.nn.embedding_lookup(self.vars['item_bias'], self.batch_j) 366 | i_bias = tf.reshape(i_bias, [-1, 1]) 367 | j_bias = tf.reshape(j_bias, [-1, 1]) 368 | # print("i_bias shape:", i_bias.shape) 369 | 370 | # full prediction 371 | rate_matrix_i = rate_matrix1_i+self.vars['alpha2']*rate_matrix2_i+i_bias 372 | rate_matrix_j = rate_matrix1_j+self.vars['alpha2']*rate_matrix2_j+j_bias 373 | rate_matrix = rate_matrix1+self.vars['alpha2']*rate_matrix2+tf.transpose(self.vars['item_bias']) 374 | 375 | # pos-neg diff. (pairwise score difference between positive item i and negative item j) 376 | xuij = rate_matrix_i - rate_matrix_j 377 | 378 | return rate_matrix, xuij, rate_matrix1, rate_matrix2, tf.transpose(self.vars['item_bias']) 379 | 380 | 381 | class SimpleAttLayer(): 382 | """ 383 | Eq 6 in Attentional Graph Convolutional Networks for Knowledge Concept Recommendation 384 | in MOOCs in a Heterogeneous View 385 | """ 386 | def __init__(self, attention_size, tag, parentvars, time_major=False): 387 | print("Initializing SimpleAttLayer - tag:"+tag) 388 | self.attention_size = attention_size 389 | self.time_major = time_major 390 | self.tag = tag 391 | self.vars = {} 392 | 393 | def __call__(self, inputs): 394 | if isinstance(inputs, tuple): 395 | print("Attention layer - inputs is tuple, concat") 396 | # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. 
397 | inputs = tf.concat(inputs, 2) 398 | 399 | if self.time_major: 400 | # (T,B,D) => (B,T,D) 401 | inputs = tf.transpose(inputs, [1, 0, 2]) 402 | 403 | hidden_size = inputs.shape[2].value # D value - hidden size of the RNN layer 404 | print("hidden_size in attention layer", hidden_size) 405 | print("Att input shape", inputs.shape) 406 | 407 | # Trainable parameters 408 | with tf.variable_scope('v_'+self.tag): 409 | # Apply a fully connected layer with non-linear activation to each of the B*T timestamps; 410 | # the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size 411 | w_omega = tf.get_variable(initializer=tf.random_normal([hidden_size, self.attention_size], stddev=0.1), 412 | name='w_omega') 413 | self.vars['w_omega'] = w_omega 414 | b_omega = tf.get_variable(initializer=tf.random_normal([self.attention_size], stddev=0.1), name='b_omega') 415 | self.vars['b_omega'] = b_omega 416 | u_omega = tf.get_variable(initializer=tf.random_normal([self.attention_size], stddev=0.1), name='u_omega') 417 | self.vars['u_omega'] = u_omega 418 | v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega) 419 | print("v shape", v.shape) # v shape (4, 2005, 32) 420 | 421 | # For each timestamp, the size-A vector from `v` is reduced with the `u` vector 422 | vu = tf.tensordot(v, u_omega, axes=1, name='vu') # (B,T) shape 423 | print("vu shape", vu.shape) # vu shape (4, 2005) 424 | alphas = tf.nn.softmax(vu, name='alphas', axis=0) # (B,T) shape 425 | self.vars['alphas_'+self.tag] = alphas 426 | 427 | output = tf.reduce_sum(inputs*tf.expand_dims(alphas, -1), 0) 428 | 429 | return output -------------------------------------------------------------------------------- /data_utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import pickle\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from tqdm import tqdm\n", 14 | "import random\n", 15 | "import fasttext\n", 16 | "from collections import Counter\n", 17 | "\n", 18 | "\n", 19 | "np.random.seed(0)\n", 20 | "random.seed(0)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": { 27 | "scrolled": true 28 | }, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "total courses 697\n", 35 | "total courses 705\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "# load user video act\n", 41 | "json_file = \"additional_information/user_video_act.json\"\n", 42 | "\n", 43 | "with open(json_file, \"r\") as f:\n", 44 | " lines = f.readlines()\n", 45 | "\n", 46 | "# load course name dict\n", 47 | "with open(\"entities/course.json\", \"r\", encoding=\"utf8\") as f:\n", 48 | " courses = f.readlines()\n", 49 | " \n", 50 | "course_dict = dict()\n", 51 | "for c in courses:\n", 52 | " json_data = json.loads(c)\n", 53 | " course_dict[json_data[\"id\"]] = json_data[\"name\"]\n", 54 | "\n", 55 | "# load course-teacher relationship to dict\n", 56 | "def get_course_dict(filepath):\n", 57 | " with open(filepath, \"r\", encoding=\"utf8\") as f:\n", 58 | " tc_rels = f.readlines()\n", 59 | " tc_rel_dict = dict()\n", 60 | " for tc_rel in tc_rels:\n", 61 | " tc_rel = tc_rel.strip().split(\"\\t\")\n", 62 | " if tc_rel[1] not in tc_rel_dict:\n", 63 | " tc_rel_dict[tc_rel[1]] = [tc_rel[0]]\n", 64 | " else:\n", 65 | " tc_rel_dict[tc_rel[1]].append(tc_rel[0])\n", 66 | " \n", 67 | 
" return tc_rel_dict\n", 68 | "\n", 69 | "tc_rel_dict = get_course_dict(\"relations/teacher-course.json\")\n", 70 | "print(\"total courses\", len(tc_rel_dict))\n", 71 | "\n", 72 | "sc_rel_dict = get_course_dict(\"relations/school-course.json\")\n", 73 | "print(\"total courses\", len(sc_rel_dict))\n", 74 | "\n", 75 | "# check if each course has more than one schools (no)\n", 76 | "# for key in sc_rel_dict:\n", 77 | "# if len(sc_rel_dict[key]) > 1:\n", 78 | "# print(sc_rel_dict[key])\n", 79 | "\n", 80 | "# load video concepts\n", 81 | "vc_df = pd.read_csv(\"relations/video-concept.json\",\n", 82 | " header=None,\n", 83 | " delimiter=\"\\t\",\n", 84 | " names=[\"video\", \"concept\"])\n", 85 | "vc_dict = vc_df.groupby('video')['concept'].apply(list).to_dict()\n", 86 | "\n", 87 | "# load course concepts\n", 88 | "cc_df = pd.read_csv(\"relations/course-concept.json\",\n", 89 | " header=None,\n", 90 | " delimiter=\"\\t\",\n", 91 | " names=[\"course\", \"concept\"])\n", 92 | "cc_dict = cc_df.groupby('course')['concept'].apply(list).to_dict()\n", 93 | "\n", 94 | "# load course video\n", 95 | "cv_df = pd.read_csv(\"relations/course-video.json\",\n", 96 | " header=None,\n", 97 | " delimiter=\"\\t\",\n", 98 | " names=[\"course\", \"video\"])\n", 99 | "cv_dict = cv_df.groupby(\"course\")[\"video\"].apply(list).to_dict()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "['K_订单_管理科学技术',\n", 111 | " 'K_会计_管理科学技术',\n", 112 | " 'K_贷款_管理科学技术',\n", 113 | " 'K_模拟_心理学',\n", 114 | " 'K_资源_管理科学技术',\n", 115 | " 'K_会计等式_管理科学技术',\n", 116 | " 'K_信息储存_管理科学技术',\n", 117 | " 'K_知识_管理科学技术',\n", 118 | " 'K_复式记账法_管理科学技术',\n", 119 | " 'K_应收账款_管理科学技术',\n", 120 | " 'K_道路_数学',\n", 121 | " 'K_组织_管理科学技术',\n", 122 | " 'K_预算_管理科学技术',\n", 123 | " 'K_反馈_管理科学技术',\n", 124 | " 'K_期限_管理科学技术',\n", 125 | " 'K_使用_管理科学技术',\n", 126 | " 'K_匹配_数学',\n", 127 | " 'K_广告_管理科学技术',\n", 128 | " 'K_总分类账_管理科学技术',\n", 129 | " 'K_价值_管理科学技术',\n", 130 | " 'K_计数值_管理科学技术',\n", 131 | " 'K_所有者权益_管理科学技术',\n", 132 | " 'K_养老基金_管理科学技术',\n", 133 | " 'K_平衡_管理科学技术',\n", 134 | " 'K_组织设计_管理科学技术',\n", 135 | " 'K_商品_管理科学技术',\n", 136 | " 'K_误差_数学',\n", 137 | " 'K_等待_管理科学技术',\n", 138 | " 'K_工作环境_管理科学技术',\n", 139 | " 'K_通分_数学',\n", 140 | " 'K_分析_心理学',\n", 141 | " 'K_客户_管理科学技术',\n", 142 | " 'K_练习_心理学',\n", 143 | " 'K_慈善组织_管理科学技术',\n", 144 | " 'K_计划_管理科学技术',\n", 145 | " 'K_收入_管理科学技术',\n", 146 | " 'K_索引_管理科学技术',\n", 147 | " 'K_费用_管理科学技术',\n", 148 | " 'K_需要确认_管理科学技术',\n", 149 | " 'K_企业家_管理科学技术',\n", 150 | " 'K_学会_管理科学技术',\n", 151 | " 'K_空间_数学',\n", 152 | " 'K_公司_管理科学技术',\n", 153 | " 'K_学习_管理科学技术',\n", 154 | " 'K_分类账簿_管理科学技术',\n", 155 | " 'K_纠正_管理科学技术',\n", 156 | " 'K_解释_数学',\n", 157 | " 'K_政策_管理科学技术',\n", 158 | " 'K_注意_心理学',\n", 159 | " 'K_控制账目_管理科学技术',\n", 160 | " 'K_检查_管理科学技术',\n", 161 | " 'K_交换_管理科学技术',\n", 162 | " 'K_账户_管理科学技术',\n", 163 | " 'K_投资_管理科学技术',\n", 164 | " 'K_价格_管理科学技术',\n", 165 | " 'K_计算方法_数学',\n", 166 | " 'K_福利_管理科学技术',\n", 167 | " 'K_雇主_管理科学技术',\n", 168 | " 'K_调整_心理学',\n", 169 | " 'K_过账_管理科学技术',\n", 170 | " 'K_假设_数学',\n", 171 | " 'K_职位_管理科学技术',\n", 172 | " 'K_应付账款_管理科学技术',\n", 173 | " 'K_收益_管理科学技术',\n", 174 | " 'K_系统_管理科学技术',\n", 175 | " 'K_透支_管理科学技术',\n", 176 | " 'K_交易所_管理科学技术',\n", 177 | " 'K_非流动资产_管理科学技术',\n", 178 | " 'K_支票_管理科学技术',\n", 179 | " 'K_交易_管理科学技术',\n", 180 | " 'K_努力_管理科学技术',\n", 181 | " 'K_债务_管理科学技术',\n", 182 | " 'K_对照_数学',\n", 183 | " 'K_部分信息_数学',\n", 184 | " 'K_数字编码_数学',\n", 185 | " 'K_批准_管理科学技术',\n", 186 | " 
'K_原始凭证_管理科学技术',\n", 187 | " 'K_个体_数学',\n", 188 | " 'K_财务报告_管理科学技术',\n", 189 | " 'K_包含_数学',\n", 190 | " 'K_出现_数学',\n", 191 | " 'K_运作_管理科学技术',\n", 192 | " 'K_需要_心理学',\n", 193 | " 'K_相等_数学',\n", 194 | " 'K_税收_管理科学技术',\n", 195 | " 'K_会计分录_管理科学技术',\n", 196 | " 'K_政府_管理科学技术',\n", 197 | " 'K_银行_管理科学技术',\n", 198 | " 'K_计算_数学',\n", 199 | " 'K_认证_管理科学技术',\n", 200 | " 'K_能力_管理科学技术',\n", 201 | " 'K_支付_管理科学技术',\n", 202 | " 'K_自主学习_心理学',\n", 203 | " 'K_输入_数学',\n", 204 | " 'K_阶段_管理科学技术',\n", 205 | " 'K_归档_管理科学技术',\n", 206 | " 'K_发现_管理科学技术',\n", 207 | " 'K_定额_管理科学技术',\n", 208 | " 'K_报酬_管理科学技术',\n", 209 | " 'K_补货_管理科学技术',\n", 210 | " 'K_固定资产_管理科学技术',\n", 211 | " 'K_合同_管理科学技术',\n", 212 | " 'K_结账_管理科学技术',\n", 213 | " 'K_职业生涯_管理科学技术',\n", 214 | " 'K_顾客_管理科学技术',\n", 215 | " 'K_属于_数学',\n", 216 | " 'K_现金_管理科学技术',\n", 217 | " 'K_补偿_管理科学技术',\n", 218 | " 'K_资产_管理科学技术',\n", 219 | " 'K_条款_管理科学技术',\n", 220 | " 'K_编制_管理科学技术',\n", 221 | " 'K_保险_管理科学技术',\n", 222 | " 'K_项目_管理科学技术',\n", 223 | " 'K_事务_管理科学技术',\n", 224 | " 'K_索赔_管理科学技术',\n", 225 | " 'K_选择_管理科学技术',\n", 226 | " 'K_取消_管理科学技术']" 227 | ] 228 | }, 229 | "execution_count": 3, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "cc_dict[\"C_course-v1:ACCA+FA1_X+2019_T1\"]" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 4, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "name": "stderr", 245 | "output_type": "stream", 246 | "text": [ 247 | " 0%| | 23/48640 [00:00<03:39, 221.00it/s]" 248 | ] 249 | }, 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "total users 48640\n" 255 | ] 256 | }, 257 | { 258 | "name": "stderr", 259 | "output_type": "stream", 260 | "text": [ 261 | "100%|██████████| 48640/48640 [02:57<00:00, 273.77it/s]\n" 262 | ] 263 | }, 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "(4874298, 8)" 268 | ] 269 | }, 270 | "execution_count": 4, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "print(\"total users\", len(lines))\n", 277 | "user_dfs = list()\n", 278 | "\n", 279 | "for l in tqdm(lines):\n", 280 | " json_data = json.loads(l)\n", 281 | "# print(json_data)\n", 282 | "\n", 283 | " activities = json_data[\"activity\"]\n", 284 | " course_ids = list()\n", 285 | " video_ids = list()\n", 286 | " local_start_times = list()\n", 287 | " concepts = list()\n", 288 | " cnames = list()\n", 289 | " teachers = list()\n", 290 | " schools = list()\n", 291 | " \n", 292 | " for a in activities:\n", 293 | " cnames.append(course_dict[a[\"course_id\"]]) if a[\"course_id\"] in course_dict else cnames.append(\"\")\n", 294 | " course_ids.append(a[\"course_id\"])\n", 295 | " video_ids.append(a[\"video_id\"])\n", 296 | " local_start_times.append(a[\"local_start_time\"])\n", 297 | " concepts.append(vc_dict[a[\"video_id\"]]) if a[\"video_id\"] in vc_dict else concepts.append([\"\"])\n", 298 | " teachers.append(tc_rel_dict[a[\"course_id\"]]) if a[\"course_id\"] in tc_rel_dict else teachers.append([\"\"])\n", 299 | " schools.append(sc_rel_dict[a[\"course_id\"]][0]) if a[\"course_id\"] in sc_rel_dict else schools.append(\"\")\n", 300 | " \n", 301 | " df = pd.DataFrame({\n", 302 | " \"id\": json_data[\"id\"],\n", 303 | " \"cname\": cnames,\n", 304 | " \"cid\": course_ids,\n", 305 | " \"vid\": video_ids,\n", 306 | " \"concepts\": concepts,\n", 307 | " \"teachers\": teachers,\n", 308 | " \"schools\": schools,\n", 309 | " \"local_start_time\": local_start_times\n", 310 | " })\n", 311 | " 
df.sort_values(\"local_start_time\", inplace=True)\n", 312 | " \n", 313 | "# display(df)\n", 314 | " user_dfs.append(df)\n", 315 | "\n", 316 | "# convert to total df\n", 317 | "total_df = pd.concat(user_dfs,ignore_index=True)\n", 318 | "total_df.shape" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 5, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "2130\n", 331 | "(311743, 8)\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "# max '2020-04-17 17:25:48'\n", 337 | "bad_formatted_users = list(total_df[total_df[\"local_start_time\"]<\"2017-01-01 00:00:00\"]['id'].unique())\n", 338 | "user_in_trainingperiod = list(total_df[(total_df[\"local_start_time\"]>='2017-01-01 00:00:00') & (total_df[\"local_start_time\"]<='2019-11-01 00:00:00')]['id'].unique())\n", 339 | "user_in_testperiod = list(total_df[total_df[\"local_start_time\"]>'2019-11-01 00:00:00']['id'].unique())\n", 340 | "selected_users = [u for u in user_in_testperiod if u in user_in_trainingperiod if u not in bad_formatted_users]\n", 341 | "print(len(selected_users))\n", 342 | "\n", 343 | "filtered_df = total_df[total_df[\"id\"].isin(selected_users)]\n", 344 | "print(filtered_df.shape)\n", 345 | "\n", 346 | "# check whether there is at least one concept in the testing period that is not in the training period\n", 347 | "need_to_filter = list()\n", 348 | "for u in selected_users:\n", 349 | " udf = filtered_df[filtered_df[\"id\"]==u]\n", 350 | " train_udf = udf[(udf[\"local_start_time\"]>='2017-01-01 00:00:00') & (udf[\"local_start_time\"]<='2019-11-01 00:00:00')]\n", 351 | " test_udf = udf[udf[\"local_start_time\"]>'2019-11-01 00:00:00']\n", 352 | " train_concepts = list(set([x for sublist in train_udf[\"concepts\"].values for x in sublist]))\n", 353 | " test_concepts = list(set([x for sublist in test_udf[\"concepts\"].values for x in sublist]))\n", 354 | " if len([x for x in test_concepts if x not in train_concepts])==0:\n", 355 | "# print(\"unsatisfied user:\", u)\n", 356 | " need_to_filter.append(u)\n", 357 | "\n", 358 | "filtered_users = [u for u in selected_users if u not in need_to_filter]\n", 359 | "filtered_df = filtered_df[filtered_df[\"id\"].isin(filtered_users)]" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 6, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "---------------- Entity Statistics -----------------\n", 372 | "filtered df (290024, 8)\n", 373 | "total users 2005\n", 374 | "total courses 600\n", 375 | "total videos 22403\n", 376 | "total schools 137\n", 377 | "distinct concepts 21037\n", 378 | "total teachers 1385\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "print(\"---------------- Entity Statistics -----------------\")\n", 384 | "distinct_concepts = list(set([c for sublist in filtered_df[\"concepts\"].values for c in sublist if len(c)>0]))\n", 385 | "\n", 386 | "print(\"filtered df\", filtered_df.shape)\n", 387 | "print(\"total users\", len(filtered_df[\"id\"].unique()))\n", 388 | "print(\"total courses\", len(filtered_df[\"cid\"].unique()))\n", 389 | "print(\"total videos\", len(filtered_df[\"vid\"].unique()))\n", 390 | "print(\"total schools\", len(filtered_df[\"schools\"].unique()))\n", 391 | "print(\"distinct concepts\", len(distinct_concepts))\n", 392 | "t_list = filtered_df[\"teachers\"].values\n", 393 | "flattend_t_list = list(set([t for sublist in t_list for t in sublist]))\n", 394 | 
"print(\"total teachers\", len(flattend_t_list))\n", 395 | "\n", 396 | "######################\n", 397 | "# relations (wrong before: not only limited to relations in the filtered but from relation.json files)\n", 398 | "# print(\"user-course relations\", filtered_df.groupby(['id','cid']).size().shape[0])\n", 399 | "# print(\"course-video relations\", filtered_df.groupby(['cid','vid']).size().shape[0])\n", 400 | "# ct_list = [[filtered_df[\"cid\"].values[i]+x for x in v] for i,v in enumerate(filtered_df[\"teachers\"].values)]\n", 401 | "# flattend_ct_list = list(set([ct for sublist in ct_list for ct in sublist]))\n", 402 | "# print(\"teacher-course relations\", len(flattend_ct_list))\n", 403 | "# vcon_list = [[filtered_df[\"vid\"].values[i]+x for x in v] for i,v in enumerate(filtered_df[\"concepts\"].values)]\n", 404 | "# flattend_vcon_list = list(set([ct for sublist in vcon_list for ct in sublist]))\n", 405 | "# print(\"video-concept relations\", len(flattend_vcon_list))" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 7, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# store distinct concept for embedding analysis\n", 415 | "with open(\"concept21037.txt\", \"w\") as f:\n", 416 | " for c in distinct_concepts:\n", 417 | " f.write(\"{}\\n\".format(c))" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 36, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "name": "stdout", 427 | "output_type": "stream", 428 | "text": [ 429 | "---------------- Relation Statistics based on filtered courses -----------------\n", 430 | "user-course relations 13696\n", 431 | "course-video relations 42117\n", 432 | "teacher-course relations 1875\n", 433 | "34506\n", 434 | "video-concept relations 295475\n", 435 | "course-concept relations 150811\n", 436 | "course - C_course-v1:RiceX+Phys102x+sp does not have concepts\n", 437 | "course - C_course-v1:TsinghuaX+Thesis2018+sp does not have concepts\n", 438 | "course - C_course-v1:UC_BerkeleyX+ColWri2_1x_2015_T1+2019_T1 does not have concepts\n", 439 | "course - C_course-v1:HUBU+2017022703X+sp does not have concepts\n", 440 | "course - C_course-v1:TsinghuaX+TsinghuaMandarin01+sp does not have concepts\n", 441 | "course - C_course-v1:UC_BerkeleyX+CS169_2x+sp does not have concepts\n", 442 | "course - C_course-v1:ZAFU+20171218+2019_T1 does not have concepts\n", 443 | "course - C_course-v1:qhnu+20181212x+2019_T1 does not have concepts\n", 444 | "course - C_course-v1:SDSNAssociation+C21001+sp does not have concepts\n", 445 | "course - C_course-v1:XJTU+20171025001+2019_T1 does not have concepts\n", 446 | "course - C_course-v1:NUDT+05028103+2018_T2 does not have concepts\n", 447 | "course - C_course-v1:qhnu+20181212x+2018_T2 does not have concepts\n" 448 | ] 449 | }, 450 | { 451 | "data": { 452 | "text/plain": [ 453 | "(21037, 600)" 454 | ] 455 | }, 456 | "execution_count": 36, 457 | "metadata": {}, 458 | "output_type": "execute_result" 459 | } 460 | ], 461 | "source": [ 462 | "print(\"---------------- Relation Statistics based on filtered courses -----------------\")\n", 463 | "print(\"user-course relations\", filtered_df.groupby(['id','cid']).size().shape[0])\n", 464 | "\n", 465 | "#######################\n", 466 | "# relations based on filtered course \n", 467 | "courses = filtered_df[\"cid\"].unique()\n", 468 | "# course video\n", 469 | "filtered_cv_dict = dict(filter(lambda elem: elem[0] in courses, cv_dict.items()))\n", 470 | "print(\"course-video relations\", sum([len(x) for x in 
filtered_cv_dict.values()]))\n", 471 | "# teacher course\n", 472 | "filtered_tc_rel_dict = dict(filter(lambda elem: elem[0] in courses, tc_rel_dict.items()))\n", 473 | "print(\"teacher-course relations\", sum([len(x) for x in filtered_tc_rel_dict.values()]))\n", 474 | "# video concept\n", 475 | "videos_in_filtered_courses = list(set([x for sublist in filtered_cv_dict.values() for x in sublist]))\n", 476 | "print(len(videos_in_filtered_courses))\n", 477 | "filtered_vc_rel_dict = dict(filter(lambda elem: elem[0] in videos_in_filtered_courses, vc_dict.items()))\n", 478 | "print(\"video-concept relations\", sum([len(x) for x in filtered_vc_rel_dict.values()]))\n", 479 | "# course concept\n", 480 | "filtered_cc_rel_dict = dict(filter(lambda elem: elem[0] in courses, cc_dict.items()))\n", 481 | "print(\"course-concept relations\", sum([len(x) for x in filtered_cc_rel_dict.values()]))\n", 482 | "\n", 483 | "# concepts = list(set([x for sublist in filtered_cc_rel_dict.values() for x in sublist]))\n", 484 | "# print(\"total concepts in filtered courses\", len(concepts))\n", 485 | "\n", 486 | "# store KC.p\n", 487 | "concepts = distinct_concepts\n", 488 | "course2index = dict(zip(courses, list(range(len(courses)))))\n", 489 | "concept2index = dict(zip(concepts, list(range(len(concepts)))))\n", 490 | "\n", 491 | "cc_list = list()\n", 492 | "for c in courses:\n", 493 | " cvec = np.zeros(len(concepts))\n", 494 | " if c in filtered_cc_rel_dict:\n", 495 | " indices = [concept2index[_] for _ in filtered_cc_rel_dict[c] if _ in concept2index]\n", 496 | " cvec[indices] = 1\n", 497 | " else:\n", 498 | " print(\"course - {} does not have concepts\".format(c))\n", 499 | " cc_list.append(cvec)\n", 500 | " \n", 501 | "cc_np = np.array(cc_list).T\n", 502 | "cc_np.shape" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 33, 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "name": "stdout", 512 | "output_type": "stream", 513 | "text": [ 514 | "0 U_9044043\n", 515 | "1 U_7078467\n", 516 | "2 U_1049362\n", 517 | "3 U_902\n", 518 | "4 U_8520713\n", 519 | "5 U_10748942\n", 520 | "6 U_1087\n", 521 | "7 U_1112\n", 522 | "8 U_9700483\n", 523 | "9 U_6816904\n", 524 | "10 U_8258710\n" 525 | ] 526 | }, 527 | { 528 | "name": "stderr", 529 | "output_type": "stream", 530 | "text": [ 531 | "c:\\users\\gpiao\\miniconda3\\envs\\py3.7tf1.13.1\\lib\\site-packages\\ipykernel_launcher.py:96: SettingWithCopyWarning: \n", 532 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 533 | "\n", 534 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" 535 | ] 536 | }, 537 | { 538 | "name": "stdout", 539 | "output_type": "stream", 540 | "text": [ 541 | "11 U_6685870\n", 542 | "12 U_2491705\n", 543 | "13 U_9700831\n", 544 | "14 U_6817254\n", 545 | "15 U_7210631\n", 546 | "16 U_8128165\n", 547 | "17 U_1443689\n", 548 | "18 U_8521607\n", 549 | "19 U_7997323\n", 550 | "20 U_8390552\n", 551 | "21 U_7079859\n", 552 | "22 U_6162373\n", 553 | "23 U_1999\n", 554 | "24 U_9570262\n", 555 | "25 U_7080106\n", 556 | "26 U_6817993\n", 557 | "27 U_526622\n", 558 | "28 U_7997847\n", 559 | "29 U_7211469\n", 560 | "30 U_6949381\n", 561 | "31 U_10488412\n", 562 | "32 U_7080668\n", 563 | "33 U_8260321\n", 564 | "34 U_8260334\n", 565 | "35 U_8522532\n", 566 | "36 U_3076\n", 567 | "37 U_8653873\n", 568 | "38 U_10488889\n", 569 | "39 U_7081019\n", 570 | "40 U_7998662\n", 571 | "41 U_8523071\n", 572 | "42 U_10227059\n", 573 | 
"43 U_9440652\n", 574 | "44 U_658844\n", 575 | "45 U_10358199\n", 576 | "46 U_8261091\n", 577 | "47 U_7736996\n", 578 | "48 U_1576865\n", 579 | "49 U_6950933\n", 580 | "50 U_790570\n", 581 | "51 U_6295703\n", 582 | "52 U_7737646\n", 583 | "53 U_8524090\n", 584 | "54 U_7213560\n", 585 | "55 U_9835064\n", 586 | "56 U_7475945\n", 587 | "57 U_8393517\n", 588 | "58 U_7738206\n", 589 | "59 U_9573295\n", 590 | "60 U_7738346\n", 591 | "61 U_9704489\n", 592 | "62 U_7738457\n", 593 | "63 U_2233537\n", 594 | "64 U_8262868\n", 595 | "65 U_9573649\n", 596 | "66 U_10491387\n", 597 | "67 U_9049609\n", 598 | "68 U_6690329\n", 599 | "69 U_923366\n", 600 | "70 U_5889\n", 601 | "71 U_9050022\n", 602 | "72 U_9443260\n", 603 | "73 U_2234406\n", 604 | "74 U_2496576\n", 605 | "75 U_9050292\n", 606 | "76 U_8657087\n", 607 | "77 U_8001814\n", 608 | "78 U_1972521\n", 609 | "79 U_530846\n", 610 | "80 U_6691233\n", 611 | "81 U_10492510\n", 612 | "82 U_7084743\n", 613 | "83 U_10230484\n", 614 | "84 U_8002279\n", 615 | "85 U_7478331\n", 616 | "86 U_2628704\n", 617 | "87 U_6691940\n", 618 | "88 U_9444482\n", 619 | "89 U_8002724\n", 620 | "90 U_9575673\n", 621 | "91 U_8265148\n", 622 | "92 U_8527316\n", 623 | "93 U_8003040\n", 624 | "94 U_9444909\n", 625 | "95 U_7741016\n", 626 | "96 U_10886762\n", 627 | "97 U_9576159\n", 628 | "98 U_8658730\n", 629 | "99 U_270210\n", 630 | "100 U_663489\n", 631 | "101 U_9445451\n", 632 | "102 U_8527960\n", 633 | "103 U_9576614\n", 634 | "104 U_1581369\n", 635 | "105 U_8608\n", 636 | "106 U_8659368\n", 637 | "107 U_8676\n", 638 | "108 U_9970244\n", 639 | "109 U_9445970\n", 640 | "110 U_8266369\n", 641 | "111 U_9446049\n", 642 | "112 U_5251778\n", 643 | "113 U_5251822\n", 644 | "114 U_8990\n", 645 | "115 U_7086882\n", 646 | "116 U_2761546\n", 647 | "117 U_8266651\n", 648 | "118 U_10495216\n", 649 | "119 U_10495258\n", 650 | "120 U_9315688\n", 651 | "121 U_1451399\n", 652 | "122 U_8398223\n", 653 | "123 U_10495498\n", 654 | "124 U_9315888\n", 655 | "125 U_10757784\n", 656 | "126 U_5252796\n", 657 | "127 U_10495695\n", 658 | "128 U_9578462\n", 659 | "129 U_10364909\n", 660 | "130 U_7088181\n", 661 | "131 U_7088255\n", 662 | "132 U_4729096\n", 663 | "133 U_8399135\n", 664 | "134 U_10496302\n", 665 | "135 U_8661307\n", 666 | "136 U_8268257\n", 667 | "137 U_1452660\n", 668 | "138 U_6695599\n", 669 | "139 U_10941\n", 670 | "140 U_8006383\n", 671 | "141 U_10890077\n", 672 | "142 U_10496953\n", 673 | "143 U_8268787\n", 674 | "144 U_9317488\n", 675 | "145 U_2764230\n", 676 | "146 U_7089664\n", 677 | "147 U_8662600\n", 678 | "148 U_10366628\n", 679 | "149 U_8924849\n", 680 | "150 U_8007347\n", 681 | "151 U_8400626\n", 682 | "152 U_8662771\n", 683 | "153 U_8269564\n", 684 | "154 U_8138584\n", 685 | "155 U_3026885\n", 686 | "156 U_11939823\n", 687 | "157 U_6828026\n", 688 | "158 U_405628\n", 689 | "159 U_8663198\n", 690 | "160 U_8663208\n", 691 | "161 U_10498227\n", 692 | "162 U_3158245\n", 693 | "163 U_10891604\n", 694 | "164 U_10498530\n", 695 | "165 U_7614954\n", 696 | "166 U_9843263\n", 697 | "167 U_8663669\n", 698 | "168 U_10236550\n", 699 | "169 U_10367676\n", 700 | "170 U_668349\n", 701 | "171 U_6697700\n", 702 | "172 U_8794944\n", 703 | "173 U_10367853\n", 704 | "174 U_2896787\n", 705 | "175 U_144286\n", 706 | "176 U_8270779\n", 707 | "177 U_11416651\n", 708 | "178 U_10499175\n", 709 | "179 U_10499219\n", 710 | "180 U_8139967\n", 711 | "181 U_10630344\n", 712 | "182 U_10892592\n", 713 | "183 U_9975096\n", 714 | "184 U_406916\n", 715 | "185 U_5649873\n", 716 | "186 U_8009200\n", 717 | "187 
U_8402461\n", 718 | "188 U_8402594\n", 719 | "189 U_3159880\n", 720 | "190 U_8271699\n", 721 | "191 U_2504636\n", 722 | "192 U_6567904\n", 723 | "193 U_8271871\n", 724 | "194 U_8271951\n", 725 | "195 U_10238227\n", 726 | "196 U_5257729\n", 727 | "197 U_9583198\n", 728 | "198 U_539271\n", 729 | "199 U_9714366\n", 730 | "200 U_6699957\n", 731 | "201 U_9452481\n", 732 | "202 U_7748564\n", 733 | "203 U_8273113\n", 734 | "204 U_10763482\n", 735 | "205 U_4734204\n", 736 | "206 U_6962585\n", 737 | "207 U_10894945\n", 738 | "208 U_7880375\n", 739 | "209 U_2637512\n", 740 | "210 U_11943634\n", 741 | "211 U_9060199\n", 742 | "212 U_8142710\n", 743 | "213 U_278508\n", 744 | "214 U_7094274\n", 745 | "215 U_409616\n", 746 | "216 U_4735122\n", 747 | "217 U_540946\n", 748 | "218 U_9322834\n", 749 | "219 U_8274269\n", 750 | "220 U_2900418\n", 751 | "221 U_8143347\n", 752 | "222 U_6570518\n", 753 | "223 U_10764882\n", 754 | "224 U_279125\n", 755 | "225 U_7094959\n", 756 | "226 U_7357138\n", 757 | "227 U_10371949\n", 758 | "228 U_8405905\n", 759 | "229 U_9061276\n", 760 | "230 U_9323425\n", 761 | "231 U_9192429\n", 762 | "232 U_6702076\n", 763 | "233 U_6571037\n", 764 | "234 U_672805\n", 765 | "235 U_9454647\n", 766 | "236 U_148566\n", 767 | "237 U_9847894\n", 768 | "238 U_6964390\n", 769 | "239 U_9061596\n", 770 | "240 U_6964596\n", 771 | "241 U_8275485\n", 772 | "242 U_3163703\n", 773 | "243 U_5129918\n", 774 | "244 U_6571986\n", 775 | "245 U_6703159\n", 776 | "246 U_6834278\n", 777 | "247 U_11290774\n", 778 | "248 U_8407194\n", 779 | "249 U_8276155\n", 780 | "250 U_10111265\n", 781 | "251 U_5785949\n", 782 | "252 U_10635711\n", 783 | "253 U_9718220\n", 784 | "254 U_7096925\n", 785 | "255 U_8800943\n", 786 | "256 U_9456329\n", 787 | "257 U_9063151\n", 788 | "258 U_6572790\n", 789 | "259 U_2509567\n", 790 | "260 U_11553611\n", 791 | "261 U_6048698\n", 792 | "262 U_2640830\n", 793 | "263 U_9325505\n", 794 | "264 U_8932420\n", 795 | "265 U_8408151\n", 796 | "266 U_6048893\n", 797 | "267 U_8408479\n", 798 | "268 U_19932\n", 799 | "269 U_7097873\n", 800 | "270 U_9850408\n", 801 | "271 U_10243655\n", 802 | "272 U_9850486\n", 803 | "273 U_10768012\n", 804 | "274 U_2248369\n", 805 | "275 U_10899142\n", 806 | "276 U_9719514\n", 807 | "277 U_1462037\n", 808 | "278 U_11685656\n", 809 | "279 U_8146832\n", 810 | "280 U_1986570\n", 811 | "281 U_8278064\n", 812 | "282 U_8409405\n", 813 | "283 U_10244473\n", 814 | "284 U_11293087\n", 815 | "285 U_10244524\n", 816 | "286 U_8802773\n", 817 | "287 U_8016354\n", 818 | "288 U_6836742\n", 819 | "289 U_11686410\n", 820 | "290 U_545306\n", 821 | "291 U_9982497\n", 822 | "292 U_7098941\n", 823 | "293 U_1987151\n", 824 | "294 U_4477552\n", 825 | "295 U_8016684\n", 826 | "296 U_10244936\n", 827 | "297 U_9065352\n", 828 | "298 U_1725351\n", 829 | "299 U_1201171\n", 830 | "300 U_8410136\n", 831 | "301 U_7099469\n", 832 | "302 U_11818092\n", 833 | "303 U_10376449\n", 834 | "304 U_6706464\n", 835 | "305 U_415166\n", 836 | "306 U_11294163\n", 837 | "307 U_10769902\n", 838 | "308 U_10507776\n", 839 | "309 U_6051497\n", 840 | "310 U_8541970\n", 841 | "311 U_8935218\n", 842 | "312 U_8410996\n", 843 | "313 U_10508152\n", 844 | "314 U_2774916\n", 845 | "315 U_3037196\n", 846 | "316 U_2906188\n", 847 | "317 U_7100495\n", 848 | "318 U_9459823\n", 849 | "319 U_8149112\n", 850 | "320 U_11950228\n", 851 | "321 U_10639564\n", 852 | "322 U_10115301\n", 853 | "323 U_9066901\n", 854 | "324 U_9591215\n", 855 | "325 U_154067\n", 856 | "326 U_8804887\n", 857 | "327 U_8804899\n", 858 | "328 U_9329272\n", 
859 | "329 U_9460407\n", 860 | "330 U_8411837\n", 861 | "331 U_7363322\n", 862 | "332 U_7101232\n", 863 | "333 U_9329509\n", 864 | "334 U_2907033\n", 865 | "335 U_154610\n", 866 | "336 U_2514180\n", 867 | "337 U_6315294\n", 868 | "338 U_548331\n", 869 | "339 U_6315508\n", 870 | "340 U_8019496\n", 871 | "341 U_9592509\n", 872 | "342 U_8413062\n", 873 | "343 U_417816\n", 874 | "344 U_9461784\n", 875 | "345 U_8413230\n", 876 | "346 U_5791890\n", 877 | "347 U_9986217\n", 878 | "348 U_10641588\n", 879 | "349 U_7102674\n", 880 | "350 U_8020253\n", 881 | "351 U_6316326\n", 882 | "352 U_11559208\n", 883 | "353 U_156001\n", 884 | "354 U_6971779\n", 885 | "355 U_8151491\n", 886 | "356 U_10118000\n", 887 | "357 U_11559804\n", 888 | "358 U_7103506\n", 889 | "359 U_6710321\n", 890 | "360 U_5268563\n", 891 | "361 U_3695754\n", 892 | "362 U_4351214\n", 893 | "363 U_7103926\n", 894 | "364 U_419317\n", 895 | "365 U_6841925\n", 896 | "366 U_9332368\n", 897 | "367 U_8414933\n", 898 | "368 U_9594588\n", 899 | "369 U_7104343\n", 900 | "370 U_10250072\n", 901 | "371 U_9332577\n", 902 | "372 U_2648107\n", 903 | "373 U_10119226\n", 904 | "374 U_6318143\n", 905 | "375 U_8677456\n", 906 | "376 U_7104640\n", 907 | "377 U_9857223\n", 908 | "378 U_8808801\n", 909 | "379 U_10643833\n", 910 | "380 U_9857591\n", 911 | "381 U_9595558\n", 912 | "382 U_10775238\n", 913 | "383 U_6974158\n", 914 | "384 U_3173091\n", 915 | "385 U_8154033\n", 916 | "386 U_3173407\n", 917 | "387 U_7105571\n", 918 | "388 U_9595998\n", 919 | "389 U_8416388\n", 920 | "390 U_8809743\n", 921 | "391 U_6843701\n", 922 | "392 U_2780469\n", 923 | "393 U_9858446\n", 924 | "394 U_6843835\n", 925 | "395 U_9203331\n", 926 | "396 U_9334455\n", 927 | "397 U_9334477\n", 928 | "398 U_9334561\n", 929 | "399 U_9203603\n", 930 | "400 U_11562992\n", 931 | "401 U_8417416\n", 932 | "402 U_422086\n", 933 | "403 U_5140749\n", 934 | "404 U_6844743\n", 935 | "405 U_10252656\n", 936 | "406 U_1077791\n", 937 | "407 U_684592\n", 938 | "408 U_6845201\n", 939 | "409 U_6714161\n", 940 | "410 U_9990976\n", 941 | "411 U_1078092\n", 942 | "412 U_7107422\n", 943 | "413 U_684969\n", 944 | "414 U_9466835\n", 945 | "415 U_8418389\n", 946 | "416 U_7238948\n", 947 | "417 U_3568971\n", 948 | "418 U_2651559\n", 949 | "419 U_292266\n", 950 | "420 U_10778026\n", 951 | "421 U_10516095\n", 952 | "422 U_9729675\n", 953 | "423 U_10253976\n", 954 | "424 U_6846175\n", 955 | "425 U_11826940\n", 956 | "426 U_161575\n", 957 | "427 U_10647463\n", 958 | "428 U_2783170\n", 959 | "429 U_8550646\n", 960 | "430 U_6322423\n", 961 | "431 U_293168\n", 962 | "432 U_555320\n", 963 | "433 U_162233\n", 964 | "434 U_10516932\n", 965 | "435 U_3176926\n", 966 | "436 U_10517069\n", 967 | "437 U_3439265\n", 968 | "438 U_6191786\n", 969 | "439 U_11041503\n", 970 | "440 U_6847248\n", 971 | "441 U_7633787\n", 972 | "442 U_6060930\n", 973 | "443 U_10124191\n", 974 | "444 U_6716461\n", 975 | "445 U_8158276\n", 976 | "446 U_9075855\n", 977 | "447 U_8158366\n", 978 | "448 U_8158570\n", 979 | "449 U_10386826\n", 980 | "450 U_1080745\n", 981 | "451 U_9076174\n", 982 | "452 U_7765490\n", 983 | "453 U_687815\n", 984 | "454 U_557005\n", 985 | "455 U_7765974\n", 986 | "456 U_7766089\n", 987 | "457 U_10125413\n", 988 | "458 U_688300\n", 989 | "459 U_688302\n", 990 | "460 U_4489449\n", 991 | "461 U_1081702\n", 992 | "462 U_8028553\n", 993 | "463 U_9601439\n", 994 | "464 U_1475046\n", 995 | "465 U_9470447\n", 996 | "466 U_10387962\n", 997 | "467 U_6849070\n", 998 | "468 U_10388097\n", 999 | "469 U_9732770\n", 1000 | "470 
U_10388135\n", 1001 | "471 U_8422093\n", 1002 | "472 U_8815322\n", 1003 | "473 U_1475441\n", 1004 | "474 U_10519579\n", 1005 | "475 U_7767150\n", 1006 | "476 U_7111817\n", 1007 | "477 U_6849732\n", 1008 | "478 U_9078035\n", 1009 | "479 U_10126626\n", 1010 | "480 U_10126709\n", 1011 | "481 U_7112094\n", 1012 | "482 U_11044270\n", 1013 | "483 U_6981047\n", 1014 | "484 U_9733599\n", 1015 | "485 U_427501\n", 1016 | "486 U_7374346\n", 1017 | "487 U_296579\n", 1018 | "488 U_9602717\n", 1019 | "489 U_6850261\n", 1020 | "490 U_10782432\n", 1021 | "491 U_296693\n", 1022 | "492 U_558854\n", 1023 | "493 U_7505690\n", 1024 | "494 U_689950\n", 1025 | "495 U_7767847\n", 1026 | "496 U_8423326\n", 1027 | "497 U_6195162\n", 1028 | "498 U_2394165\n", 1029 | "499 U_9341048\n", 1030 | "500 U_5802134\n", 1031 | "501 U_8161526\n", 1032 | "502 U_690538\n", 1033 | "503 U_11045353\n", 1034 | "504 U_8030748\n", 1035 | "505 U_4754126\n", 1036 | "506 U_8424354\n", 1037 | "507 U_8162290\n", 1038 | "508 U_9997307\n", 1039 | "509 U_8031261\n", 1040 | "510 U_10390718\n", 1041 | "511 U_10521807\n", 1042 | "512 U_6720783\n", 1043 | "513 U_8555862\n", 1044 | "514 U_11570536\n", 1045 | "515 U_10522002\n", 1046 | "516 U_3706260\n", 1047 | "517 U_6589902\n", 1048 | "518 U_9604587\n", 1049 | "519 U_9342495\n", 1050 | "520 U_167463\n", 1051 | "521 U_9473646\n", 1052 | "522 U_9342607\n", 1053 | "523 U_10260240\n", 1054 | "524 U_10260302\n", 1055 | "525 U_9604968\n", 1056 | "526 U_8425417\n", 1057 | "527 U_2920416\n", 1058 | "528 U_9474140\n", 1059 | "529 U_9605291\n", 1060 | "530 U_9605413\n", 1061 | "531 U_8425772\n", 1062 | "532 U_8163794\n", 1063 | "533 U_2789956\n", 1064 | "534 U_6722245\n", 1065 | "535 U_9343723\n", 1066 | "536 U_8033012\n", 1067 | "537 U_9343753\n", 1068 | "538 U_8164135\n", 1069 | "539 U_3707692\n", 1070 | "540 U_9343825\n", 1071 | "541 U_6329173\n", 1072 | "542 U_9212847\n", 1073 | "543 U_9343920\n", 1074 | "544 U_6853675\n", 1075 | "545 U_1086859\n", 1076 | "546 U_10261917\n", 1077 | "547 U_9475565\n", 1078 | "548 U_10130964\n", 1079 | "549 U_10393232\n", 1080 | "550 U_8165134\n", 1081 | "551 U_9475904\n", 1082 | "552 U_9082736\n", 1083 | "553 U_2529282\n", 1084 | "554 U_1087518\n", 1085 | "555 U_8427659\n", 1086 | "556 U_6985874\n", 1087 | "557 U_9869468\n", 1088 | "558 U_8165656\n", 1089 | "559 U_10525371\n", 1090 | "560 U_1088215\n", 1091 | "561 U_7773078\n", 1092 | "562 U_10918837\n", 1093 | "563 U_9739242\n", 1094 | "564 U_10132622\n", 1095 | "565 U_9477285\n", 1096 | "566 U_9477314\n", 1097 | "567 U_8035559\n", 1098 | "568 U_9477438\n", 1099 | "569 U_9477439\n", 1100 | "570 U_10001784\n", 1101 | "571 U_4496766\n", 1102 | "572 U_10657186\n", 1103 | "573 U_10657206\n", 1104 | "574 U_7118370\n", 1105 | "575 U_9215566\n", 1106 | "576 U_8953546\n", 1107 | "577 U_8822481\n", 1108 | "578 U_9739989\n", 1109 | "579 U_3710693\n", 1110 | "580 U_40759\n", 1111 | "581 U_9084882\n", 1112 | "582 U_4497370\n", 1113 | "583 U_3710973\n", 1114 | "584 U_2924547\n", 1115 | "585 U_7118909\n", 1116 | "586 U_6594677\n", 1117 | "587 U_5808305\n", 1118 | "588 U_8429779\n", 1119 | "589 U_11575521\n", 1120 | "590 U_6463829\n", 1121 | "591 U_10658174\n", 1122 | "592 U_10527116\n", 1123 | "593 U_6988220\n", 1124 | "594 U_6988235\n", 1125 | "595 U_9347540\n", 1126 | "596 U_8823399\n", 1127 | "597 U_6988539\n", 1128 | "598 U_6333302\n", 1129 | "599 U_8692614\n", 1130 | "600 U_8430489\n", 1131 | "601 U_8430524\n" 1132 | ] 1133 | }, 1134 | { 1135 | "name": "stdout", 1136 | "output_type": "stream", 1137 | "text": [ 1138 | "602 
U_7775166\n", 1139 | "603 U_10527848\n", 1140 | "604 U_2925783\n", 1141 | "605 U_10659101\n", 1142 | "606 U_6858023\n", 1143 | "607 U_9479624\n", 1144 | "608 U_6465013\n", 1145 | "609 U_42572\n", 1146 | "610 U_8693356\n", 1147 | "611 U_6596249\n", 1148 | "612 U_8431369\n", 1149 | "613 U_3188531\n", 1150 | "614 U_3188589\n", 1151 | "615 U_42873\n", 1152 | "616 U_11970467\n", 1153 | "617 U_9349249\n", 1154 | "618 U_9873612\n", 1155 | "619 U_9480410\n", 1156 | "620 U_2926850\n", 1157 | "621 U_174354\n", 1158 | "622 U_2664762\n", 1159 | "623 U_7121326\n", 1160 | "624 U_10529324\n", 1161 | "625 U_8170036\n", 1162 | "626 U_9743116\n", 1163 | "627 U_8694545\n", 1164 | "628 U_10529590\n", 1165 | "629 U_8563608\n", 1166 | "630 U_8039648\n", 1167 | "631 U_6597899\n", 1168 | "632 U_3190078\n", 1169 | "633 U_10923386\n", 1170 | "634 U_9743796\n", 1171 | "635 U_8564164\n", 1172 | "636 U_9612820\n", 1173 | "637 U_8171130\n", 1174 | "638 U_4632368\n", 1175 | "639 U_10661683\n", 1176 | "640 U_9482194\n", 1177 | "641 U_9744338\n", 1178 | "642 U_9351154\n", 1179 | "643 U_8040502\n", 1180 | "644 U_2142347\n", 1181 | "645 U_10662117\n", 1182 | "646 U_3191111\n", 1183 | "647 U_8958324\n", 1184 | "648 U_9220499\n", 1185 | "649 U_45524\n", 1186 | "650 U_10531300\n", 1187 | "651 U_176670\n", 1188 | "652 U_9613872\n", 1189 | "653 U_8172111\n", 1190 | "654 U_8172176\n", 1191 | "655 U_176791\n", 1192 | "656 U_9614276\n", 1193 | "657 U_9483456\n", 1194 | "658 U_9745828\n", 1195 | "659 U_701874\n", 1196 | "660 U_8435188\n", 1197 | "661 U_46614\n", 1198 | "662 U_9483871\n", 1199 | "663 U_177794\n", 1200 | "664 U_2668169\n", 1201 | "665 U_10532597\n", 1202 | "666 U_8435535\n", 1203 | "667 U_440145\n", 1204 | "668 U_2930527\n", 1205 | "669 U_8173485\n", 1206 | "670 U_10663900\n", 1207 | "671 U_3061748\n", 1208 | "672 U_6731860\n", 1209 | "673 U_7125116\n", 1210 | "674 U_3193060\n", 1211 | "675 U_11581697\n", 1212 | "676 U_8960353\n", 1213 | "677 U_8960427\n", 1214 | "678 U_10140155\n", 1215 | "679 U_6732309\n", 1216 | "680 U_6863384\n", 1217 | "681 U_1096406\n", 1218 | "682 U_8043252\n", 1219 | "683 U_9222924\n", 1220 | "684 U_8043416\n", 1221 | "685 U_5421992\n", 1222 | "686 U_9092160\n", 1223 | "687 U_6732877\n", 1224 | "688 U_9354421\n", 1225 | "689 U_8568165\n", 1226 | "690 U_7781760\n", 1227 | "691 U_703891\n", 1228 | "692 U_8044021\n", 1229 | "693 U_10534420\n", 1230 | "694 U_10534480\n", 1231 | "695 U_704155\n", 1232 | "696 U_9092836\n", 1233 | "697 U_10403558\n", 1234 | "698 U_6602536\n", 1235 | "699 U_8568618\n", 1236 | "700 U_8437576\n", 1237 | "701 U_9879582\n", 1238 | "702 U_9617539\n", 1239 | "703 U_9617548\n", 1240 | "704 U_10535188\n", 1241 | "705 U_1098031\n", 1242 | "706 U_704857\n", 1243 | "707 U_2933092\n", 1244 | "708 U_2671027\n", 1245 | "709 U_8045013\n", 1246 | "710 U_8438233\n", 1247 | "711 U_9486813\n", 1248 | "712 U_7127520\n", 1249 | "713 U_9486824\n", 1250 | "714 U_9617994\n", 1251 | "715 U_6603371\n", 1252 | "716 U_9486962\n", 1253 | "717 U_7520885\n", 1254 | "718 U_7521085\n", 1255 | "719 U_9880396\n", 1256 | "720 U_9225037\n", 1257 | "721 U_2540372\n", 1258 | "722 U_1491803\n", 1259 | "723 U_4375397\n", 1260 | "724 U_7127921\n", 1261 | "725 U_10404836\n", 1262 | "726 U_7652338\n", 1263 | "727 U_2933904\n", 1264 | "728 U_6735001\n", 1265 | "729 U_3065049\n", 1266 | "730 U_9618653\n", 1267 | "731 U_1099001\n", 1268 | "732 U_2671865\n", 1269 | "733 U_9487730\n", 1270 | "734 U_11322856\n", 1271 | "735 U_9487870\n", 1272 | "736 U_9619059\n", 1273 | "737 U_8963842\n", 1274 | "738 U_8177422\n", 
1275 | "739 U_9881362\n", 1276 | "740 U_575258\n", 1277 | "741 U_182066\n", 1278 | "742 U_9488192\n", 1279 | "743 U_10274839\n", 1280 | "744 U_9488441\n", 1281 | "745 U_11585682\n", 1282 | "746 U_706716\n", 1283 | "747 U_6998188\n", 1284 | "748 U_7522482\n", 1285 | "749 U_8571186\n", 1286 | "750 U_9488757\n", 1287 | "751 U_10537401\n", 1288 | "752 U_10668477\n", 1289 | "753 U_10537568\n", 1290 | "754 U_1493633\n", 1291 | "755 U_1493687\n", 1292 | "756 U_10275518\n", 1293 | "757 U_10799833\n", 1294 | "758 U_707301\n", 1295 | "759 U_9489218\n", 1296 | "760 U_7785510\n", 1297 | "761 U_6737094\n", 1298 | "762 U_9489625\n", 1299 | "763 U_7523582\n", 1300 | "764 U_576852\n", 1301 | "765 U_6737391\n", 1302 | "766 U_2543264\n", 1303 | "767 U_7786344\n", 1304 | "768 U_7524201\n", 1305 | "769 U_6606700\n", 1306 | "770 U_2281434\n", 1307 | "771 U_708750\n", 1308 | "772 U_10277094\n", 1309 | "773 U_7131369\n", 1310 | "774 U_7524604\n", 1311 | "775 U_6869364\n", 1312 | "776 U_10539385\n", 1313 | "777 U_9884041\n", 1314 | "778 U_2806207\n", 1315 | "779 U_10146397\n", 1316 | "780 U_7000752\n", 1317 | "781 U_7131834\n", 1318 | "782 U_7000786\n", 1319 | "783 U_8966905\n", 1320 | "784 U_8705049\n", 1321 | "785 U_9884754\n", 1322 | "786 U_2282626\n", 1323 | "787 U_7394508\n", 1324 | "788 U_9754079\n", 1325 | "789 U_2545143\n", 1326 | "790 U_7394843\n", 1327 | "791 U_7132732\n", 1328 | "792 U_9754186\n", 1329 | "793 U_7525966\n", 1330 | "794 U_10278525\n", 1331 | "795 U_710302\n", 1332 | "796 U_186190\n", 1333 | "797 U_2807641\n", 1334 | "798 U_10409908\n", 1335 | "799 U_8705994\n", 1336 | "800 U_11589618\n", 1337 | "801 U_9754617\n", 1338 | "802 U_10278947\n", 1339 | "803 U_7395518\n", 1340 | "804 U_9623890\n", 1341 | "805 U_7395729\n", 1342 | "806 U_7133621\n", 1343 | "807 U_5822905\n", 1344 | "808 U_8575518\n", 1345 | "809 U_6871650\n", 1346 | "810 U_8706664\n", 1347 | "811 U_7920295\n", 1348 | "812 U_7920398\n", 1349 | "813 U_7789345\n", 1350 | "814 U_711540\n", 1351 | "815 U_7658409\n", 1352 | "816 U_10804138\n", 1353 | "817 U_8444867\n", 1354 | "818 U_7134224\n", 1355 | "819 U_2022449\n", 1356 | "820 U_10804324\n", 1357 | "821 U_9755788\n", 1358 | "822 U_10017973\n", 1359 | "823 U_9493720\n", 1360 | "824 U_8576242\n", 1361 | "825 U_9755922\n", 1362 | "826 U_9493784\n", 1363 | "827 U_10542372\n", 1364 | "828 U_10804533\n", 1365 | "829 U_8838823\n", 1366 | "830 U_6610606\n", 1367 | "831 U_7003879\n", 1368 | "832 U_3202980\n", 1369 | "833 U_10412108\n", 1370 | "834 U_9494918\n", 1371 | "835 U_9363858\n", 1372 | "836 U_9494986\n", 1373 | "837 U_8446431\n", 1374 | "838 U_8446525\n", 1375 | "839 U_3072629\n", 1376 | "840 U_713453\n", 1377 | "841 U_8577796\n", 1378 | "842 U_10544000\n", 1379 | "843 U_10544045\n", 1380 | "844 U_9495523\n", 1381 | "845 U_10413112\n", 1382 | "846 U_9757851\n", 1383 | "847 U_7005346\n", 1384 | "848 U_451797\n", 1385 | "849 U_9364708\n", 1386 | "850 U_8447266\n", 1387 | "851 U_10806672\n", 1388 | "852 U_9889245\n", 1389 | "853 U_190062\n", 1390 | "854 U_6612595\n", 1391 | "855 U_11724418\n", 1392 | "856 U_2549446\n", 1393 | "857 U_5171092\n", 1394 | "858 U_1107874\n", 1395 | "859 U_8185776\n", 1396 | "860 U_7137200\n", 1397 | "861 U_8710085\n", 1398 | "862 U_2811848\n", 1399 | "863 U_8579046\n", 1400 | "864 U_7792618\n", 1401 | "865 U_7137263\n", 1402 | "866 U_1107947\n", 1403 | "867 U_9496667\n", 1404 | "868 U_9889935\n", 1405 | "869 U_9889974\n", 1406 | "870 U_10021069\n", 1407 | "871 U_11331870\n", 1408 | "872 U_9365858\n", 1409 | "873 U_5302908\n", 1410 | "874 U_10676864\n", 
1411 | "875 U_60035\n", 1412 | "876 U_7793338\n", 1413 | "877 U_10545862\n", 1414 | "878 U_9497307\n", 1415 | "879 U_6089446\n", 1416 | "880 U_7793417\n", 1417 | "881 U_9890587\n", 1418 | "882 U_1895216\n", 1419 | "883 U_9890733\n", 1420 | "884 U_10546097\n", 1421 | "885 U_8448965\n", 1422 | "886 U_10415141\n", 1423 | "887 U_7007318\n", 1424 | "888 U_8449258\n", 1425 | "889 U_6614252\n", 1426 | "890 U_716051\n", 1427 | "891 U_1633557\n", 1428 | "892 U_7794168\n", 1429 | "893 U_10284540\n", 1430 | "894 U_6876773\n", 1431 | "895 U_7663212\n", 1432 | "896 U_8974044\n", 1433 | "897 U_7794432\n", 1434 | "898 U_9891721\n", 1435 | "899 U_9498506\n", 1436 | "900 U_8581185\n", 1437 | "901 U_10678675\n", 1438 | "902 U_7795096\n", 1439 | "903 U_8188581\n", 1440 | "904 U_8582070\n", 1441 | "905 U_10810316\n", 1442 | "906 U_8975316\n", 1443 | "907 U_8189007\n", 1444 | "908 U_7140489\n", 1445 | "909 U_455858\n", 1446 | "910 U_10810668\n", 1447 | "911 U_10024265\n", 1448 | "912 U_9500060\n", 1449 | "913 U_3864008\n", 1450 | "914 U_7665142\n", 1451 | "915 U_8189474\n", 1452 | "916 U_11597362\n", 1453 | "917 U_10548829\n", 1454 | "918 U_8451723\n", 1455 | "919 U_9500373\n", 1456 | "920 U_6354794\n", 1457 | "921 U_3209347\n", 1458 | "922 U_6879450\n", 1459 | "923 U_10811625\n", 1460 | "924 U_7141726\n", 1461 | "925 U_6355351\n", 1462 | "926 U_1112485\n", 1463 | "927 U_6355400\n", 1464 | "928 U_9763352\n", 1465 | "929 U_8190494\n", 1466 | "930 U_7666284\n", 1467 | "931 U_6617754\n", 1468 | "932 U_4520614\n", 1469 | "933 U_10549964\n", 1470 | "934 U_8977150\n", 1471 | "935 U_6880080\n", 1472 | "936 U_8977339\n", 1473 | "937 U_7142452\n", 1474 | "938 U_9239673\n", 1475 | "939 U_9370765\n", 1476 | "940 U_6225057\n", 1477 | "941 U_8060116\n", 1478 | "942 U_64739\n", 1479 | "943 U_1113330\n", 1480 | "944 U_10812792\n", 1481 | "945 U_9895410\n", 1482 | "946 U_327163\n", 1483 | "947 U_327216\n", 1484 | "948 U_7798419\n", 1485 | "949 U_4652788\n", 1486 | "950 U_6618896\n", 1487 | "951 U_7012191\n", 1488 | "952 U_2686840\n", 1489 | "953 U_8716276\n", 1490 | "954 U_9502809\n", 1491 | "955 U_8061112\n", 1492 | "956 U_6750397\n", 1493 | "957 U_10551512\n", 1494 | "958 U_4391197\n", 1495 | "959 U_10813844\n", 1496 | "960 U_65946\n", 1497 | "961 U_6881852\n", 1498 | "962 U_10420823\n", 1499 | "963 U_10945164\n", 1500 | "964 U_721557\n", 1501 | "965 U_8454842\n", 1502 | "966 U_11469511\n", 1503 | "967 U_7144186\n", 1504 | "968 U_7668506\n", 1505 | "969 U_9896772\n", 1506 | "970 U_7406489\n", 1507 | "971 U_8979364\n", 1508 | "972 U_10159022\n", 1509 | "973 U_5964752\n", 1510 | "974 U_8062031\n", 1511 | "975 U_7669097\n", 1512 | "976 U_10290575\n", 1513 | "977 U_8193447\n", 1514 | "978 U_8062403\n", 1515 | "979 U_8979909\n", 1516 | "980 U_10159602\n", 1517 | "981 U_9504307\n", 1518 | "982 U_8193747\n", 1519 | "983 U_591588\n", 1520 | "984 U_2819908\n", 1521 | "985 U_8849245\n", 1522 | "986 U_67509\n", 1523 | "987 U_9111540\n", 1524 | "988 U_7800829\n", 1525 | "989 U_7800834\n", 1526 | "990 U_9111586\n", 1527 | "991 U_10553404\n", 1528 | "992 U_9898050\n", 1529 | "993 U_7538853\n", 1530 | "994 U_11077818\n", 1531 | "995 U_2165262\n", 1532 | "996 U_7015015\n", 1533 | "997 U_9636475\n", 1534 | "998 U_723652\n", 1535 | "999 U_8457007\n", 1536 | "1000 U_723821\n", 1537 | "1001 U_7277439\n", 1538 | "1002 U_6884256\n", 1539 | "1003 U_7408600\n", 1540 | "1004 U_7539864\n", 1541 | "1005 U_9505962\n", 1542 | "1006 U_6229193\n", 1543 | "1007 U_7277796\n", 1544 | "1008 U_7015681\n", 1545 | "1009 U_9506298\n", 1546 | "1010 U_10685994\n", 
1547 | "1011 U_8588906\n", 1548 | "1012 U_462458\n", 1549 | "1013 U_9506476\n", 1550 | "1014 U_8457973\n", 1551 | "1015 U_7540489\n", 1552 | "1016 U_11210514\n", 1553 | "1017 U_9113489\n", 1554 | "1018 U_69521\n", 1555 | "1019 U_11866029\n", 1556 | "1020 U_7278542\n", 1557 | "1021 U_462810\n", 1558 | "1022 U_7147495\n", 1559 | "1023 U_9506824\n", 1560 | "1024 U_200765\n", 1561 | "1025 U_10817691\n", 1562 | "1026 U_10817709\n", 1563 | "1027 U_200949\n", 1564 | "1028 U_8589574\n", 1565 | "1029 U_8982840\n", 1566 | "1030 U_8458667\n", 1567 | "1031 U_10031580\n", 1568 | "1032 U_9638459\n", 1569 | "1033 U_6230626\n", 1570 | "1034 U_9900746\n", 1571 | "1035 U_10818251\n", 1572 | "1036 U_332540\n", 1573 | "1037 U_9900831\n", 1574 | "1038 U_7541546\n", 1575 | "1039 U_6886237\n", 1576 | "1040 U_6624159\n", 1577 | "1041 U_6624190\n", 1578 | "1042 U_594888\n", 1579 | "1043 U_9638876\n", 1580 | "1044 U_70713\n", 1581 | "1045 U_10949723\n", 1582 | "1046 U_9508062\n", 1583 | "1047 U_70928\n", 1584 | "1048 U_10294727\n", 1585 | "1049 U_71132\n", 1586 | "1050 U_8590846\n", 1587 | "1051 U_10032663\n", 1588 | "1052 U_7542348\n", 1589 | "1053 U_10032768\n", 1590 | "1054 U_10688161\n", 1591 | "1055 U_8460041\n", 1592 | "1056 U_9901887\n", 1593 | "1057 U_2824110\n", 1594 | "1058 U_10557367\n", 1595 | "1059 U_7542889\n", 1596 | "1060 U_9115762\n", 1597 | "1061 U_8067245\n", 1598 | "1062 U_11081931\n", 1599 | "1063 U_9115982\n", 1600 | "1064 U_7018897\n", 1601 | "1065 U_6887834\n", 1602 | "1066 U_7543227\n", 1603 | "1067 U_2562509\n", 1604 | "1068 U_10557939\n", 1605 | "1069 U_9640458\n", 1606 | "1070 U_7150182\n", 1607 | "1071 U_5315203\n", 1608 | "1072 U_8854204\n", 1609 | "1073 U_8067819\n", 1610 | "1074 U_7150348\n", 1611 | "1075 U_72542\n", 1612 | "1076 U_10034098\n", 1613 | "1077 U_7805900\n", 1614 | "1078 U_7150581\n", 1615 | "1079 U_10296364\n", 1616 | "1080 U_7150672\n", 1617 | "1081 U_11345043\n", 1618 | "1082 U_8854703\n", 1619 | "1083 U_10558655\n", 1620 | "1084 U_7150845\n", 1621 | "1085 U_9379155\n", 1622 | "1086 U_8068448\n", 1623 | "1087 U_9641349\n", 1624 | "1088 U_7019923\n", 1625 | "1089 U_8068519\n", 1626 | "1090 U_7544246\n", 1627 | "1091 U_9772481\n", 1628 | "1092 U_335346\n", 1629 | "1093 U_466485\n", 1630 | "1094 U_8855272\n", 1631 | "1095 U_597779\n", 1632 | "1096 U_8724245\n", 1633 | "1097 U_9117477\n", 1634 | "1098 U_7675805\n", 1635 | "1099 U_8855463\n", 1636 | "1100 U_8200175\n", 1637 | "1101 U_4268045\n", 1638 | "1102 U_8986710\n", 1639 | "1103 U_7544937\n", 1640 | "1104 U_2564243\n", 1641 | "1105 U_2826449\n", 1642 | "1106 U_8200453\n", 1643 | "1107 U_8855879\n", 1644 | "1108 U_7545231\n", 1645 | "1109 U_7545312\n", 1646 | "1110 U_6889970\n", 1647 | "1111 U_7414318\n", 1648 | "1112 U_7807563\n", 1649 | "1113 U_8593997\n", 1650 | "1114 U_11346558\n", 1651 | "1115 U_74415\n", 1652 | "1116 U_9380588\n", 1653 | "1117 U_10560299\n", 1654 | "1118 U_9773875\n", 1655 | "1119 U_8725396\n", 1656 | "1120 U_9118645\n", 1657 | "1121 U_9249760\n", 1658 | "1122 U_730135\n", 1659 | "1123 U_8856642\n", 1660 | "1124 U_8725958\n", 1661 | "1125 U_8857202\n", 1662 | "1126 U_7153272\n", 1663 | "1127 U_6629028\n", 1664 | "1128 U_8070907\n", 1665 | "1129 U_8988418\n", 1666 | "1130 U_7546727\n", 1667 | "1131 U_206802\n", 1668 | "1132 U_8071186\n", 1669 | "1133 U_6629551\n", 1670 | "1134 U_338352\n", 1671 | "1135 U_9906678\n", 1672 | "1136 U_7678490\n", 1673 | "1137 U_76324\n", 1674 | "1138 U_9120305\n", 1675 | "1139 U_8727154\n", 1676 | "1140 U_207499\n", 1677 | "1141 U_7547659\n", 1678 | "1142 
U_10300171\n", 1679 | "1143 U_7154496\n", 1680 | "1144 U_11610965\n", 1681 | "1145 U_10693493\n", 1682 | "1146 U_10300470\n", 1683 | "1147 U_9383008\n", 1684 | "1148 U_9383108\n", 1685 | "1149 U_8727824\n", 1686 | "1150 U_732480\n", 1687 | "1151 U_10563015\n", 1688 | "1152 U_8728050\n", 1689 | "1153 U_7024115\n", 1690 | "1154 U_2960897\n", 1691 | "1155 U_9383604\n", 1692 | "1156 U_7417537\n", 1693 | "1157 U_6893340\n", 1694 | "1158 U_2437113\n", 1695 | "1159 U_2568229\n", 1696 | "1160 U_10563656\n", 1697 | "1161 U_10301531\n", 1698 | "1162 U_8859766\n", 1699 | "1163 U_340091\n", 1700 | "1164 U_10039545\n", 1701 | "1165 U_2306352\n", 1702 | "1166 U_209219\n", 1703 | "1167 U_8991096\n", 1704 | "1168 U_2830770\n", 1705 | "1169 U_10039901\n", 1706 | "1170 U_8860438\n", 1707 | "1171 U_2569068\n", 1708 | "1172 U_8991611\n", 1709 | "1173 U_8205250\n", 1710 | "1174 U_2569219\n", 1711 | "1175 U_7025699\n", 1712 | "1176 U_603187\n", 1713 | "1177 U_341060\n", 1714 | "1178 U_9385171\n", 1715 | "1179 U_7419095\n" 1716 | ] 1717 | }, 1718 | { 1719 | "name": "stdout", 1720 | "output_type": "stream", 1721 | "text": [ 1722 | "1180 U_7157037\n", 1723 | "1181 U_2831733\n", 1724 | "1182 U_7026042\n", 1725 | "1183 U_8730169\n", 1726 | "1184 U_7026263\n", 1727 | "1185 U_7419488\n", 1728 | "1186 U_5191407\n", 1729 | "1187 U_8730445\n", 1730 | "1188 U_7026528\n", 1731 | "1189 U_11745166\n", 1732 | "1190 U_7419926\n", 1733 | "1191 U_7420043\n", 1734 | "1192 U_7157955\n", 1735 | "1193 U_7420125\n", 1736 | "1194 U_8730963\n", 1737 | "1195 U_11483488\n", 1738 | "1196 U_10041727\n", 1739 | "1197 U_11614697\n", 1740 | "1198 U_7944768\n", 1741 | "1199 U_7420502\n", 1742 | "1200 U_9517656\n", 1743 | "1201 U_8731279\n", 1744 | "1202 U_8731303\n", 1745 | "1203 U_5323476\n", 1746 | "1204 U_8207103\n", 1747 | "1205 U_10435341\n", 1748 | "1206 U_10042137\n", 1749 | "1207 U_7027496\n", 1750 | "1208 U_11615050\n", 1751 | "1209 U_7420893\n", 1752 | "1210 U_8731643\n", 1753 | "1211 U_11353130\n", 1754 | "1212 U_6241394\n", 1755 | "1213 U_81026\n", 1756 | "1214 U_7945384\n", 1757 | "1215 U_7945447\n", 1758 | "1216 U_343395\n", 1759 | "1217 U_6634872\n", 1760 | "1218 U_7421317\n", 1761 | "1219 U_9518586\n", 1762 | "1220 U_8207910\n", 1763 | "1221 U_6766122\n", 1764 | "1222 U_9911868\n", 1765 | "1223 U_9518687\n", 1766 | "1224 U_6897536\n", 1767 | "1225 U_4276351\n", 1768 | "1226 U_10436930\n", 1769 | "1227 U_2965887\n", 1770 | "1228 U_9257354\n", 1771 | "1229 U_8208845\n", 1772 | "1230 U_10306066\n", 1773 | "1231 U_9257540\n", 1774 | "1232 U_7029366\n", 1775 | "1233 U_8995447\n", 1776 | "1234 U_7029413\n", 1777 | "1235 U_8733453\n", 1778 | "1236 U_8078172\n", 1779 | "1237 U_11486064\n", 1780 | "1238 U_10568584\n", 1781 | "1239 U_7029753\n", 1782 | "1240 U_8733708\n", 1783 | "1241 U_214149\n", 1784 | "1242 U_214163\n", 1785 | "1243 U_11486382\n", 1786 | "1244 U_8078601\n", 1787 | "1245 U_738693\n", 1788 | "1246 U_8078765\n", 1789 | "1247 U_6505939\n", 1790 | "1248 U_10044899\n", 1791 | "1249 U_7423473\n", 1792 | "1250 U_11486824\n", 1793 | "1251 U_8079018\n", 1794 | "1252 U_9651886\n", 1795 | "1253 U_10045141\n", 1796 | "1254 U_8210188\n", 1797 | "1255 U_608048\n", 1798 | "1256 U_9521021\n", 1799 | "1257 U_83864\n", 1800 | "1258 U_9521175\n", 1801 | "1259 U_6899805\n", 1802 | "1260 U_1788029\n", 1803 | "1261 U_7948549\n", 1804 | "1262 U_9783571\n", 1805 | "1263 U_6900124\n", 1806 | "1264 U_739772\n", 1807 | "1265 U_8997439\n", 1808 | "1266 U_6507072\n", 1809 | "1267 U_6900340\n", 1810 | "1268 U_8997531\n", 1811 | "1269 U_10439350\n", 
1812 | "1270 U_2050937\n", 1813 | "1271 U_8997833\n", 1814 | "1272 U_5983221\n", 1815 | "1273 U_5065922\n", 1816 | "1274 U_740602\n", 1817 | "1275 U_8998143\n", 1818 | "1276 U_9653510\n", 1819 | "1277 U_2575753\n", 1820 | "1278 U_10702247\n", 1821 | "1279 U_9391537\n", 1822 | "1280 U_9260540\n", 1823 | "1281 U_10047055\n", 1824 | "1282 U_6377043\n", 1825 | "1283 U_10047099\n", 1826 | "1284 U_2576065\n", 1827 | "1285 U_8474357\n", 1828 | "1286 U_7950070\n", 1829 | "1287 U_478998\n", 1830 | "1288 U_479034\n", 1831 | "1289 U_9654076\n", 1832 | "1290 U_3231714\n", 1833 | "1291 U_8867819\n", 1834 | "1292 U_9654272\n", 1835 | "1293 U_8474788\n", 1836 | "1294 U_11096238\n", 1837 | "1295 U_7426249\n", 1838 | "1296 U_6901979\n", 1839 | "1297 U_10703181\n", 1840 | "1298 U_9654628\n", 1841 | "1299 U_610664\n", 1842 | "1300 U_10834354\n", 1843 | "1301 U_9916866\n", 1844 | "1302 U_9654857\n", 1845 | "1303 U_5198426\n", 1846 | "1304 U_3625584\n", 1847 | "1305 U_7033564\n", 1848 | "1306 U_6902532\n", 1849 | "1307 U_8999870\n", 1850 | "1308 U_7951342\n", 1851 | "1309 U_6771709\n", 1852 | "1310 U_10573057\n", 1853 | "1311 U_11490618\n", 1854 | "1312 U_9917859\n", 1855 | "1313 U_7034286\n", 1856 | "1314 U_2971263\n", 1857 | "1315 U_349938\n", 1858 | "1316 U_1529588\n", 1859 | "1317 U_7952126\n", 1860 | "1318 U_8476544\n", 1861 | "1319 U_10573723\n", 1862 | "1320 U_6379526\n", 1863 | "1321 U_8738828\n", 1864 | "1322 U_1792147\n", 1865 | "1323 U_219439\n", 1866 | "1324 U_9918833\n", 1867 | "1325 U_6773138\n", 1868 | "1326 U_8083899\n", 1869 | "1327 U_9918919\n", 1870 | "1328 U_8083926\n", 1871 | "1329 U_10836516\n", 1872 | "1330 U_7035440\n", 1873 | "1331 U_9263683\n", 1874 | "1332 U_8608624\n", 1875 | "1333 U_10574743\n", 1876 | "1334 U_10574836\n", 1877 | "1335 U_8870920\n", 1878 | "1336 U_9002054\n", 1879 | "1337 U_8871159\n", 1880 | "1338 U_6905132\n", 1881 | "1339 U_6511921\n", 1882 | "1340 U_10050868\n", 1883 | "1341 U_3235131\n", 1884 | "1342 U_9395535\n", 1885 | "1343 U_11623802\n", 1886 | "1344 U_1531285\n", 1887 | "1345 U_9657851\n", 1888 | "1346 U_9264709\n", 1889 | "1347 U_8085071\n", 1890 | "1348 U_6905461\n", 1891 | "1349 U_11886205\n", 1892 | "1350 U_8478450\n", 1893 | "1351 U_9658183\n", 1894 | "1352 U_2842641\n", 1895 | "1353 U_90135\n", 1896 | "1354 U_8872024\n", 1897 | "1355 U_7037035\n", 1898 | "1356 U_352449\n", 1899 | "1357 U_6643919\n", 1900 | "1358 U_7037169\n", 1901 | "1359 U_9003353\n", 1902 | "1360 U_10314256\n", 1903 | "1361 U_7692834\n", 1904 | "1362 U_9921113\n", 1905 | "1363 U_90920\n", 1906 | "1364 U_7037854\n", 1907 | "1365 U_7431138\n", 1908 | "1366 U_9921526\n", 1909 | "1367 U_9004107\n", 1910 | "1368 U_9004116\n", 1911 | "1369 U_9004121\n", 1912 | "1370 U_7693402\n", 1913 | "1371 U_8873270\n", 1914 | "1372 U_9659821\n", 1915 | "1373 U_8480179\n", 1916 | "1374 U_7693796\n", 1917 | "1375 U_6907491\n", 1918 | "1376 U_6252157\n", 1919 | "1377 U_7563022\n", 1920 | "1378 U_10446807\n", 1921 | "1379 U_6907925\n", 1922 | "1380 U_6514765\n", 1923 | "1381 U_616597\n", 1924 | "1382 U_4548853\n", 1925 | "1383 U_7563560\n", 1926 | "1384 U_10316099\n", 1927 | "1385 U_9660786\n", 1928 | "1386 U_7825812\n", 1929 | "1387 U_1665449\n", 1930 | "1388 U_6908468\n", 1931 | "1389 U_8481374\n", 1932 | "1390 U_10578619\n", 1933 | "1391 U_7171019\n", 1934 | "1392 U_10447850\n", 1935 | "1393 U_8481829\n", 1936 | "1394 U_617511\n", 1937 | "1395 U_8481872\n", 1938 | "1396 U_7171194\n", 1939 | "1397 U_224437\n", 1940 | "1398 U_6384845\n", 1941 | "1399 U_10317165\n", 1942 | "1400 U_9661918\n", 1943 | 
"1401 U_4550145\n", 1944 | "1402 U_11365932\n", 1945 | "1403 U_10448491\n", 1946 | "1404 U_10579615\n", 1947 | "1405 U_2322198\n", 1948 | "1406 U_9662258\n", 1949 | "1407 U_10448771\n", 1950 | "1408 U_7172044\n", 1951 | "1409 U_10711020\n", 1952 | "1410 U_6516743\n", 1953 | "1411 U_8482868\n", 1954 | "1412 U_10317878\n", 1955 | "1413 U_7958611\n", 1956 | "1414 U_9138288\n", 1957 | "1415 U_6123634\n", 1958 | "1416 U_9531530\n", 1959 | "1417 U_9662632\n", 1960 | "1418 U_10580176\n", 1961 | "1419 U_225500\n", 1962 | "1420 U_6123902\n", 1963 | "1421 U_9794102\n", 1964 | "1422 U_7434900\n", 1965 | "1423 U_7172851\n", 1966 | "1424 U_488235\n", 1967 | "1425 U_6648634\n", 1968 | "1426 U_8483681\n", 1969 | "1427 U_6517692\n", 1970 | "1428 U_7959571\n", 1971 | "1429 U_10581063\n", 1972 | "1430 U_9794648\n", 1973 | "1431 U_2716762\n", 1974 | "1432 U_8483979\n", 1975 | "1433 U_8746192\n", 1976 | "1434 U_8352997\n", 1977 | "1435 U_7435515\n", 1978 | "1436 U_226588\n", 1979 | "1437 U_3241246\n", 1980 | "1438 U_8746321\n", 1981 | "1439 U_10843515\n", 1982 | "1440 U_10581397\n", 1983 | "1441 U_10843601\n", 1984 | "1442 U_357856\n", 1985 | "1443 U_9532928\n", 1986 | "1444 U_9533038\n", 1987 | "1445 U_8353412\n", 1988 | "1446 U_8091297\n", 1989 | "1447 U_8877746\n", 1990 | "1448 U_10188495\n", 1991 | "1449 U_7829207\n", 1992 | "1450 U_9926465\n", 1993 | "1451 U_8746847\n", 1994 | "1452 U_8878002\n", 1995 | "1453 U_9009178\n", 1996 | "1454 U_9533479\n", 1997 | "1455 U_7174200\n", 1998 | "1456 U_10582160\n", 1999 | "1457 U_7829811\n", 2000 | "1458 U_10451278\n", 2001 | "1459 U_7174483\n", 2002 | "1460 U_9271673\n", 2003 | "1461 U_2455968\n", 2004 | "1462 U_10320360\n", 2005 | "1463 U_2849312\n", 2006 | "1464 U_6257187\n", 2007 | "1465 U_9534134\n", 2008 | "1466 U_10451645\n", 2009 | "1467 U_7568093\n", 2010 | "1468 U_8878859\n", 2011 | "1469 U_6650679\n", 2012 | "1470 U_3767127\n", 2013 | "1471 U_7699320\n", 2014 | "1472 U_8747910\n", 2015 | "1473 U_6519692\n", 2016 | "1474 U_97359\n", 2017 | "1475 U_9272441\n", 2018 | "1476 U_9927801\n", 2019 | "1477 U_10452170\n", 2020 | "1478 U_8748253\n", 2021 | "1479 U_2457088\n", 2022 | "1480 U_5340748\n", 2023 | "1481 U_9535057\n", 2024 | "1482 U_10583694\n", 2025 | "1483 U_9273031\n", 2026 | "1484 U_8879824\n", 2027 | "1485 U_9273044\n", 2028 | "1486 U_8093487\n", 2029 | "1487 U_7044968\n", 2030 | "1488 U_7700338\n", 2031 | "1489 U_10321863\n", 2032 | "1490 U_9011145\n", 2033 | "1491 U_7831506\n", 2034 | "1492 U_10453184\n", 2035 | "1493 U_98562\n", 2036 | "1494 U_229791\n", 2037 | "1495 U_7045548\n", 2038 | "1496 U_10846690\n", 2039 | "1497 U_9535988\n", 2040 | "1498 U_5866050\n", 2041 | "1499 U_4293372\n", 2042 | "1500 U_8094526\n", 2043 | "1501 U_230216\n", 2044 | "1502 U_7177246\n", 2045 | "1503 U_9536612\n", 2046 | "1504 U_7177433\n", 2047 | "1505 U_7177465\n", 2048 | "1506 U_492860\n", 2049 | "1507 U_492861\n", 2050 | "1508 U_9012578\n", 2051 | "1509 U_6653434\n", 2052 | "1510 U_7570941\n", 2053 | "1511 U_3376660\n", 2054 | "1512 U_8488500\n", 2055 | "1513 U_7177888\n", 2056 | "1514 U_886442\n", 2057 | "1515 U_7964426\n", 2058 | "1516 U_7833466\n", 2059 | "1517 U_624517\n", 2060 | "1518 U_2721677\n", 2061 | "1519 U_10717180\n", 2062 | "1520 U_624732\n", 2063 | "1521 U_10717365\n", 2064 | "1522 U_6654207\n", 2065 | "1523 U_9537819\n", 2066 | "1524 U_7965079\n", 2067 | "1525 U_1018270\n", 2068 | "1526 U_7178711\n", 2069 | "1527 U_6261325\n", 2070 | "1528 U_7178857\n", 2071 | "1529 U_8227461\n", 2072 | "1530 U_10193612\n", 2073 | "1531 U_8489681\n", 2074 | "1532 
U_8096499\n", 2075 | "1533 U_6523839\n", 2076 | "1534 U_8490011\n", 2077 | "1535 U_7048308\n", 2078 | "1536 U_7965858\n", 2079 | "1537 U_9538735\n", 2080 | "1538 U_10194128\n", 2081 | "1539 U_4295973\n", 2082 | "1540 U_10194295\n", 2083 | "1541 U_8490375\n", 2084 | "1542 U_8490433\n", 2085 | "1543 U_7572934\n", 2086 | "1544 U_8883897\n", 2087 | "1545 U_7835448\n", 2088 | "1546 U_7966750\n", 2089 | "1547 U_9015329\n", 2090 | "1548 U_9539699\n", 2091 | "1549 U_8884397\n", 2092 | "1550 U_7835865\n", 2093 | "1551 U_9539881\n", 2094 | "1552 U_7835985\n", 2095 | "1553 U_3117423\n", 2096 | "1554 U_10457848\n", 2097 | "1555 U_9802590\n", 2098 | "1556 U_7836667\n", 2099 | "1557 U_6919298\n", 2100 | "1558 U_8492384\n", 2101 | "1559 U_10196388\n", 2102 | "1560 U_6657635\n", 2103 | "1561 U_9541230\n", 2104 | "1562 U_9672337\n", 2105 | "1563 U_9541523\n", 2106 | "1564 U_628733\n", 2107 | "1565 U_9017432\n", 2108 | "1566 U_9017454\n", 2109 | "1567 U_1546439\n", 2110 | "1568 U_9672925\n", 2111 | "1569 U_6658333\n", 2112 | "1570 U_7051860\n", 2113 | "1571 U_4692591\n", 2114 | "1572 U_9935483\n", 2115 | "1573 U_6265490\n", 2116 | "1574 U_9542358\n", 2117 | "1575 U_6920979\n", 2118 | "1576 U_8493866\n", 2119 | "1577 U_9935708\n", 2120 | "1578 U_6921106\n", 2121 | "1579 U_9935779\n", 2122 | "1580 U_9411561\n", 2123 | "1581 U_6003868\n", 2124 | "1582 U_4693166\n", 2125 | "1583 U_367846\n", 2126 | "1584 U_7183697\n", 2127 | "1585 U_499083\n", 2128 | "1586 U_499176\n", 2129 | "1587 U_499200\n", 2130 | "1588 U_8494656\n", 2131 | "1589 U_7577254\n", 2132 | "1590 U_6397609\n", 2133 | "1591 U_9019061\n", 2134 | "1592 U_9412411\n", 2135 | "1593 U_7970718\n", 2136 | "1594 U_3121072\n", 2137 | "1595 U_6660129\n", 2138 | "1596 U_6135910\n", 2139 | "1597 U_7839937\n", 2140 | "1598 U_11116746\n", 2141 | "1599 U_9412876\n", 2142 | "1600 U_7184674\n", 2143 | "1601 U_8495434\n", 2144 | "1602 U_9412969\n", 2145 | "1603 U_2335099\n", 2146 | "1604 U_7840155\n", 2147 | "1605 U_8495537\n", 2148 | "1606 U_9806268\n", 2149 | "1607 U_9806294\n", 2150 | "1608 U_8102398\n", 2151 | "1609 U_9806376\n", 2152 | "1610 U_9151059\n", 2153 | "1611 U_7709292\n", 2154 | "1612 U_7185021\n", 2155 | "1613 U_10723976\n", 2156 | "1614 U_6922952\n", 2157 | "1615 U_2466566\n", 2158 | "1616 U_11903829\n", 2159 | "1617 U_6923198\n", 2160 | "1618 U_6398937\n", 2161 | "1619 U_7840732\n", 2162 | "1620 U_10200085\n", 2163 | "1621 U_7185431\n", 2164 | "1622 U_9151575\n", 2165 | "1623 U_9413806\n", 2166 | "1624 U_632010\n", 2167 | "1625 U_501068\n", 2168 | "1626 U_501113\n", 2169 | "1627 U_10593744\n", 2170 | "1628 U_9938404\n", 2171 | "1629 U_10856146\n", 2172 | "1630 U_9545446\n", 2173 | "1631 U_7579461\n", 2174 | "1632 U_9021274\n", 2175 | "1633 U_9414519\n", 2176 | "1634 U_9807751\n", 2177 | "1635 U_9021404\n", 2178 | "1636 U_6531085\n", 2179 | "1637 U_10070074\n", 2180 | "1638 U_5482687\n", 2181 | "1639 U_6662358\n", 2182 | "1640 U_7186724\n", 2183 | "1641 U_10201387\n", 2184 | "1642 U_10070381\n", 2185 | "1643 U_10463643\n", 2186 | "1644 U_7580068\n", 2187 | "1645 U_8628657\n", 2188 | "1646 U_3385787\n", 2189 | "1647 U_10201533\n", 2190 | "1648 U_5220891\n", 2191 | "1649 U_4565592\n", 2192 | "1650 U_2992803\n", 2193 | "1651 U_9808628\n", 2194 | "1652 U_10857283\n", 2195 | "1653 U_109433\n", 2196 | "1654 U_10202000\n", 2197 | "1655 U_9415626\n", 2198 | "1656 U_9415694\n", 2199 | "1657 U_502843\n", 2200 | "1658 U_2862207\n", 2201 | "1659 U_9022612\n", 2202 | "1660 U_7449927\n", 2203 | "1661 U_7843162\n", 2204 | "1662 U_6532445\n", 2205 | "1663 
U_9022917\n", 2206 | "1664 U_9154002\n", 2207 | "1665 U_2993674\n", 2208 | "1666 U_10071644\n", 2209 | "1667 U_6139495\n", 2210 | "1668 U_8367765\n", 2211 | "1669 U_6925973\n", 2212 | "1670 U_7974579\n", 2213 | "1671 U_634710\n", 2214 | "1672 U_8367960\n", 2215 | "1673 U_6532964\n", 2216 | "1674 U_4173735\n", 2217 | "1675 U_7843842\n", 2218 | "1676 U_6664206\n", 2219 | "1677 U_5353582\n", 2220 | "1678 U_10596467\n", 2221 | "1679 U_9285775\n", 2222 | "1680 U_3125447\n", 2223 | "1681 U_8499471\n", 2224 | "1682 U_10465551\n", 2225 | "1683 U_8106292\n", 2226 | "1684 U_8499537\n", 2227 | "1685 U_9154972\n", 2228 | "1686 U_9417227\n", 2229 | "1687 U_10072588\n", 2230 | "1688 U_7975461\n", 2231 | "1689 U_7582269\n", 2232 | "1690 U_9810514\n", 2233 | "1691 U_8499931\n", 2234 | "1692 U_10466037\n", 2235 | "1693 U_111382\n", 2236 | "1694 U_10072868\n", 2237 | "1695 U_2995022\n", 2238 | "1696 U_11383676\n", 2239 | "1697 U_6534064\n", 2240 | "1698 U_10073176\n", 2241 | "1699 U_7845107\n", 2242 | "1700 U_8631561\n", 2243 | "1701 U_9286965\n", 2244 | "1702 U_111944\n", 2245 | "1703 U_7320949\n", 2246 | "1704 U_2602377\n", 2247 | "1705 U_243148\n", 2248 | "1706 U_3126788\n", 2249 | "1707 U_8369675\n", 2250 | "1708 U_8500756\n", 2251 | "1709 U_5486116\n", 2252 | "1710 U_9418403\n", 2253 | "1711 U_1554137\n", 2254 | "1712 U_3127059\n", 2255 | "1713 U_9680683\n", 2256 | "1714 U_8501053\n", 2257 | "1715 U_505687\n", 2258 | "1716 U_3127145\n", 2259 | "1717 U_9680840\n", 2260 | "1718 U_2471930\n", 2261 | "1719 U_2603020\n", 2262 | "1720 U_6666296\n", 2263 | "1721 U_2734184\n", 2264 | "1722 U_4438158\n", 2265 | "1723 U_1554668\n", 2266 | "1724 U_7977214\n" 2267 | ] 2268 | }, 2269 | { 2270 | "name": "stdout", 2271 | "output_type": "stream", 2272 | "text": [ 2273 | "1725 U_637205\n", 2274 | "1726 U_2734449\n", 2275 | "1727 U_8501627\n", 2276 | "1728 U_7846343\n", 2277 | "1729 U_2734556\n", 2278 | "1730 U_8370667\n", 2279 | "1731 U_10467877\n", 2280 | "1732 U_8370766\n", 2281 | "1733 U_3127897\n", 2282 | "1734 U_10730147\n", 2283 | "1735 U_7584448\n", 2284 | "1736 U_7322355\n", 2285 | "1737 U_768777\n", 2286 | "1738 U_5356330\n", 2287 | "1739 U_244579\n", 2288 | "1740 U_10992517\n", 2289 | "1741 U_3259297\n", 2290 | "1742 U_506786\n", 2291 | "1743 U_6929429\n", 2292 | "1744 U_7584823\n", 2293 | "1745 U_9288795\n", 2294 | "1746 U_7978106\n", 2295 | "1747 U_7060618\n", 2296 | "1748 U_4570354\n", 2297 | "1749 U_11517170\n", 2298 | "1750 U_10599685\n", 2299 | "1751 U_4701537\n", 2300 | "1752 U_376197\n", 2301 | "1753 U_9551247\n", 2302 | "1754 U_245268\n", 2303 | "1755 U_9551395\n", 2304 | "1756 U_6667886\n", 2305 | "1757 U_10075786\n", 2306 | "1758 U_8634002\n", 2307 | "1759 U_8634060\n", 2308 | "1760 U_8109816\n", 2309 | "1761 U_245543\n", 2310 | "1762 U_6668079\n", 2311 | "1763 U_8372040\n", 2312 | "1764 U_9551714\n", 2313 | "1765 U_7978851\n", 2314 | "1766 U_10338201\n", 2315 | "1767 U_9420719\n", 2316 | "1768 U_7585723\n", 2317 | "1769 U_10862535\n", 2318 | "1770 U_9682914\n", 2319 | "1771 U_10600422\n", 2320 | "1772 U_9683000\n", 2321 | "1773 U_2736203\n", 2322 | "1774 U_6930590\n", 2323 | "1775 U_5882159\n", 2324 | "1776 U_4440406\n", 2325 | "1777 U_4571498\n", 2326 | "1778 U_4440505\n", 2327 | "1779 U_9683413\n", 2328 | "1780 U_4440677\n", 2329 | "1781 U_4440694\n", 2330 | "1782 U_6931185\n", 2331 | "1783 U_4440859\n", 2332 | "1784 U_4440886\n", 2333 | "1785 U_9552732\n", 2334 | "1786 U_4440941\n", 2335 | "1787 U_10601345\n", 2336 | "1788 U_7193607\n", 2337 | "1789 U_3130584\n", 2338 | "1790 U_9422051\n", 2339 
| "1791 U_8242470\n", 2340 | "1792 U_9553304\n", 2341 | "1793 U_4441565\n", 2342 | "1794 U_4441607\n", 2343 | "1795 U_4441619\n", 2344 | "1796 U_4441692\n", 2345 | "1797 U_4441700\n", 2346 | "1798 U_4441703\n", 2347 | "1799 U_7980749\n", 2348 | "1800 U_8374087\n", 2349 | "1801 U_4441959\n", 2350 | "1802 U_8374128\n", 2351 | "1803 U_2344871\n", 2352 | "1804 U_6539179\n", 2353 | "1805 U_8374421\n", 2354 | "1806 U_4442308\n", 2355 | "1807 U_4180198\n", 2356 | "1808 U_10078508\n", 2357 | "1809 U_10996274\n", 2358 | "1810 U_4442728\n", 2359 | "1811 U_10472081\n", 2360 | "1812 U_4442828\n", 2361 | "1813 U_5360343\n", 2362 | "1814 U_6146823\n", 2363 | "1815 U_9816878\n", 2364 | "1816 U_4442969\n", 2365 | "1817 U_7326581\n", 2366 | "1818 U_6540251\n", 2367 | "1819 U_7982069\n", 2368 | "1820 U_6540341\n", 2369 | "1821 U_8375390\n", 2370 | "1822 U_117876\n", 2371 | "1823 U_6671552\n", 2372 | "1824 U_10210756\n", 2373 | "1825 U_3133086\n", 2374 | "1826 U_6409893\n", 2375 | "1827 U_7982833\n", 2376 | "1828 U_7982854\n", 2377 | "1829 U_7589826\n", 2378 | "1830 U_6148065\n", 2379 | "1831 U_512079\n", 2380 | "1832 U_10080408\n", 2381 | "1833 U_8245490\n", 2382 | "1834 U_9949505\n", 2383 | "1835 U_8245665\n", 2384 | "1836 U_8114652\n", 2385 | "1837 U_8245893\n", 2386 | "1838 U_2740953\n", 2387 | "1839 U_9949924\n", 2388 | "1840 U_9294759\n", 2389 | "1841 U_6411252\n", 2390 | "1842 U_11785212\n", 2391 | "1843 U_8246479\n", 2392 | "1844 U_9688295\n", 2393 | "1845 U_251111\n", 2394 | "1846 U_9295200\n", 2395 | "1847 U_2479518\n", 2396 | "1848 U_8508837\n", 2397 | "1849 U_9295279\n", 2398 | "1850 U_7722419\n", 2399 | "1851 U_9950654\n", 2400 | "1852 U_9950685\n", 2401 | "1853 U_7722571\n", 2402 | "1854 U_8509167\n", 2403 | "1855 U_7198475\n", 2404 | "1856 U_8509227\n", 2405 | "1857 U_120642\n", 2406 | "1858 U_120661\n", 2407 | "1859 U_5101489\n", 2408 | "1860 U_6936520\n", 2409 | "1861 U_10475536\n", 2410 | "1862 U_6936713\n", 2411 | "1863 U_10082477\n", 2412 | "1864 U_6674677\n", 2413 | "1865 U_121176\n", 2414 | "1866 U_6937001\n", 2415 | "1867 U_1038791\n", 2416 | "1868 U_7985690\n", 2417 | "1869 U_7068192\n", 2418 | "1870 U_6675021\n", 2419 | "1871 U_6675042\n", 2420 | "1872 U_1432285\n", 2421 | "1873 U_8116969\n", 2422 | "1874 U_9296697\n", 2423 | "1875 U_8248136\n", 2424 | "1876 U_908206\n", 2425 | "1877 U_7986104\n", 2426 | "1878 U_9034798\n", 2427 | "1879 U_9952338\n", 2428 | "1880 U_8248489\n", 2429 | "1881 U_8510715\n", 2430 | "1882 U_253202\n", 2431 | "1883 U_9690452\n", 2432 | "1884 U_10476896\n", 2433 | "1885 U_9690577\n", 2434 | "1886 U_7724503\n", 2435 | "1887 U_253445\n", 2436 | "1888 U_10739216\n", 2437 | "1889 U_9166464\n", 2438 | "1890 U_6413968\n", 2439 | "1891 U_6807185\n", 2440 | "1892 U_9559862\n", 2441 | "1893 U_9166694\n", 2442 | "1894 U_11132895\n", 2443 | "1895 U_3137529\n", 2444 | "1896 U_10477667\n", 2445 | "1897 U_3006738\n", 2446 | "1898 U_9298215\n", 2447 | "1899 U_9560499\n", 2448 | "1900 U_10609081\n", 2449 | "1901 U_8774114\n", 2450 | "1902 U_6545923\n", 2451 | "1903 U_7201413\n", 2452 | "1904 U_8118972\n", 2453 | "1905 U_10478305\n", 2454 | "1906 U_7987958\n", 2455 | "1907 U_3007380\n", 2456 | "1908 U_7595005\n", 2457 | "1909 U_4449292\n", 2458 | "1910 U_6677598\n", 2459 | "1911 U_6546595\n", 2460 | "1912 U_8512688\n", 2461 | "1913 U_9692345\n", 2462 | "1914 U_9561313\n", 2463 | "1915 U_4449575\n", 2464 | "1916 U_386408\n", 2465 | "1917 U_517497\n", 2466 | "1918 U_9168402\n", 2467 | "1919 U_517763\n", 2468 | "1920 U_6940344\n", 2469 | "1921 U_9168641\n", 2470 | "1922 
     ]
    }
   ],
   "source": [
    "# create index dicts for users, concepts, courses, teachers, schools and videos\n",
    "users = filtered_df[\"id\"].unique()\n",
    "user2index = dict(zip(users, list(range(len(users)))))\n",
    "concepts = distinct_concepts\n",
    "concept2index = dict(zip(concepts, list(range(len(concepts)))))\n",
    "courses = filtered_df[\"cid\"].unique()\n",
    "course2index = dict(zip(courses, list(range(len(courses)))))\n",
    "teachers = flattend_t_list\n",
    "teacher2index = dict(zip(teachers, list(range(len(teachers)))))\n",
    "schools = filtered_df[\"schools\"].unique()\n",
    "school2index = dict(zip(schools, list(range(len(schools)))))\n",
    "videos = filtered_df[\"vid\"].unique()\n",
    "video2index = dict(zip(videos, list(range(len(videos)))))\n",
    "\n",
    "\n",
    "user_action = list()\n",
    "rating_matrix = list()\n",
    "adjacency_matrix = list()\n",
    "user_course = list()\n",
    "user_teacher = list()\n",
    "user_school = list()\n",
    "user_video = list()\n",
    "\n",
    "\n",
    "def get_ua(udf, column, index_dict):\n",
    "    \"\"\"\n",
    "    Get the user vector over index_dict (click counts per item)\n",
    "    for the given column of the user dataframe.\n",
    "    \"\"\"\n",
2586 | " # below should be change to uceoncepts = dict(concept, clicked count)\n", 2587 | " if isinstance(udf[column].values[0], list):\n", 2588 | " uconcepts = list([c for sublist in udf[column].values for c in sublist if len(c)>0])\n", 2589 | " else:\n", 2590 | "# print(udf[column].values)\n", 2591 | " uconcepts = list(c for c in udf[column].values if len(c)>0)\n", 2592 | " uconcepts = Counter(uconcepts)\n", 2593 | " uconcepts = dict(uconcepts)\n", 2594 | "# print(len(uconcepts))\n", 2595 | " \n", 2596 | " uconcepts_indices = [index_dict[c] for c in uconcepts.keys()]\n", 2597 | " uvec = np.zeros(len(index_dict))\n", 2598 | "# uvec[uconcepts_indices] = 1\n", 2599 | " np.put(a=uvec, ind=uconcepts_indices, v=list(uconcepts.values()))\n", 2600 | " assert len(uconcepts_indices) == (uvec>0).sum()\n", 2601 | " return uvec\n", 2602 | "\n", 2603 | "\n", 2604 | "def radom_negative_sample(user_action, item_size):\n", 2605 | " \"\"\"to get (user_size, 100, 2), 100th item is positive one\"\"\"\n", 2606 | " negative_sample = []\n", 2607 | " for u in user_action:\n", 2608 | " sample = []\n", 2609 | " i = 0\n", 2610 | " while i < 99:\n", 2611 | " t = random.randint(0, item_size-1)\n", 2612 | " if t not in user_action[u]:\n", 2613 | " sample.append([u, t])\n", 2614 | " i += 1\n", 2615 | " sample.append([u, user_action[u][-1]])\n", 2616 | " negative_sample.append(sample)\n", 2617 | " return np.array(negative_sample)\n", 2618 | "\n", 2619 | "\n", 2620 | "user_concept_dict = dict()\n", 2621 | "# i = 7\n", 2622 | "for i in range(len(users)):\n", 2623 | " print(i, users[i])\n", 2624 | "\n", 2625 | " # user_action\n", 2626 | " udf = filtered_df[filtered_df.id==users[i]]\n", 2627 | " user_action.append(get_ua(udf, 'concepts', concept2index))\n", 2628 | " \n", 2629 | " # adjacency_matrix\n", 2630 | " udf_train = udf[(udf[\"local_start_time\"]>='2017-01-01 00:00:00') & (udf[\"local_start_time\"]<='2019-11-01 00:00:00')]\n", 2631 | " uvec_train = get_ua(udf_train, 'concepts', concept2index)\n", 2632 | " adjacency_matrix.append(uvec_train)\n", 2633 | "\n", 2634 | " # rating_matrix\n", 2635 | " udf_train.sort_values([\"local_start_time\"], inplace=True)\n", 2636 | " con_list = udf_train[\"concepts\"].values\n", 2637 | " learning_time = udf_train[\"local_start_time\"].values\n", 2638 | "\n", 2639 | " # create condict ordered by concept learned time\n", 2640 | " # first learned time for a concept as its time\n", 2641 | " con_dict = dict()\n", 2642 | " for ind, clist in enumerate(con_list):\n", 2643 | " time = learning_time[ind]\n", 2644 | " for c in clist:\n", 2645 | " if c not in con_dict and len(c)>0:\n", 2646 | " con_dict[c] = time\n", 2647 | " # replace the last item count as 0 as the paper described\n", 2648 | "# print(\"before\", (uvec_train>0).sum())\n", 2649 | " uvec_train_ = uvec_train.copy()\n", 2650 | " uvec_train_[concept2index[list(con_dict.keys())[-1]]] = 0\n", 2651 | "# print(\"after\", (uvec_train_>0).sum())\n", 2652 | " rating_matrix.append(uvec_train_)\n", 2653 | "\n", 2654 | " user_concept_dict[i] = [concept2index[c] for c in list(con_dict.keys())]\n", 2655 | "\n", 2656 | " # user course\n", 2657 | " user_course.append(get_ua(udf_train, 'cid', course2index))\n", 2658 | "\n", 2659 | " # user school\n", 2660 | " user_school.append(get_ua(udf_train, 'schools', school2index))\n", 2661 | "\n", 2662 | " # user teacher\n", 2663 | " user_teacher.append(get_ua(udf_train, 'teachers', teacher2index))\n", 2664 | " \n", 2665 | " # user video\n", 2666 | " user_video.append(get_ua(udf_train, 'vid', 
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2005, 100, 2)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sample 99 negatives per user for the held-out (last-learned) concept\n",
    "negatives = random_negative_sample(user_concept_dict, len(concept2index))\n",
    "negatives.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(856067, 858072)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(np.array(rating_matrix)>0).sum(), (np.array(adjacency_matrix)>0).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(386, 387)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(rating_matrix[0]>0).sum(), adjacency_matrix[0].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check shapes\n",
    "print(\"user_action\", np.array(user_action).shape)\n",
    "print(\"rating_matrix\", np.array(rating_matrix).shape)\n",
    "print(\"adjacency_matrix\", np.array(adjacency_matrix).shape)\n",
    "print(\"UC\", np.array(user_course).shape)\n",
    "print(\"UCT\", np.array(user_teacher).shape)\n",
    "print(\"US\", np.array(user_school).shape)\n",
    "print(\"UV\", np.array(user_video).shape)\n",
    "print(\"negatives\", negatives.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('data-for-kgcrec/KC.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(cc_np), f)"
   ]
  },
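  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick round-trip check (illustration only, assuming the `data-for-kgcrec/KC.p` file written in the previous cell and `cc_np` still in scope): reload the pickled matrix and confirm that shape and contents survive the `np.asmatrix` + `pickle` round trip."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "\n",
    "# reload the matrix saved above and compare against the in-memory source\n",
    "with open('data-for-kgcrec/KC.p', 'rb') as f:\n",
    "    kc_loaded = pickle.load(f)\n",
    "\n",
    "print(type(kc_loaded), kc_loaded.shape)\n",
    "assert np.array_equal(np.asarray(kc_loaded), np.asarray(cc_np))"
   ]
  },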
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save the matrices under the file names used in the paper's repository\n",
    "with open('data-for-kgcrec/user_action.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(user_action), f)\n",
    "with open('data-for-kgcrec/rate_matrix.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(rating_matrix), f)\n",
    "with open('data-for-kgcrec/adjacency_matrix.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(adjacency_matrix), f)\n",
    "with open('data-for-kgcrec/UC.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(user_course), f)\n",
    "with open('data-for-kgcrec/UCT.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(user_teacher), f)\n",
    "with open('data-for-kgcrec/US.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(user_school), f)\n",
    "with open('data-for-kgcrec/UV.p', 'wb') as f:\n",
    "    pickle.dump(np.asmatrix(user_video), f)\n",
    "with open('data-for-kgcrec/negative.p', 'wb') as f:\n",
    "    pickle.dump(negatives, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load pre-trained fastText word vectors for the concepts (not used at the moment)\n",
    "model = fasttext.load_model(\"data-for-kgcrec/cc.zh.100.bin\")\n",
    "con_vectors = list()\n",
    "for c in distinct_concepts:\n",
    "    con_vectors.append(model.get_word_vector(c))\n",
    "\n",
    "with open('data-for-kgcrec/concept_embedding.p', 'wb') as f:\n",
    "    pickle.dump(np.array(con_vectors), f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# collect concept names and descriptions as the training corpus\n",
    "with open(\"entities/concept.json\", \"r\", encoding=\"utf8\") as f:\n",
    "    lines = f.readlines()\n",
    "\n",
    "docs = list()\n",
    "for l in lines:\n",
    "    json_str = json.loads(l)\n",
    "    docs.append(json_str[\"name\"])\n",
    "    if 'explanation' in json_str:\n",
    "        docs.append(json_str[\"explanation\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba\n",
    "import gensim\n",
    "\n",
    "# tokenize with jieba, then train fastText (CBOW) on the concept corpus\n",
    "z = [list(jieba.cut(i)) for i in docs]\n",
    "model = gensim.models.FastText(z,\n",
    "                               sg=0,  # CBOW\n",
    "                               min_n=5,\n",
    "                               max_n=10,\n",
    "                               size=100,\n",
    "                               window=5,\n",
    "                               negative=10,\n",
    "                               min_count=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# write the vocabulary (one token per line) and the embedding matrix to text files\n",
    "import csv\n",
    "with open(\"vocab\", 'w', encoding=\"utf8\") as myfile:\n",
    "    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL, delimiter=\"\\n\")\n",
    "    lines = [l.replace(\"\\n\", \" \") for l in list(model.wv.vocab.keys())]\n",
    "    wr.writerow(lines)\n",
    "np.savetxt(\"emb.tsv\", model.wv.vectors, delimiter=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(list(model.wv.vocab.keys()))"
   ]
  },
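  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A small sanity check on the trained embeddings (illustration only, assuming the gensim 3.x API used above): probe any in-vocabulary token and list its nearest neighbours; tokens from related concept descriptions should rank highly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pick an arbitrary in-vocabulary token and inspect its nearest neighbours\n",
    "probe = list(model.wv.vocab.keys())[0]\n",
    "for word, sim in model.wv.most_similar(probe, topn=5):\n",
    "    print(probe, '->', word, round(sim, 3))"
   ]
  }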
2903 | "version": "3.6.12" 2904 | } 2905 | }, 2906 | "nbformat": 4, 2907 | "nbformat_minor": 2 2908 | } 2909 | --------------------------------------------------------------------------------