├── README.md ├── data └── test-data.csv └── ncf_tensorflow.py /README.md: -------------------------------------------------------------------------------- 1 | # NeuralCollaborativeFiltering_NCF_Tensorflow 2 | 3 | ## How to use 4 | 5 | Win/Linux 6 | ``` 7 | pip3 install tensorflow pandas scikit-learn 8 | ``` 9 | 10 | Mac 11 | 12 | ``` 13 | pip install tensorflow-macos pandas scikit-learn 14 | ``` 15 | 16 | ``` 17 | python3 ncf_tensorflow.py 18 | ``` 19 | 20 | ## output 21 | 22 | ``` 23 | 学习前: 24 | User Video 1 Video 2 Video 3 Video 4 Video 5 Video 6 25 | 0 User1 10.0 3.0 NaN NaN NaN NaN 26 | 1 User2 NaN 10.0 NaN 10.0 5.0 1.0 27 | 2 User3 NaN NaN 9.0 NaN NaN NaN 28 | 3 User4 6.0 1.0 NaN 8.0 NaN 9.0 29 | 4 User5 1.0 NaN 1.0 NaN 10.0 4.0 30 | 5 User6 1.0 4.0 1.0 NaN 10.0 1.0 31 | 6 User7 NaN 2.0 1.0 2.0 NaN 8.0 32 | 7 User8 NaN NaN NaN 1.0 NaN NaN 33 | 8 User9 1.0 NaN 10.0 NaN 3.0 1.0 34 | 35 | 36 | 学习后: 37 | Video 1 Video 2 Video 3 Video 4 Video 5 Video 6 38 | User1 9.508878 3.113009 7.157410 9.335443 9.629840 9.580980 39 | User2 2.302773 9.624501 9.730996 9.773037 5.119677 0.930121 40 | User3 3.733079 9.065267 9.348892 9.705484 8.523159 3.409261 41 | User4 6.053867 1.170659 3.957999 7.933151 9.437999 8.902870 42 | User5 1.106443 3.168726 0.732408 2.506290 9.728704 4.037748 43 | User6 0.501248 4.096941 0.947260 2.387310 9.579254 1.118673 44 | User7 4.283075 1.775985 0.756208 2.097234 9.486653 8.016829 45 | User8 0.472433 1.816927 0.716490 1.029657 8.845912 1.527248 46 | User9 1.068604 8.614190 9.599453 9.406795 3.018846 0.811254 47 | ``` -------------------------------------------------------------------------------- /data/test-data.csv: -------------------------------------------------------------------------------- 1 | User,Video 1,Video 2,Video 3,Video 4,Video 5,Video 6 2 | User1,10,3,,,, 3 | User2,,10,,10,5,1 4 | User3,,,9,,, 5 | User4,6,1,,8,,9 6 | User5,1,,1,,10,4 7 | User6,1,4,1,,10,1 8 | User7,,2,1,2,,8 9 | User8,,,,1,, 10 | User9,1,,10,,3,1 11 | 
"""Train a small Neural Collaborative Filtering (NCF) model and print the
predicted rating for every (user, video) pair.

Input is ``data/test-data.csv``: the first column holds the user labels, each
remaining column holds the ratings for one video, and empty cells are treated
as "not rated" and excluded from training.
"""
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Show all columns without truncation or line breaks when printing frames.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)

data = pd.read_csv('data/test-data.csv')
print(data)

# Ratings are divided by this before training so that they are on the same
# scale as the sigmoid output layer; predictions are multiplied by it again
# before printing.
scaler = 10

user_column = data.columns[0]
video_columns = data.columns[1:]

# Wide (user x video) matrix -> long (user_id, video_id, rating) rows.
# dropna() removes the unrated cells.
df_long = pd.melt(
    data,
    id_vars=[user_column],
    var_name='video_id',
    value_name='rating',
).dropna()
df_long.columns = ['user_id', 'video_id', 'rating']
df_long['rating'] = df_long['rating'] / scaler

# Encode the user and video labels as contiguous integer ids for the
# embedding layers.
user_encoder = LabelEncoder()
video_encoder = LabelEncoder()
df_long['user_id'] = user_encoder.fit_transform(df_long['user_id'])
df_long['video_id'] = video_encoder.fit_transform(df_long['video_id'])

# Every observed rating is used for training; no held-out split is made
# (train_test_split stays imported for anyone who wants to add one).
train = df_long

# Model hyperparameters
num_users = len(user_encoder.classes_)
num_videos = len(video_encoder.classes_)
embedding_dim = 64

# Create the NCF model: one embedding per user and per video.
inputs_user = tf.keras.layers.Input(shape=(1,))
inputs_video = tf.keras.layers.Input(shape=(1,))
embedding_user = tf.keras.layers.Embedding(num_users, embedding_dim)(inputs_user)
embedding_video = tf.keras.layers.Embedding(num_videos, embedding_dim)(inputs_video)

# Merge the embeddings using concatenation; other merging methods such as a
# dot product or element-wise multiplication are possible alternatives.
merged = tf.keras.layers.Concatenate()([embedding_user, embedding_video])
merged = tf.keras.layers.Flatten()(merged)

# Fully connected layers ending in a single sigmoid unit.
dense = tf.keras.layers.Dense(64, activation='relu')(merged)
dense = tf.keras.layers.Dense(32, activation='relu')(dense)
output = tf.keras.layers.Dense(1, activation='sigmoid')(dense)

# Compile and train the model.
model = tf.keras.Model(inputs=[inputs_user, inputs_video], outputs=output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.fit(
    [train['user_id'].values, train['video_id'].values],
    train['rating'].values,
    batch_size=64,
    epochs=100,
    verbose=0,
)

# Predict every (user, video) pair in the order the labels appear in the CSV.
# Labels are taken from the data rather than hard-coded, and restricted to
# those the encoders were fitted on: a user or video without a single rating
# has no embedding and would make LabelEncoder.transform raise.
known_users = set(user_encoder.classes_)
known_videos = set(video_encoder.classes_)
# dict.fromkeys drops repeated user labels while keeping first-seen order.
users = list(dict.fromkeys(u for u in data[user_column] if u in known_users))
videos = [v for v in video_columns if v in known_videos]

user_codes = user_encoder.transform(users)
video_codes = video_encoder.transform(videos)

# Build the full user x video grid (row-major: all videos for the first user,
# then all videos for the second, ...) and score it with ONE predict call
# instead of one call per cell.
grid_users = np.repeat(user_codes, len(video_codes))
grid_videos = np.tile(video_codes, len(user_codes))
predictions = model.predict(x=[grid_users, grid_videos], verbose=0)

result_df = pd.DataFrame(
    predictions.reshape(len(users), len(videos)) * scaler,
    index=users,
    columns=videos,
)

print(result_df)