├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── RecSys2019_DeepLearning_Evaluation ├── .gitignore ├── Base │ ├── BaseCBFRecommender.py │ ├── BaseMatrixFactorizationRecommender.py │ ├── BaseRecommender.py │ ├── BaseSimilarityMatrixRecommender.py │ ├── BaseTempFolder.py │ ├── DataIO.py │ ├── Evaluation │ │ ├── Evaluator.py │ │ ├── KFold_SignificanceTest.py │ │ ├── metrics.py │ │ └── metrics_Test.py │ ├── IR_feature_weighting.py │ ├── Incremental_Training_Early_Stopping.py │ ├── NonPersonalizedRecommender.py │ ├── Recommender_utils.py │ ├── Recommender_utils_Test.py │ └── Similarity │ │ ├── Compute_Similarity.py │ │ ├── Compute_Similarity_Euclidean.py │ │ ├── Compute_Similarity_Python.py │ │ ├── Compute_similarity_euclidean_test.py │ │ ├── Compute_similarity_test.py │ │ └── Cython │ │ └── Compute_Similarity_Cython.pyx ├── CNN_on_embeddings │ ├── IJCAI │ │ ├── CFM_github │ │ │ ├── CFM.py │ │ │ ├── FM.py │ │ │ ├── LoadData.py │ │ │ ├── NeuralFM.py │ │ │ ├── ONCF.py │ │ │ └── README.md │ │ ├── CFM_our_interface │ │ │ ├── CFM.py │ │ │ ├── CFMWrapper.py │ │ │ ├── Dataset_wrapper.py │ │ │ ├── FM.py │ │ │ ├── FMWrapper.py │ │ │ ├── LoadData.py │ │ │ ├── NeuralFM.py │ │ │ ├── ONCF.py │ │ │ └── README.md │ │ ├── ConvNCF_our_interface │ │ │ ├── ConvNCF.py │ │ │ └── ConvNCF_wrapper.py │ │ └── CoupledCF_our_interface │ │ │ ├── CoupledCFWrapper.py │ │ │ ├── Movielens1MReader │ │ │ └── Movielens1MReader.py │ │ │ ├── TafengReader │ │ │ └── TafengReader.py │ │ │ ├── mainMovieUserCnn.py │ │ │ ├── mainMovieUserCnn_only_deepCF.py │ │ │ ├── mainTafengUserCnn.py │ │ │ └── mainTafengUserCnn_only_deepCF.py │ ├── read_CNN_embedding_evaluation_results.py │ └── run_CNN_embedding_evaluation_ablation.py ├── Conferences │ ├── IJCAI │ │ ├── ConvNCF_github │ │ │ ├── ConvNCF.py │ │ │ ├── Data │ │ │ │ ├── yelp.test.negative.gz │ │ │ │ ├── yelp.test.rating.gz │ │ │ │ └── yelp.train.rating.gz │ │ │ ├── Dataset.py │ │ │ ├── MF_BPR.py │ │ │ ├── README.md │ │ │ ├── figure.png │ │ │ └── saver.py │ │ ├── ConvNCF_our_interface │ │ │ ├── ConvNCF.py │ │ │ ├── ConvNCF_wrapper.py │ │ │ ├── GowallaReader │ │ │ │ └── GowallaReader.py │ │ │ ├── MFBPR_Wrapper.py │ │ │ ├── MF_BPR.py │ │ │ └── YelpReader │ │ │ │ └── YelpReader.py │ │ ├── CoupledCF_original │ │ │ ├── LoadMovieDataCnn.py │ │ │ ├── LoadTafengDataCnn.py │ │ │ ├── evaluateMovieCnn.py │ │ │ ├── evaluateMovieCnn_only_deepCF.py │ │ │ ├── evaluateTafengCnn.py │ │ │ ├── evaluateTafengCnn_only_deepCF.py │ │ │ ├── mainMovieUserCnn.py │ │ │ ├── mainMovieUserCnn_only_deepCF.py │ │ │ ├── mainTafengUserCnn.py │ │ │ └── mainTafengUserCnn_only_deepCF.py │ │ ├── CoupledCF_our_interface │ │ │ ├── CoupledCFWrapper.py │ │ │ ├── DeepCFWrapper.py │ │ │ ├── Movielens1MReader │ │ │ │ └── Movielens1MReader.py │ │ │ ├── TafengReader │ │ │ │ └── TafengReader.py │ │ │ ├── mainMovieUserCnn.py │ │ │ ├── mainMovieUserCnn_only_deepCF.py │ │ │ ├── mainTafengUserCnn.py │ │ │ └── mainTafengUserCnn_only_deepCF.py │ │ ├── DELF_original │ │ │ ├── DELF_EF.sh │ │ │ ├── DELF_MLP.sh │ │ │ ├── DMF.sh │ │ │ ├── Dataset.py │ │ │ ├── MLP.sh │ │ │ ├── Model │ │ │ │ ├── DELF.py │ │ │ │ ├── DMF.py │ │ │ │ ├── DMF_2layers.py │ │ │ │ ├── MF.py │ │ │ │ ├── NMF.py │ │ │ │ ├── NMF_attention.py │ │ │ │ ├── NMF_attention_EF.py │ │ │ │ ├── NMF_attention_MLP.py │ │ │ │ ├── NMF_attention_MLP_seq.py │ │ │ │ ├── NMF_attention_NSVD.py │ │ │ │ ├── NMF_attention_SVDpp.py │ │ │ │ ├── NMF_attention_noAtt.py │ │ │ │ ├── NMF_attention_nointer.py │ │ │ │ ├── NMF_fmn.py │ │ │ │ ├── NMF_multi_embedding.py │ │ │ │ ├── NMF_nsvd.py │ 
│ │ │ ├── NMF_nsvd_NTN.py │ │ │ │ ├── NMF_nsvd_NeuMF.py │ │ │ │ ├── NMF_nsvd_gating.py │ │ │ │ ├── NMF_nsvd_inter0.py │ │ │ │ ├── NMF_nsvd_partition.py │ │ │ │ ├── NMF_nsvd_pretrain.py │ │ │ │ ├── __init__.py │ │ │ │ └── item_rating_num.py │ │ │ ├── NeuMF.sh │ │ │ ├── eval_trained_model.py │ │ │ ├── eval_trained_model_MLP.py │ │ │ ├── evaluate.py │ │ │ ├── evaluate_batch.py │ │ │ ├── evaluate_batch_MLP.py │ │ │ ├── main.py │ │ │ ├── main_attention.py │ │ │ ├── main_nsvd.py │ │ │ └── main_nsvd_pretrain.py │ │ ├── DELF_our_interface │ │ │ ├── AmazonMusicReader │ │ │ │ └── AmazonMusicReader.py │ │ │ ├── DELFWrapper.py │ │ │ ├── Dataset.py │ │ │ └── Movielens1MReader │ │ │ │ └── Movielens1MReader.py │ │ ├── DMF_our_interface │ │ │ ├── AmazonMovieReader │ │ │ │ └── AmazonMovieReader.py │ │ │ ├── AmazonMusicReader │ │ │ │ └── AmazonMusicReader.py │ │ │ ├── Movielens100KReader │ │ │ │ └── Movielens100KReader.py │ │ │ └── Movielens1MReader │ │ │ │ └── Movielens1MReader.py │ │ ├── NeuRec_github │ │ │ ├── INeuRec.py │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── UNeuRec.py │ │ │ ├── eval.py │ │ │ ├── loadData.py │ │ │ └── main.py │ │ └── NeuRec_our_interface │ │ │ ├── FilmTrust │ │ │ └── FilmTrustReader.py │ │ │ ├── Frappe │ │ │ └── FrappeReader.py │ │ │ ├── INeuRec.py │ │ │ ├── INeuRecWrapper.py │ │ │ ├── Movielens1M │ │ │ └── Movielens1MReader.py │ │ │ ├── MovielensHetrec2011 │ │ │ └── MovielensHetrec2011Reader.py │ │ │ ├── UNeuRec.py │ │ │ └── UNeuRecWrapper.py │ ├── KDD │ │ ├── CollaborativeDL_github_matlab │ │ │ ├── README.md │ │ │ ├── candidate_lrates.m │ │ │ ├── candidate_moments.m │ │ │ ├── cdl.m │ │ │ ├── collaborative-dl.ipynb │ │ │ ├── ctr-part-release │ │ │ │ ├── .ctr.cpp.swp │ │ │ │ ├── Makefile │ │ │ │ ├── README │ │ │ │ ├── ctr │ │ │ │ ├── ctr.cpp │ │ │ │ ├── ctr.h │ │ │ │ ├── data.cpp │ │ │ │ ├── data.h │ │ │ │ ├── main.cpp │ │ │ │ ├── utils.cpp │ │ │ │ └── utils.h │ │ │ ├── dae.m │ │ │ ├── dae_get_hidden.m │ │ │ ├── default_dae.m │ │ │ ├── default_sdae.m │ │ │ ├── dsigmoid.m │ │ │ ├── gpl-2.0.txt │ │ │ ├── logdiff.m │ │ │ ├── logsum.m │ │ │ ├── print_n_updates.m │ │ │ ├── save_intermediate.m │ │ │ ├── sdae_get_hidden.m │ │ │ ├── sdae_get_visible.m │ │ │ ├── sigmoid.m │ │ │ ├── softmax.m │ │ │ └── train_rbm.m │ │ ├── CollaborativeDL_our_interface │ │ │ ├── CDL.py │ │ │ └── CollaborativeDL_Matlab_RecommenderWrapper.py │ │ ├── CollaborativeVAE_github │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── citeulike-t │ │ │ │ ├── test_cvae-cf10.py │ │ │ │ ├── test_cvae.py │ │ │ │ └── test_vae.py │ │ │ ├── image │ │ │ │ └── recall.png │ │ │ ├── test.m │ │ │ ├── test_cvae.py │ │ │ └── test_vae.py │ │ ├── CollaborativeVAE_our_interface │ │ │ ├── Citeulike │ │ │ │ └── CiteulikeReader.py │ │ │ └── CollaborativeVAE_RecommenderWrapper.py │ │ ├── MCRec_github │ │ │ ├── README.md │ │ │ └── code │ │ │ │ ├── Dataset.py │ │ │ │ ├── MCRec.py │ │ │ │ ├── evaluate.py │ │ │ │ └── metapathbasedPathSampleForMovielens.py │ │ └── MCRec_our_interface │ │ │ ├── MCRecRecommenderWrapper.py │ │ │ └── Movielens100K │ │ │ └── Movielens100KReader.py │ ├── RecSys │ │ ├── SpectralCF_github │ │ │ ├── README.md │ │ │ ├── SpectralCF.py │ │ │ ├── load_data.py │ │ │ ├── main.py │ │ │ ├── params.py │ │ │ ├── test.py │ │ │ └── utils.py │ │ └── SpectralCF_our_interface │ │ │ ├── AmazonInstantVideo │ │ │ └── AmazonInstantVideoReader.py │ │ │ ├── Movielens1M │ │ │ └── Movielens1MReader.py │ │ │ ├── MovielensHetrec2011 │ │ │ └── MovielensHetrec2011Reader.py │ │ │ ├── SpectralCF.py │ │ │ └── SpectralCF_RecommenderWrapper.py │ ├── 
SIGIR │ │ ├── CMN_github │ │ │ ├── README.md │ │ │ ├── pretrain.py │ │ │ ├── train.py │ │ │ └── util │ │ │ │ ├── attention.py │ │ │ │ ├── cmn.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluation.py │ │ │ │ ├── gmf.py │ │ │ │ ├── helper.py │ │ │ │ └── layers.py │ │ └── CMN_our_interface │ │ │ ├── CMN_RecommenderWrapper.py │ │ │ ├── CiteULike │ │ │ └── CiteULikeReader.py │ │ │ ├── Epinions │ │ │ └── EpinionsReader.py │ │ │ └── Pinterest │ │ │ └── PinterestICCVReader.py │ └── WWW │ │ ├── MultiVAE_github │ │ ├── LICENSE │ │ ├── README.md │ │ └── VAE_ML20M_WWW2018.ipynb │ │ ├── MultiVAE_our_interface │ │ ├── EvaluatorUserSubsetWrapper.py │ │ ├── Movielens20M │ │ │ └── Movielens20MReader.py │ │ ├── MultiVAE_RecommenderWrapper.py │ │ ├── MultiVae_Dae.py │ │ ├── NetflixPrize │ │ │ └── NetflixPrizeReader.py │ │ ├── VAE_CF_data_splitter.py │ │ └── split_train_validation_test_VAE_CF.py │ │ ├── NeuMF_github │ │ ├── Dataset.py │ │ ├── GMF.py │ │ ├── LICENSE │ │ ├── MLP.py │ │ ├── NeuMF.py │ │ ├── README.md │ │ └── evaluate.py │ │ └── NeuMF_our_interface │ │ ├── Movielens1M │ │ └── Movielens1MReader.py │ │ ├── NeuMF_RecommenderWrapper.py │ │ └── Pinterest │ │ └── PinterestICCVReader.py ├── CythonCompiler │ ├── compile_script.py │ └── run_compile_subprocess.py ├── DATASET_SPLITS.md ├── DL_Evaluation_TOIS_Additional_material.pdf ├── Data_manager │ ├── AmazonReviewData │ │ ├── AmazonAllBeautyReader.py │ │ ├── AmazonAllCreditCardsReader.py │ │ ├── AmazonAllElectronicsReader.py │ │ ├── AmazonAlternativeRockReader.py │ │ ├── AmazonAmazonCoinsReader.py │ │ ├── AmazonAmazonFashionReader.py │ │ ├── AmazonAmazonFireTVReader.py │ │ ├── AmazonAmazonInstantVideoReader.py │ │ ├── AmazonAppliancesReader.py │ │ ├── AmazonAppsforAndroidReader.py │ │ ├── AmazonAppstoreforAndroidReader.py │ │ ├── AmazonArtsCraftsSewingReader.py │ │ ├── AmazonAutomotiveReader.py │ │ ├── AmazonBabyProductsReader.py │ │ ├── AmazonBabyReader.py │ │ ├── AmazonBeautyReader.py │ │ ├── AmazonBluesReader.py │ │ ├── AmazonBooksReader.py │ │ ├── AmazonBroadwayVocalistsReader.py │ │ ├── AmazonBuyaKindleReader.py │ │ ├── AmazonCDsVinylReader.py │ │ ├── AmazonCameraPhotoReader.py │ │ ├── AmazonCarElectronicsReader.py │ │ ├── AmazonCelebrateyourBirthdaywithNickelodeonReader.py │ │ ├── AmazonCellPhonesAccessoriesReader.py │ │ ├── AmazonChildrensMusicReader.py │ │ ├── AmazonChristianReader.py │ │ ├── AmazonClassicRockReader.py │ │ ├── AmazonClassicalReader.py │ │ ├── AmazonClothingShoesJewelryReader.py │ │ ├── AmazonCollectibleCoinsReader.py │ │ ├── AmazonCollectiblesFineArtReader.py │ │ ├── AmazonComputersReader.py │ │ ├── AmazonCountryReader.py │ │ ├── AmazonDanceElectronicReader.py │ │ ├── AmazonDavisReader.py │ │ ├── AmazonDigitalMusicReader.py │ │ ├── AmazonElectronicsReader.py │ │ ├── AmazonEntertainmentReader.py │ │ ├── AmazonFolkReader.py │ │ ├── AmazonFurnitureDecorReader.py │ │ ├── AmazonGPSNavigationReader.py │ │ ├── AmazonGiftCardsReader.py │ │ ├── AmazonGiftCardsStoreReader.py │ │ ├── AmazonGospelReader.py │ │ ├── AmazonGroceryGourmetFoodReader.py │ │ ├── AmazonHardRockMetalReader.py │ │ ├── AmazonHealthPersonalCareReader.py │ │ ├── AmazonHomeImprovementReader.py │ │ ├── AmazonHomeKitchenReader.py │ │ ├── AmazonIndustrialScientificReader.py │ │ ├── AmazonInternationalReader.py │ │ ├── AmazonJazzReader.py │ │ ├── AmazonKindleStoreReader.py │ │ ├── AmazonKitchenDiningReader.py │ │ ├── AmazonLatinMusicReader.py │ │ ├── AmazonLearningEducationReader.py │ │ ├── AmazonLuxuryBeautyReader.py │ │ ├── AmazonMP3PlayersAccessoriesReader.py │ │ ├── 
AmazonMagazineSubscriptionsReader.py │ │ ├── AmazonMicrosoftReader.py │ │ ├── AmazonMiscellaneousReader.py │ │ ├── AmazonMoviesTVReader.py │ │ ├── AmazonMusicalInstrumentsReader.py │ │ ├── AmazonNewAgeReader.py │ │ ├── AmazonNickelodeonReader.py │ │ ├── AmazonOfficeProductsReader.py │ │ ├── AmazonOfficeSchoolSuppliesReader.py │ │ ├── AmazonPatioLawnGardenReader.py │ │ ├── AmazonPetSuppliesReader.py │ │ ├── AmazonPopReader.py │ │ ├── AmazonPublishersReader.py │ │ ├── AmazonPurchaseCirclesReader.py │ │ ├── AmazonRBReader.py │ │ ├── AmazonRapHipHopReader.py │ │ ├── AmazonRockReader.py │ │ ├── AmazonSoftwareReader.py │ │ ├── AmazonSportsCollectiblesReader.py │ │ ├── AmazonSportsOutdoorsReader.py │ │ ├── AmazonToolsHomeImprovementReader.py │ │ ├── AmazonToysGamesReader.py │ │ ├── AmazonVideoGamesReader.py │ │ ├── AmazonWineReader.py │ │ └── _AmazonReviewDataReader.py │ ├── Anime │ │ └── AnimeReader.py │ ├── BookCrossing │ │ └── BookCrossingReader.py │ ├── CiaoDVD │ │ └── CiaoDVDReader.py │ ├── DataReader.py │ ├── DataReader_utils.py │ ├── DataSplitter.py │ ├── DataSplitter_global_timestamp.py │ ├── DataSplitter_k_fold_random.py │ ├── DataSplitter_leave_k_out.py │ ├── Dataset.py │ ├── Dating │ │ └── DatingReader.py │ ├── Epinions │ │ └── EpinionsReader.py │ ├── ExampleCSVDataset │ │ └── ExampleCSVDatasetReader.py │ ├── FilmTrust │ │ └── FilmTrustReader.py │ ├── Flixster │ │ └── FlixsterReader.py │ ├── Frappe │ │ └── FrappeReader.py │ ├── Goodreads │ │ └── GoodreadsReader.py │ ├── GoogleLocalReviews │ │ └── GoogleLocalReviewsReader.py │ ├── Gowalla │ │ └── GowallaReader.py │ ├── IncrementalSparseMatrix.py │ ├── IncrementalSparseMatrix_test.py │ ├── Jester2 │ │ └── Jester2Reader.py │ ├── LastFM │ │ └── LastFMReader.py │ ├── MarketBiasAmazon │ │ └── MarketBiasAmazonReader.py │ ├── MarketBiasModCloth │ │ └── MarketBiasModClothReader.py │ ├── MovieTweetings │ │ └── MovieTweetingsReader.py │ ├── Movielens │ │ ├── Movielens100KReader.py │ │ ├── Movielens10MReader.py │ │ ├── Movielens1MReader.py │ │ ├── Movielens20MReader.py │ │ ├── MovielensHetrec2011Reader.py │ │ └── _utils_movielens_parser.py │ ├── NetflixPrize │ │ └── NetflixPrizeReader.py │ ├── Recipes │ │ └── RecipesReader.py │ ├── TagPreprocessing.py │ ├── Utility.py │ ├── Wikilens │ │ └── WikilensReader.py │ ├── YahooMovies │ │ └── YahooMoviesReader.py │ ├── YahooMusic │ │ └── YahooMusicReader.py │ ├── create_all_data_splits.py │ ├── data_consistency_check.py │ ├── load_and_save_data.py │ └── split_functions │ │ ├── split_data_on_global_timestamp.py │ │ ├── split_data_on_timestamp.py │ │ ├── split_train_validation.py │ │ └── split_train_validation_leave_k_out.py ├── EASE_R │ └── EASE_R_Recommender.py ├── Experiment_handler │ ├── Experiment.py │ └── run_experiment.py ├── GCP_README.md ├── GENERATING_RECZILLA_DATA.md ├── GraphBased │ ├── P3alphaRecommender.py │ └── RP3betaRecommender.py ├── KNN │ ├── ItemKNNCBFRecommender.py │ ├── ItemKNNCFRecommender.py │ ├── ItemKNNCustomSimilarityRecommender.py │ ├── ItemKNN_CFCBF_Hybrid_Recommender.py │ ├── UserKNNCBFRecommender.py │ ├── UserKNNCFRecommender.py │ └── UserKNN_CFCBF_Hybrid_Recommender.py ├── LICENSE ├── MatrixFactorization │ ├── Cython │ │ ├── MatrixFactorization_Cython.py │ │ └── MatrixFactorization_Cython_Epoch.pyx │ ├── IALSRecommender.py │ ├── NMFRecommender.py │ └── PureSVDRecommender.py ├── Metafeatures │ ├── Basic.py │ ├── DistributionFeatures.py │ ├── Featurizer.py │ ├── Landmarkers.py │ ├── Metafeatures.csv │ ├── README.md │ ├── fetch_data.sh │ └── utils.py ├── ParameterTuning │ ├── 
ParameterSpace.py │ ├── RandomSearch.py │ ├── SearchAbstractClass.py │ ├── SearchBayesianSkopt.py │ ├── SearchSingleCase.py │ └── run_parameter_search.py ├── README_Original.md ├── RECZILLA_NOTES.md ├── Recommender_import_list.py ├── ReczillaClassifier │ ├── README.md │ ├── classifier.py │ ├── dataset_families.py │ ├── experiments │ │ ├── cf4cf.py │ │ ├── cunha2018.py │ │ ├── different_metalearners.py │ │ ├── hold_one_out_all.py │ │ ├── meta_perf_vs_num_algs.py │ │ ├── meta_perf_vs_num_feats.py │ │ ├── meta_perf_vs_num_train_datasets.py │ │ └── reczilla.py │ ├── fetch_metadata.sh │ ├── fig.png │ ├── fixed_algs_feats.py │ ├── gen_features.ipynb │ ├── get_alg_feat_selection_data.py │ ├── plots │ │ ├── cunha_comparison.ipynb │ │ ├── generate_plot.py │ │ ├── perf_vs_algs_plot.ipynb │ │ ├── perf_vs_datasets_plot.ipynb │ │ ├── perf_vs_datasets_plot_FINAL.ipynb │ │ ├── perf_vs_feats_plot.ipynb │ │ ├── perf_vs_feats_plot_FINAL.ipynb │ │ ├── plot.py │ │ └── plot_files │ │ │ ├── perf_vs_datasets_plot.pdf │ │ │ └── perf_vs_feats_plot.pdf │ ├── run_reczilla.py │ └── utils.py ├── SLIM_BPR │ └── Cython │ │ ├── SLIM_BPR_Cython.py │ │ └── SLIM_BPR_Cython_Epoch.pyx ├── SLIM_ElasticNet │ └── SLIMElasticNetRecommender.py ├── Slides │ ├── RecSys2019_DeepLearning_Evaluation_Poster.pdf │ └── RecSys2019_DeepLearning_Evaluation_Slides.pdf ├── SurpriseAlgorithms │ └── Wrappers.py ├── Utils │ ├── ResultFolderLoader.py │ ├── assertions_on_data_for_experiments.py │ ├── plot_popularity.py │ ├── print_negative_items_stats.py │ ├── reczilla_utils.py │ ├── seconds_to_biggest_unit.py │ └── update_old_reczilla_results.py ├── algorithm_handler.py ├── dataset_handler.py ├── reczilla_analysis │ ├── process_inbox.py │ └── process_logs.py ├── reczilla_examples │ ├── example_split │ │ ├── data_reader_splitter_class │ │ ├── split_URM_only_warm_users_use_validation_set.zip │ │ ├── split_mappers_only_warm_users_use_validation_set.zip │ │ └── split_parameters_only_warm_users_use_validation_set.zip │ └── read_dataset_split.py ├── requirements.txt ├── requirements_gpu.txt ├── run_IJCAI_17_DELF.py ├── run_IJCAI_17_DMF.py ├── run_IJCAI_18_ConvNCF.py ├── run_IJCAI_18_ConvNCF_CNN_embedding.py ├── run_IJCAI_18_CoupledCF.py ├── run_IJCAI_18_CoupledCF_CNN_embedding.py ├── run_IJCAI_18_NeuRec.py ├── run_IJCAI_19_CFM_CNN_embedding.py ├── run_KDD_15_CollaborativeDL.py ├── run_KDD_17_CollaborativeVAE.py ├── run_KDD_18_MCRec.py ├── run_RecSys_18_SpectralCF.py ├── run_SIGIR_18_CMN.py ├── run_WWW_17_NeuMF.py ├── run_WWW_18_Mult_VAE.py ├── run_compile_all_cython.py └── run_example_usage.py ├── ReczillaModels ├── item_hit_cov.pickle ├── mrr_10.pickle ├── prec_10.pickle └── time_on_train.pickle ├── examples └── random_rating_list.csv ├── img ├── logo.png ├── logo2.png ├── logo3.png └── reczilla_overview.png ├── notebooks ├── README.md ├── alg_feature_selection.ipynb ├── best_alg_correlations.ipynb ├── best_alg_performance_tables.ipynb ├── failed_experiment_analysis.ipynb ├── hyperparameter_transfer_matrix.ipynb ├── metadataset_analysis.ipynb ├── metadataset_analysis_update.ipynb ├── performance_meta_dataset.csv ├── preapre_metadataset_v0.ipynb ├── preapre_metadataset_v1.ipynb ├── preapre_metadataset_v2.ipynb ├── preapre_metadataset_v3a.ipynb ├── preapre_metadataset_v4.ipynb ├── preapre_metadataset_workshop.ipynb ├── reczilla_results.ipynb ├── reczilla_results_metafeatures.ipynb ├── results_16Mar22.csv ├── tables │ ├── runtime_table.csv │ ├── runtime_table.tex │ ├── table_1_test_metric_F1_cut_1.csv │ ├── table_1_test_metric_F1_cut_1.tex │ ├── 
table_1_test_metric_F1_cut_10.csv │ ├── table_1_test_metric_F1_cut_10.tex │ ├── table_1_test_metric_F1_cut_2.csv │ ├── table_1_test_metric_F1_cut_2.tex │ ├── table_1_test_metric_F1_cut_5.csv │ ├── table_1_test_metric_F1_cut_5.tex │ ├── table_1_test_metric_F1_cut_50.csv │ ├── table_1_test_metric_F1_cut_50.tex │ ├── table_1_test_metric_HIT_RATE_cut_1.csv │ ├── table_1_test_metric_HIT_RATE_cut_1.tex │ ├── table_1_test_metric_HIT_RATE_cut_10.csv │ ├── table_1_test_metric_HIT_RATE_cut_10.tex │ ├── table_1_test_metric_HIT_RATE_cut_2.csv │ ├── table_1_test_metric_HIT_RATE_cut_2.tex │ ├── table_1_test_metric_HIT_RATE_cut_5.csv │ ├── table_1_test_metric_HIT_RATE_cut_5.tex │ ├── table_1_test_metric_HIT_RATE_cut_50.csv │ ├── table_1_test_metric_HIT_RATE_cut_50.tex │ ├── table_1_test_metric_MAP_cut_1.csv │ ├── table_1_test_metric_MAP_cut_1.tex │ ├── table_1_test_metric_MAP_cut_10.csv │ ├── table_1_test_metric_MAP_cut_10.tex │ ├── table_1_test_metric_MAP_cut_2.csv │ ├── table_1_test_metric_MAP_cut_2.tex │ ├── table_1_test_metric_MAP_cut_5.csv │ ├── table_1_test_metric_MAP_cut_5.tex │ ├── table_1_test_metric_MAP_cut_50.csv │ ├── table_1_test_metric_MAP_cut_50.tex │ ├── table_1_test_metric_NDCG_cut_1.csv │ ├── table_1_test_metric_NDCG_cut_1.tex │ ├── table_1_test_metric_NDCG_cut_10.csv │ ├── table_1_test_metric_NDCG_cut_10.tex │ ├── table_1_test_metric_NDCG_cut_2.csv │ ├── table_1_test_metric_NDCG_cut_2.tex │ ├── table_1_test_metric_NDCG_cut_5.csv │ ├── table_1_test_metric_NDCG_cut_5.tex │ ├── table_1_test_metric_NDCG_cut_50.csv │ ├── table_1_test_metric_NDCG_cut_50.tex │ ├── table_1_test_metric_PRECISION_cut_1.csv │ ├── table_1_test_metric_PRECISION_cut_1.tex │ ├── table_1_test_metric_PRECISION_cut_10.csv │ ├── table_1_test_metric_PRECISION_cut_10.tex │ ├── table_1_test_metric_PRECISION_cut_2.csv │ ├── table_1_test_metric_PRECISION_cut_2.tex │ ├── table_1_test_metric_PRECISION_cut_5.csv │ ├── table_1_test_metric_PRECISION_cut_5.tex │ ├── table_1_test_metric_PRECISION_cut_50.csv │ ├── table_1_test_metric_PRECISION_cut_50.tex │ ├── table_2a.csv │ ├── table_2a.tex │ ├── table_2b.csv │ ├── table_2b.tex │ ├── table_3_final.csv │ └── table_3_final.tex ├── test_KNN_pipeline.ipynb └── triage_exceptions.ipynb ├── requirements.txt ├── run_reczilla_inference.sh ├── scripts ├── alg_list.txt ├── dataset_list.txt ├── neural_methods │ ├── manually_run_gpu_experiment.sh │ ├── run_single_alg.sh │ └── test_neural_methods.sh ├── neurips_experiments │ ├── algs_a_llo.sh │ ├── algs_b_llo.sh │ ├── algs_c_llo.sh │ ├── algs_d_llo.sh │ ├── knn_llo.sh │ ├── knn_llo_b.sh │ └── v5_dataset_list.sh ├── run_all_experiments.sh ├── run_experiment_on_instance.sh ├── run_random_experiment.sh ├── test_experiment.sh ├── tests │ ├── two_alg_all_datasets.sh │ └── two_datasets_all_algs.sh ├── update_reczilla_image.sh ├── utils.sh └── workshop_experiments │ ├── full_experiment_algs_a.sh │ ├── full_experiment_algs_b.sh │ ├── full_experiment_algs_c.sh │ ├── full_experiment_algs_d.sh │ └── full_experiment_knn.sh └── train_reczilla_models.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | *.gz filter=lfs diff=lfs merge=lfs -text 2 | *.data filter=lfs diff=lfs merge=lfs -text 3 | *.umtm_5_1 filter=lfs diff=lfs merge=lfs -text 4 | *.csv filter=lfs diff=lfs merge=lfs -text 5 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Base/BaseTempFolder.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 19/06/2019 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from Base.Recommender_utils import get_unique_temp_folder 10 | import os, shutil 11 | 12 | 13 | class BaseTempFolder(object): 14 | 15 | def __init__(self): 16 | super(BaseTempFolder, self).__init__() 17 | 18 | self.DEFAULT_TEMP_FILE_FOLDER = './result_experiments/__Temp_{}_{}/'.format(self.RECOMMENDER_NAME, os.getpid()) 19 | 20 | 21 | def _get_unique_temp_folder(self, input_temp_file_folder = None): 22 | 23 | if input_temp_file_folder is None: 24 | print("{}: Using default Temp folder '{}'".format(self.RECOMMENDER_NAME, self.DEFAULT_TEMP_FILE_FOLDER)) 25 | self._use_default_temp_folder = True 26 | output_temp_file_folder = get_unique_temp_folder(self.DEFAULT_TEMP_FILE_FOLDER) 27 | else: 28 | print("{}: Using Temp folder '{}'".format(self.RECOMMENDER_NAME, input_temp_file_folder)) 29 | self._use_default_temp_folder = False 30 | output_temp_file_folder = get_unique_temp_folder(input_temp_file_folder) 31 | 32 | if not os.path.isdir(output_temp_file_folder): 33 | os.makedirs(output_temp_file_folder) 34 | 35 | 36 | return output_temp_file_folder 37 | 38 | 39 | 40 | def _clean_temp_folder(self, temp_file_folder): 41 | """ 42 | Clean temporary folder only if the default one 43 | :return: 44 | """ 45 | 46 | if self._use_default_temp_folder: 47 | print("{}: Cleaning temporary files from '{}'".format(self.RECOMMENDER_NAME, temp_file_folder)) 48 | shutil.rmtree(temp_file_folder, ignore_errors=True) 49 | 50 | else: 51 | print("{}: Maintaining temporary files due to a custom temp folder being selected".format(self.RECOMMENDER_NAME)) 52 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Base/Recommender_utils_Test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 30/09/17 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from Base.Recommender_utils import similarityMatrixTopK 10 | 11 | import numpy as np 12 | import scipy.sparse as sps 13 | import unittest 14 | 15 | 16 | class MyTestCase(unittest.TestCase): 17 | 18 | def test_similarityMatrixTopK_denseToDense(self): 19 | 20 | numRows = 100 21 | 22 | TopK = 20 23 | 24 | dense_input = np.random.random((numRows, numRows)) 25 | dense_output = similarityMatrixTopK(dense_input, k=TopK) 26 | 27 | numExpectedNonZeroCells = TopK*numRows 28 | 29 | numNonZeroCells = np.sum(dense_output!=0) 30 | 31 | self.assertEqual(numExpectedNonZeroCells, numNonZeroCells, "DenseToDense incorrect") 32 | 33 | 34 | def test_similarityMatrixTopK_sparseToSparse(self): 35 | 36 | numRows = 20 37 | 38 | TopK = 5 39 | 40 | dense_input = np.random.random((numRows, numRows)) 41 | 42 | topk_on_dense_input = similarityMatrixTopK(dense_input, k=TopK) 43 | 44 | sparse_input = sps.csc_matrix(dense_input) 45 | topk_on_sparse_input = similarityMatrixTopK(sparse_input, k=TopK) 46 | 47 | topk_on_dense_input = topk_on_dense_input.toarray() 48 | topk_on_sparse_input = topk_on_sparse_input.toarray() 49 | 50 | self.assertTrue(np.allclose(topk_on_dense_input, topk_on_sparse_input), "sparseToSparse CSC incorrect") 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | unittest.main() 56 | 57 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/CNN_on_embeddings/IJCAI/CFM_github/README.md: 
-------------------------------------------------------------------------------- 1 | # Convolutional Factorization Machines (CFM) 2 | This is the Tensorflow implementation for the paper: 3 | 4 | >Xin Xin, Bo Chen, Xiangnan He, Dong Wang, Yue Ding and Joemon Jose. CFM: Convolutional Factorization Machines for Context-Aware Recommendation. In IJCAI, 2019. 5 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/CNN_on_embeddings/IJCAI/CFM_our_interface/README.md: -------------------------------------------------------------------------------- 1 | # Convolutional Factorization Machines (CFM) 2 | This is the Tensorflow implementation for the paper: 3 | 4 | >Xin Xin, Bo Chen, Xiangnan He, Dong Wang, Yue Ding and Joemon Jose. CFM: Convolutional Factorization Machines for Context-Aware Recommendation. In IJCAI, 2019. 5 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/Data/yelp.test.negative.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eec3d05680b97b7aa30def44828d9a5f12d3fe5c54ae5528205a04499ff980a0 3 | size 64424016 4 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/Data/yelp.test.rating.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bd370ec69bda7a4ccc05ed26c71ed449e6e869b71cb5213c1257f267d417af93 3 | size 203732 4 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/Data/yelp.train.rating.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ec6e0f98b44d0df28b30ad7a1cafb15baee6cb4bdaddf875f5037a1277278700 3 | size 4665357 4 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/README.md: -------------------------------------------------------------------------------- 1 | # Outer Product-based Neural Collaborative Filtering 2 | 3 | Convolutional Neural Collaborative Filtering performs well based on the outer product of user and item embeddings. This is our official implementation for the paper: 4 | 5 | Xiangnan He, Xiaoyu Du, Xiang Wang, Feng Tian, Jinhui Tang, Tat-Seng Chua, **Outer Product-based Neural Collaborative Filtering**, In Proceedings of IJCAI'18. 6 | 7 | If you use the code, please cite our paper. Thanks! 8 | 9 | # Requirements 10 | 11 | - Tensorflow 1.7 12 | - numpy, scipy 13 | 14 | # Quick Start 15 | 16 | ![figure.png](./figure.png) 17 | 18 | 0. Decompress the data files. 19 | ``` 20 | cd Data 21 | gunzip * 22 | ``` 23 | 24 | 1. Pretrain the embeddings using MF_BPR: 25 | 26 | ``` 27 | python MF_BPR.py 28 | ``` 29 | 30 | 2. Train ConvNCF with the pretrained embeddings: 31 | 32 | ``` 33 | python ConvNCF.py --pretrain=1 34 | ``` 35 | 36 | 37 | # Dataset 38 | 39 | 40 | We provide the compressed dataset Yelp (yelp) in Data/ 41 | 42 | ### train.rating: 43 | 44 | Train file. 45 | 46 | Each line is a training instance: 47 | 48 | `userID\t itemID\t rating\t timestamp (if available)` 49 | 50 | ### test.rating: 51 | 52 | Test file (positive instances).
53 | Each line is a testing instance: 54 | 55 | `userID\t itemID\t rating\t timestamp (if available)` 56 | 57 | ### test.negative 58 | 59 | Test file (negative instances). 60 | Each line corresponds to the line of test.rating, containing 999 negative samples. 61 | Each line is in the format: 62 | 63 | `(userID,itemID)\t negativeItemID1\t negativeItemID2 ...` 64 | 65 | # Files 66 | 67 | - Data. Training and testing data. 68 | - yelp.train.rating. Rating of training data. 69 | - yelp.test.rating. Rating of testing data. 70 | - yelp.test.negative. 1000 testing samples for each user: the positive test item plus the 999 negatives. (0,32) means this row is for user 0 and the positive test item is 32. 71 | - Dataset.py. Module for preprocessing data. 72 | - saver.py. Module for saving parameters. 73 | - MF_BPR.py. MF model with BPR loss. 74 | - ConvNCF.py. Our model. 75 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/figure.png -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/ConvNCF_github/saver.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Saver(object): 4 | def save(self, model, sess): 5 | print('save not implemented') 6 | 7 | class MFSaver(Saver): 8 | def __init__(self): 9 | self.prefix = None 10 | 11 | def setPrefix(self, prefix = None): 12 | self.prefix = prefix 13 | 14 | def save(self, model, sess): 15 | if self.prefix is None: 16 | print ("prefix should be set by MFSaver.setPrefix(prefix)") 17 | return 18 | 19 | params = sess.run([model.embedding_P, model.embedding_Q]) 20 | print ('saving model.embedding_P', params[0].shape, ', model.embedding_Q', params[1].shape,\ 21 | ' to', self.prefix, "_*.txt") 22 | 23 | f = open(self.prefix + "_P.txt", 'w') 24 | np.savetxt(f, params[0]) 25 | f.close() 26 | 27 | f = open(self.prefix + "_Q.txt", 'w') 28 | np.savetxt(f, params[1]) 29 | f.close() 30 | 31 | 32 | class GMFSaver(Saver): 33 | def __init__(self): 34 | self.prefix = None 35 | 36 | def setPrefix(self, prefix = None): 37 | self.prefix = prefix -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/DELF_EF.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -u main_attention.py --layers [32,16,8]; 3 | python -u main_attention.py --layers [64,32,16]; 4 | python -u main_attention.py --layers [128,64,32]; 5 | python -u main_attention.py --layers [256,128,64]; 6 | python -u main_attention.py --layers [32,16,8] --dataset Amusic-paper; 7 | python -u main_attention.py --layers [64,32,16] --dataset Amusic-paper; 8 | python -u main_attention.py --layers [128,64,32] --dataset Amusic-paper; 9 | python -u main_attention.py --layers [256,128,64] --dataset Amusic-paper; 10 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/DELF_MLP.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -u main_attention.py --layers [32,16,8]; 3 | python -u 
main_attention.py --layers [64,32,16]; 4 | python -u main_attention.py --layers [128,64,32]; 5 | python -u main_attention.py --layers [256,128,64]; 6 | python -u main_attention.py --layers [32,16,8] --dataset Amusic-paper; 7 | python -u main_attention.py --layers [64,32,16] --dataset Amusic-paper; 8 | python -u main_attention.py --layers [128,64,32] --dataset Amusic-paper; 9 | python -u main_attention.py --layers [256,128,64] --dataset Amusic-paper; 10 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/DMF.sh: -------------------------------------------------------------------------------- 1 | python -u main_nsvd.py --layers [32,16,8] --dataset ml-1m ; 2 | python -u main_nsvd.py --layers [64,32,16] --dataset ml-1m ; 3 | python -u main_nsvd.py --layers [128,64,32] --dataset ml-1m ; 4 | python -u main_nsvd.py --layers [256,128,64] --dataset ml-1m ; 5 | python -u main_nsvd.py --layers [32,16,8] --dataset Amusic-paper; 6 | python -u main_nsvd.py --layers [64,32,16] --dataset Amusic-paper; 7 | python -u main_nsvd.py --layers [128,64,32] --dataset Amusic-paper; 8 | python -u main_nsvd.py --layers [256,128,64] --dataset Amusic-paper; 9 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/MLP.sh: -------------------------------------------------------------------------------- 1 | #python -u main.py --layers [32,16,8] --dataset ml-1m --MLP 1; 2 | #python -u main.py --layers [64,32,16] --dataset ml-1m --MLP 1; 3 | #python -u main.py --layers [128,64,32] --dataset ml-1m --MLP 1; 4 | #python -u main.py --layers [256,128,64] --dataset ml-1m --MLP 1; 5 | python -u main.py --layers [32,16,8] --dataset Amusic-paper --MLP 1; 6 | python -u main.py --layers [64,32,16] --dataset Amusic-paper --MLP 1; 7 | python -u main.py --layers [128,64,32] --dataset Amusic-paper --MLP 1; 8 | python -u main.py --layers [256,128,64] --dataset Amusic-paper --MLP 1; 9 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/Model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/Model/__init__.py -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/DELF_original/NeuMF.sh: -------------------------------------------------------------------------------- 1 | #python -u main.py --layers [32,16,8] --dataset ml-1m --MLP 0; 2 | #python -u main.py --layers [64,32,16] --dataset ml-1m --MLP 0; 3 | #python -u main.py --layers [128,64,32] --dataset ml-1m --MLP 0; 4 | #python -u main.py --layers [256,128,64] --dataset ml-1m --MLP 0; 5 | python -u main.py --layers [32,16,8] --dataset Amusic-paper --MLP 0; 6 | python -u main.py --layers [64,32,16] --dataset Amusic-paper --MLP 0; 7 | python -u main.py --layers [128,64,32] --dataset Amusic-paper --MLP 0; 8 | python -u main.py --layers [256,128,64] --dataset Amusic-paper --MLP 0; 9 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/NeuRec_github/README.md: -------------------------------------------------------------------------------- 1 | # 
NeuRec 2 | 3 | If you use this code, please cite the following IJCAI paper: 4 | 5 | ``` 6 | @article{zhang2018neurec, 7 | title={NeuRec: On Nonlinear Transformation for Personalized Ranking}, 8 | author={Zhang, Shuai and Yao, Lina and Sun, Aixin and Wang, Sen and Long, Guodong and Dong, Manqing}, 9 | journal={arXiv preprint arXiv:1805.03002}, 10 | year={2018} 11 | } 12 | ``` 13 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/IJCAI/NeuRec_github/eval.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def precision_recall_ndcg_at_k(k, rankedlist, test_matrix): 5 | idcg_k = 0 6 | dcg_k = 0 7 | n_k = k if len(test_matrix) > k else len(test_matrix) 8 | for i in range(n_k): 9 | idcg_k += 1 / math.log(i + 2, 2) 10 | 11 | b1 = rankedlist 12 | b2 = test_matrix 13 | s2 = set(b2) 14 | hits = [(idx, val) for idx, val in enumerate(b1) if val in s2] 15 | count = len(hits) 16 | 17 | for c in range(count): 18 | dcg_k += 1 / math.log(hits[c][0] + 2, 2) 19 | 20 | return float(count / k), float(count / len(test_matrix)), float(dcg_k / idcg_k) 21 | 22 | 23 | def map_mrr_ndcg(rankedlist, test_matrix): 24 | ap = 0 25 | map = 0 26 | dcg = 0 27 | idcg = 0 28 | mrr = 0 29 | for i in range(len(test_matrix)): 30 | idcg += 1 / math.log(i + 2, 2) 31 | 32 | b1 = rankedlist 33 | b2 = test_matrix 34 | s2 = set(b2) 35 | hits = [(idx, val) for idx, val in enumerate(b1) if val in s2] 36 | count = len(hits) 37 | 38 | for c in range(count): 39 | ap += (c + 1) / (hits[c][0] + 1) 40 | dcg += 1 / math.log(hits[c][0] + 2, 2) 41 | 42 | if count != 0: 43 | mrr = 1 / (hits[0][0] + 1) 44 | 45 | if count != 0: 46 | map = ap / count 47 | 48 | return map, mrr, float(dcg / idcg) 49 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/candidate_lrates.m: -------------------------------------------------------------------------------- 1 | costs = []; 2 | cand_lrates = []; 3 | cand_lrate = base_lrate; 4 | for s=1:(max_iter_up + 1) 5 | cand_lrates = [cand_lrates cand_lrate]; 6 | cand_lrate = cand_lrate * exp_up; 7 | end 8 | cand_lrate = base_lrate * exp_down; 9 | for s=1:(max_iter_down) 10 | cand_lrates = [cand_lrates cand_lrate]; 11 | cand_lrate = cand_lrate * exp_down; 12 | end 13 | 14 | 15 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/candidate_moments.m: -------------------------------------------------------------------------------- 1 | costs = []; 2 | cand_moments = []; 3 | cand_moment = base_moment; 4 | for s=1:(max_iter_up + 1) 5 | cand_moments = [cand_moments cand_moment]; 6 | cand_moment = cand_moment * adaptive_moment_exp_up; 7 | end 8 | cand_moment = base_moment * adaptive_moment_exp_down; 9 | for s=1:(max_iter_down) 10 | cand_moments = [cand_moments cand_moment]; 11 | cand_moment = cand_moment * adaptive_moment_exp_down; 12 | end 13 | 14 | 15 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/.ctr.cpp.swp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/.ctr.cpp.swp -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/Makefile: -------------------------------------------------------------------------------- 1 | CC = g++ -Wall 2 | #CC = g++ -ansi -Wall -pedantic 3 | #CFLAGS = -g -Wall -O3 -ffast-math -DHAVE_INLINE -DGSL_RANGE_CHECK_OFF 4 | # CFLAGS = -g -Wall 5 | LDFLAGS = -lgsl -lm -lgslcblas 6 | 7 | GSL_INCLUDE = /usr/local/gsl/include 8 | GSL_LIB = /usr/local/gsl/lib 9 | 10 | LSOURCE = main.cpp utils.cpp ctr.cpp data.cpp 11 | LHEADER = utils.h ctr.h data.h 12 | 13 | ctr: $(LSOURCE) $(LHEADER) 14 | $(CC) -I$(GSL_INCLUDE) -L$(GSL_LIB) $(LSOURCE) -o $@ $(LDFLAGS) 15 | 16 | ctr-d: $(LSOURCE) $(LHEADER) 17 | $(CC) -g -I$(GSL_INCLUDE) -L$(GSL_LIB) $(LSOURCE) -o $@ $(LDFLAGS) 18 | 19 | clean: 20 | -rm -f *.o ctr 21 | clean-d: 22 | -rm -f *.o ctr-d 23 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/README: -------------------------------------------------------------------------------- 1 | This is the code for the C++ part of CDL, for the updates of U and V, by Hao Wang. 2 | The code is based on the CTR code of Chong Wang. 3 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/ctr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/ctr -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/data.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include "data.h" 4 | 5 | c_data::c_data() { 6 | } 7 | 8 | c_data::~c_data() { 9 | for (size_t i = 0; i < m_vec_data.size(); i ++) { 10 | int* ids = m_vec_data[i]; 11 | if (ids != NULL) delete [] ids; 12 | } 13 | m_vec_data.clear(); 14 | m_vec_len.clear(); 15 | } 16 | 17 | void c_data::read_data(const char * data_filename, int OFFSET) { 18 | 19 | int length = 0, n = 0, id = 0, total = 0; 20 | 21 | FILE * fileptr; 22 | fileptr = fopen(data_filename, "r"); 23 | 24 | while ((fscanf(fileptr, "%10d", &length) != EOF)) { 25 | int * ids = NULL; 26 | if (length > 0) { 27 | ids = new int[length]; 28 | for (n = 0; n < length; n++) { 29 | fscanf(fileptr, "%10d", &id); 30 | ids[n] = id - OFFSET; 31 | } 32 | } 33 | m_vec_data.push_back(ids); 34 | m_vec_len.push_back(length); 35 | total += length; 36 | } 37 | fclose(fileptr); 38 | printf("read %d vectors with %d entries ...\n", (int)m_vec_len.size(), total); 39 | } 40 | 41 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/ctr-part-release/data.h: -------------------------------------------------------------------------------- 1 | // class for reading the sparse matrix data 2 | // for both user 
matrix and item matrix 3 | // user matrix: 4 | // number_of_items item1 item2 ... 5 | // item matrix: 6 | // number_of_users user1 user2 ... 7 | 8 | #ifndef DATA_H 9 | #define DATA_H 10 | 11 | #include <vector> 12 | 13 | using namespace std; 14 | 15 | class c_data { 16 | public: 17 | c_data(); 18 | ~c_data(); 19 | void read_data(const char * data_filename, int OFFSET=0); 20 | public: 21 | vector<int*> m_vec_data; 22 | vector<int> m_vec_len; 23 | }; 24 | 25 | #endif // DATA_H 26 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/dae_get_hidden.m: -------------------------------------------------------------------------------- 1 | % dae_get_hidden 2 | % Copyright (C) 2011 KyungHyun Cho, Tapani Raiko, Alexander Ilin 3 | % 4 | %This program is free software; you can redistribute it and/or 5 | %modify it under the terms of the GNU General Public License 6 | %as published by the Free Software Foundation; either version 2 7 | %of the License, or (at your option) any later version. 8 | % 9 | %This program is distributed in the hope that it will be useful, 10 | %but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | %MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | %GNU General Public License for more details. 13 | % 14 | %You should have received a copy of the GNU General Public License 15 | %along with this program; if not, write to the Free Software 16 | %Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | % 18 | function [h] = dae_get_hidden(x0, D, target_sparsity) 19 | 20 | if nargin < 3 21 | target_sparsity = 0; 22 | end 23 | 24 | h = bsxfun(@plus, x0 * D.W, D.hbias'); 25 | 26 | if D.hidden.binary == 1 27 | h = sigmoid(h, D.hidden.use_tanh); 28 | 29 | if target_sparsity > 0 && D.hidden.use_tanh == 0 30 | avg_acts = mean(h, 1); 31 | diff_acts = max(avg_acts - (1 - target_sparsity), 0); 32 | h = min(max(bsxfun(@minus, h, diff_acts), 0), 1); 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/dsigmoid.m: -------------------------------------------------------------------------------- 1 | function [y] = dsigmoid(x, use_tanh) 2 | 3 | if nargin < 2 4 | use_tanh = 0; 5 | end 6 | 7 | switch use_tanh 8 | case 0 9 | y = x .* (1 - x); 10 | case 1 11 | y = 1 - x.^2; 12 | case 2 13 | y = x; 14 | y(x > 0) = 1; 15 | y(x <= 0) = 0; 16 | end 17 | 18 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/logdiff.m: -------------------------------------------------------------------------------- 1 | % 2 | % The original code was downloaded from 3 | % Ruslan Salakhutdinov's homepage: 4 | % http://www.mit.edu/~rsalakhu/ 5 | % 6 | function ls = logdiff(xx,dim) 7 | % ls = logdiff(x,dim) 8 | % 9 | % returns the log of diff of logs 10 | % similar to logsum.m function 11 | 12 | if(length(xx(:))==1) ls=xx; return; end 13 | 14 | xdims=size(xx); 15 | if(nargin<2) 16 | dim=find(xdims>1); 17 | end 18 | 19 | alpha = max(xx,[],dim)-log(realmax)/2; 20 | repdims=ones(size(xdims)); repdims(dim)=xdims(dim); 21 | ls = alpha+log(diff(exp(xx-repmat(alpha,repdims)),dim)); 22 | 23 | --------------------------------------------------------------------------------
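logdiff.m above and logsum.m below implement the classic shifted log-sum-exp trick: subtract alpha = max(x) - log(realmax)/2 before exponentiating so that exp can never overflow, then add alpha back after the log. A minimal NumPy equivalent of logsum.m for reference; the function name and test values here are illustrative and not part of the repository:

```python
import numpy as np

def logsum_np(xx, dim):
    """log(sum(exp(xx), dim)) without overflow, mirroring logsum.m."""
    xx = np.asarray(xx, dtype=float)
    # Shift by alpha so the largest exponentiated value is sqrt(realmax).
    alpha = xx.max(axis=dim, keepdims=True) - np.log(np.finfo(float).max) / 2
    return np.squeeze(alpha, axis=dim) + np.log(np.exp(xx - alpha).sum(axis=dim))

x = np.array([[1000.0, 1000.0], [3.0, 4.0]])
print(logsum_np(x, 1))             # stable: approx [1000.6931, 4.3133]
print(np.log(np.exp(x).sum(1)))    # naive version overflows to [inf, 4.3133]
```

--------------------------------------------------------------------------------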
/RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/logsum.m: -------------------------------------------------------------------------------- 1 | % 2 | % The original code was downloaded from 3 | % Ruslan Salakhutdinov's homepage: 4 | % http://www.mit.edu/~rsalakhu/ 5 | % 6 | function ls = logsum(xx,dim) 7 | % ls = logsum(x,dim) 8 | % 9 | % returns the log of sum of logs 10 | % computes ls = log(sum(exp(x),dim)) 11 | % but in a way that tries to avoid underflow/overflow 12 | % 13 | % basic idea: shift before exp and reshift back 14 | % log(sum(exp(x))) = alpha + log(sum(exp(x-alpha))); 15 | % 16 | % This program was originally written by Sam Roweis 17 | 18 | if(length(xx(:))==1) ls=xx; return; end 19 | 20 | xdims=size(xx); 21 | if(nargin<2) 22 | dim=find(xdims>1); 23 | end 24 | 25 | alpha = max(xx,[],dim)-log(realmax)/2; 26 | repdims=ones(size(xdims)); 27 | repdims(dim)=xdims(dim); 28 | ls = alpha+log(sum(exp(xx-repmat(alpha,repdims)),dim)); 29 | 30 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/print_n_updates.m: -------------------------------------------------------------------------------- 1 | % print_n_updates - example hook function for each update 2 | % Copyright (C) 2011 KyungHyun Cho, Tapani Raiko, Alexander Ilin 3 | % 4 | % This program is free software; you can redistribute it and/or 5 | % modify it under the terms of the GNU General Public License 6 | % as published by the Free Software Foundation; either version 2 7 | % of the License, or (at your option) any later version. 8 | % 9 | % This program is distributed in the hope that it will be useful, 10 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | % GNU General Public License for more details. 13 | % 14 | % You should have received a copy of the GNU General Public License 15 | % along with this program; if not, write to the Free Software 16 | % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | % 18 | 19 | function err = save_intermediate (R, args) 20 | 21 | % args is a cell array 22 | 23 | % -1 signals the end of training 24 | err = 0; 25 | 26 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/sdae_get_hidden.m: -------------------------------------------------------------------------------- 1 | % sdae_get_hidden 2 | % Copyright (C) 2011 KyungHyun Cho, Tapani Raiko, Alexander Ilin 3 | % 4 | %This program is free software; you can redistribute it and/or 5 | %modify it under the terms of the GNU General Public License 6 | %as published by the Free Software Foundation; either version 2 7 | %of the License, or (at your option) any later version. 8 | % 9 | %This program is distributed in the hope that it will be useful, 10 | %but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | %MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | %GNU General Public License for more details. 13 | % 14 | %You should have received a copy of the GNU General Public License 15 | %along with this program; if not, write to the Free Software 16 | %Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 | % 18 | function [h_mf] = sdae_get_hidden(my, mask_output, x0, S, target_sparsity) 19 | 20 | if nargin < 5 21 | target_sparsity = 0; 22 | end 23 | 24 | layers = S.structure.layers; 25 | n_layers = length(layers); 26 | 27 | h_mf = x0; 28 | 29 | for l = 2:n_layers 30 | h_mf = bsxfun(@plus, h_mf * S.W{l-1}, S.biases{l}'); 31 | if my.dropout~=0 && l~=n_layers 32 | % recover from dropout 33 | h_mf = h_mf.*(1-my.dropout); 34 | end 35 | if my.dropout~=0 && l==n_layers && mask_output 36 | % recover from dropout 37 | h_mf = h_mf.*(1-my.dropout); 38 | end 39 | 40 | if l < n_layers || S.bottleneck.binary 41 | h_mf = sigmoid(h_mf, S.hidden.use_tanh); 42 | end 43 | end 44 | 45 | if S.bottleneck.binary 46 | if target_sparsity > 0 47 | avg_acts = mean(h_mf, 1); 48 | diff_acts = max(avg_acts - (1 - target_sparsity), 0); 49 | h_mf = min(max(bsxfun(@minus, h_mf, diff_acts), 0), 1); 50 | end 51 | end 52 | 53 | 54 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/sdae_get_visible.m: -------------------------------------------------------------------------------- 1 | % sdae_get_visible 2 | % Copyright (C) 2011 KyungHyun Cho, Tapani Raiko, Alexander Ilin 3 | % 4 | %This program is free software; you can redistribute it and/or 5 | %modify it under the terms of the GNU General Public License 6 | %as published by the Free Software Foundation; either version 2 7 | %of the License, or (at your option) any later version. 8 | % 9 | %This program is distributed in the hope that it will be useful, 10 | %but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | %MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | %GNU General Public License for more details. 13 | % 14 | %You should have received a copy of the GNU General Public License 15 | %along with this program; if not, write to the Free Software 16 | %Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | % 18 | function [x_mf] = sdae_get_visible(my, h0, S) 19 | 20 | layers = S.structure.layers; 21 | n_layers = length(layers); 22 | 23 | x_mf = h0; 24 | 25 | for l = n_layers-1:-1:1 26 | x_mf = bsxfun(@plus, x_mf * S.W{l}', S.biases{l}'); 27 | if my.dropout~=0 && l~=1 28 | % recover from dropout 29 | x_mf = x_mf.*(1-my.dropout); 30 | end 31 | 32 | if l > 1 || S.data.binary 33 | x_mf = sigmoid(x_mf, S.hidden.use_tanh); 34 | end 35 | end 36 | 37 | 38 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/sigmoid.m: -------------------------------------------------------------------------------- 1 | % simple wrapper for sigmoid function 2 | function [y] = sigmoid(x, use_tanh) 3 | 4 | if nargin < 2 5 | use_tanh = 0; 6 | end 7 | 8 | switch use_tanh 9 | case 0 10 | y = 1./(1 + exp(-x)); 11 | case 1 12 | y = tanh(x); 13 | case 2 14 | y = max (x, 0); 15 | end 16 | 17 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/softmax.m: -------------------------------------------------------------------------------- 1 | % simple wrapper for softmax function 2 | function [y] = softmax(x) 3 | 4 | logZ = logsum(x, 2); 5 | y = exp(bsxfun(@minus, x, logZ)); 6 | 7 | 8 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeDL_github_matlab/train_rbm.m: -------------------------------------------------------------------------------- 1 | % rbm - training restricted Boltzmann machine using Gibbs sampling 2 | % Copyright (C) 2011 KyungHyun Cho, Tapani Raiko, Alexander Ilin 3 | % 4 | % This program is free software; you can redistribute it and/or 5 | % modify it under the terms of the GNU General Public License 6 | % as published by the Free Software Foundation; either version 2 7 | % of the License, or (at your option) any later version. 8 | % 9 | % This program is distributed in the hope that it will be useful, 10 | % but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | % GNU General Public License for more details. 13 | % 14 | % You should have received a copy of the GNU General Public License 15 | % along with this program; if not, write to the Free Software 16 | % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
17 | % 18 | function [R_out] = train_rbm (R, patches); 19 | 20 | n_visible = R.structure.n_visible; 21 | n_hidden = R.structure.n_hidden; 22 | 23 | if length(R.vbias) ~= n_visible 24 | warning ('Reinitializing visible biases'); 25 | R.vbias_init = zeros(n_visible, 1); 26 | R.vbias = R.vbias_init; 27 | end 28 | 29 | if length(R.hbias) ~= n_hidden 30 | warning ('Reinitializing hidden biases'); 31 | R.hbias_init = zeros(n_hidden, 1); 32 | R.hbias = R.hbias_init; 33 | end 34 | 35 | if sum( (size(R.W) - [n_visible n_hidden]).^2 ) ~= 0 36 | warning ('Reinitializing weights'); 37 | R.W_init = R.learning.weight_scale * 2 * (rand(n_visible, n_hidden) - 0.5); 38 | R.W = R.W_init; 39 | end 40 | 41 | % TODO: Merge rbm_pt.m and grbm_pt.m 42 | if R.parallel_tempering.use == 1 43 | if R.data.binary == 1 44 | R_out = rbm_pt(R, patches); 45 | else 46 | R_out = grbm_pt(R, patches); 47 | end 48 | else 49 | R_out = rbm(R, patches); 50 | end 51 | 52 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Xiaopeng LI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/README.md: -------------------------------------------------------------------------------- 1 | # Collaborative Variational Autoencoder 2 | This code is associated with the following paper: 3 | 4 | Xiaopeng Li and James She. Collaborative Variational Autoencoder for Recommender Systems. ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2017 (KDD'17). 5 | 6 | ### Prerequisites 7 | * The code is written in Python 2.7. 8 | * To run this code you need to have TensorFlow installed. The code is tested with TensorFlow 0.12.1. 9 | 10 | ### Usage 11 | The program consists of two parts: pre-training in the VAE manner and fine-tuning in the CVAE manner. The core code files are in the lib/ directory and the test code files are test_vae.py and test_cvae.py. To run the program, you should first run test_vae.py to pre-train the weights of the inference network and the generation network. The pre-trained weights will be saved under the model/ directory. Then test_cvae.py can be run for the CVAE model, and the resulting model will also be saved under the model/ directory.
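In outline, the two stages reduce to the following sketch, condensed from the citeulike-t test scripts reproduced further below. The imports and constructor arguments are the ones those scripts use; variables such as `train_X`, `train_users`, and `content` stand for the arrays produced by the loaders in those scripts:

```python
from lib.vae import VariationalAutoEncoder
from lib.cvae import *    # provides CVAE and Params, as in test_cvae.py
from lib.utils import *

# Stage 1 (test_vae.py): pre-train the inference and generation networks
# on the item content matrix; the weights are saved under model/.
vae = VariationalAutoEncoder(input_dim=20000, dims=[200, 100], z_dim=50,
                             activations=['sigmoid', 'sigmoid'], epoch=[50, 50],
                             noise='mask-0.3', loss='cross-entropy', lr=0.01,
                             batch_size=128, print_step=1)
vae.fit(train_X, test_X)

# Stage 2 (test_cvae.py): load the pre-trained weights and fine-tune the
# CVAE jointly on content and ratings.
params = Params()
params.lambda_u, params.lambda_v, params.lambda_r = 0.1, 10, 1  # as in the scripts
model = CVAE(num_users=7947, num_items=25975, num_factors=50, params=params,
             input_dim=20000, dims=[200, 100], n_z=50,
             activations=['sigmoid', 'sigmoid'], loss_type='cross-entropy',
             lr=0.001, random_seed=0, print_step=10, verbose=False)
model.load_model(weight_path="model/pretrain")
model.run(train_users, train_items, test_users, test_items, content, params)
model.save_model(weight_path="model/cvae", pmf_path="model/pmf")
```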
The trained CVAE model will also be saved under the model/ directory. 12 | 13 | ### Note 14 | 15 | #### Reproduce 16 | To exactly reproduce the results in the paper, note that the author used the evaluation code in lib/evaluatePaper.m to compute recall for all methods, including the baselines. The author later found that this code may not be exactly correct, and therefore added what they believe to be a corrected version in lib/evaluationCorrect.m. With the corrected code, the relative performance of all methods remains the same, although the absolute recall rates shift down a little. The following is the figure obtained with the corrected evaluation code: 17 | 18 | ![Recall with the corrected evaluation code](image/recall.png) 19 | 20 | If you want to compare with the model directly using the absolute recall rate, this corrected number is the fair one to compare with. However, the author recommends putting all baseline methods under the same setting, with the same evaluation code, for a fair comparison. 21 | 22 | #### citeulike-t experiment 23 | The citeulike-t dataset is added in `data/citeulike-t`, and the code for the experiment is added in `citeulike-t/`. -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/citeulike-t/test_cvae-cf10.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import h5py 3 | sys.path.append("..") 4 | from lib.cvae import * 5 | import numpy as np 6 | import tensorflow as tf 7 | import scipy.io 8 | from lib.utils import * 9 | 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | init_logging("cvae-cf10.log") 13 | 14 | def load_cvae_data(): 15 | data = {} 16 | data_dir = "../data/citeulike-t/" 17 | # variables = scipy.io.loadmat(data_dir + "mult_nor.mat") 18 | # data["content"] = variables['X'] 19 | f = h5py.File('../data/citeulike-t/mult_nor.mat','r') 20 | d = f.get('X') 21 | d = np.array(d).T 22 | print d.shape 23 | data["content"] = d 24 | 25 | data["train_users"] = load_rating(data_dir + "cf-train-10-users.dat") 26 | data["train_items"] = load_rating(data_dir + "cf-train-10-items.dat") 27 | data["test_users"] = load_rating(data_dir + "cf-test-10-users.dat") 28 | data["test_items"] = load_rating(data_dir + "cf-test-10-items.dat") 29 | 30 | return data 31 | 32 | def load_rating(path): 33 | arr = [] 34 | for line in open(path): 35 | a = line.strip().split() 36 | if a[0]=='0': 37 | l = [] 38 | else: 39 | l = [int(x) for x in a[1:]] 40 | arr.append(l) 41 | return arr 42 | 43 | params = Params() 44 | params.lambda_u = 0.1 45 | params.lambda_v = 10 46 | params.lambda_r = 1 47 | params.a = 1 48 | params.b = 0.01 49 | params.M = 300 50 | params.n_epochs = 100 51 | params.max_iter = 1 52 | 53 | data = load_cvae_data() 54 | num_factors = 50 55 | model = CVAE(num_users=7947, num_items=25975, num_factors=num_factors, params=params, 56 | input_dim=20000, dims=[200, 100], n_z=num_factors, activations=['sigmoid', 'sigmoid'], 57 | loss_type='cross-entropy', lr=0.001, random_seed=0, print_step=10, verbose=False) 58 | model.load_model(weight_path="model/pretrain") 59 | model.run(data["train_users"], data["train_items"], data["test_users"], data["test_items"], 60 | data["content"], params) 61 | model.save_model(weight_path="model-cf10/cvae", pmf_path="model-cf10/pmf") 62 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/citeulike-t/test_cvae.py:
-------------------------------------------------------------------------------- 1 | import sys 2 | import h5py 3 | sys.path.append("..") 4 | from lib.cvae import * 5 | import numpy as np 6 | import tensorflow as tf 7 | import scipy.io 8 | from lib.utils import * 9 | 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | init_logging("cvae.log") 13 | 14 | def load_cvae_data(): 15 | data = {} 16 | data_dir = "../data/citeulike-t/" 17 | # variables = scipy.io.loadmat(data_dir + "mult_nor.mat") 18 | # data["content"] = variables['X'] 19 | f = h5py.File('../data/citeulike-t/mult_nor.mat','r') 20 | d = f.get('X') 21 | d = np.array(d).T 22 | print d.shape 23 | data["content"] = d 24 | 25 | data["train_users"] = load_rating(data_dir + "cf-train-1-users.dat") 26 | data["train_items"] = load_rating(data_dir + "cf-train-1-items.dat") 27 | data["test_users"] = load_rating(data_dir + "cf-test-1-users.dat") 28 | data["test_items"] = load_rating(data_dir + "cf-test-1-items.dat") 29 | 30 | return data 31 | 32 | def load_rating(path): 33 | arr = [] 34 | for line in open(path): 35 | a = line.strip().split() 36 | if a[0]=='0': 37 | l = [] 38 | else: 39 | l = [int(x) for x in a[1:]] 40 | arr.append(l) 41 | return arr 42 | 43 | params = Params() 44 | params.lambda_u = 0.1 45 | params.lambda_v = 10 46 | params.lambda_r = 1 47 | params.a = 1 48 | params.b = 0.01 49 | params.M = 300 50 | params.n_epochs = 100 51 | params.max_iter = 1 52 | 53 | data = load_cvae_data() 54 | num_factors = 50 55 | model = CVAE(num_users=7947, num_items=25975, num_factors=num_factors, params=params, 56 | input_dim=20000, dims=[200, 100], n_z=num_factors, activations=['sigmoid', 'sigmoid'], 57 | loss_type='cross-entropy', lr=0.001, random_seed=0, print_step=10, verbose=False) 58 | model.load_model(weight_path="model/pretrain") 59 | model.run(data["train_users"], data["train_items"], data["test_users"], data["test_items"], 60 | data["content"], params) 61 | model.save_model(weight_path="model/cvae", pmf_path="model/pmf") 62 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/citeulike-t/test_vae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import scipy.io 4 | import logging 5 | import sys 6 | import h5py 7 | sys.path.append("..") 8 | from lib.vae import VariationalAutoEncoder 9 | from lib.utils import * 10 | 11 | np.random.seed(0) 12 | tf.set_random_seed(0) 13 | init_logging("vae.log") 14 | 15 | logging.info('loading data') 16 | # variables = scipy.io.loadmat("../data/citeulike-t/mult_nor.mat") 17 | # data = variables['X'] 18 | f = h5py.File('../data/citeulike-t/mult_nor.mat','r') 19 | data = f.get('X') 20 | data = np.array(data).T 21 | print data.shape 22 | 23 | idx = np.random.rand(data.shape[0]) < 0.8 24 | train_X = data[idx] 25 | test_X = data[~idx] 26 | logging.info('initializing vae model') 27 | model = VariationalAutoEncoder(input_dim=20000, dims=[200, 100], z_dim=50, 28 | activations=['sigmoid','sigmoid'], epoch=[50, 50], 29 | noise='mask-0.3', loss='cross-entropy', lr=0.01, batch_size=128, print_step=1) 30 | logging.info('fitting data starts...') 31 | model.fit(train_X, test_X) 32 | # feat = model.transform(data) 33 | # scipy.io.savemat('feat-dae.mat',{'feat': feat}) 34 | # np.savez("sdae-weights.npz", en_weights=model.weights, en_biases=model.biases, 35 | # de_weights=model.de_weights, de_biases=model.de_biases) 36 |
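# A note on the rating-file format (inferred from load_rating in test_cvae.py
# above and from test.m; this is a reading of the code, not official
# documentation): each line of the cf-train-*/cf-test-* .dat files describes
# one user as whitespace-separated integers,
#
#     <num_items> <item_id_1> <item_id_2> ...
#
# load_rating drops the leading count and keeps the 0-based item ids; a line
# whose count is 0 yields an empty list for that user.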
-------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/image/recall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/image/recall.png -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/test.m: -------------------------------------------------------------------------------- 1 | M = 300; 2 | m_num_users = 5551; 3 | m_num_items = 16980; 4 | 5 | train_users = cell(m_num_users,1); 6 | fid=fopen('data/cf-train-1-users.dat','r'); % user train file 7 | for i=1:m_num_users 8 | tline = fgetl(fid); 9 | if ~ischar(tline), break, end 10 | liked = str2num(tline); 11 | liked(2:end) = liked(2:end)+1; 12 | train_users{i} = liked; 13 | end 14 | fclose(fid); 15 | 16 | test_users = cell(m_num_users,1); 17 | fid=fopen('data/cf-test-1-users.dat','r'); % user test file 18 | for i=1:m_num_users 19 | tline = fgetl(fid); 20 | if ~ischar(tline), break, end 21 | liked = str2num(tline); 22 | liked(2:end) = liked(2:end)+1; 23 | test_users{i} = liked; 24 | end 25 | fclose(fid); 26 | 27 | x = 50:50:M; 28 | % 'cvae.mat' is the "pmf.mat" file saved by the model 29 | %S = load('cvae.mat'); 30 | S = load('pmf.mat'); 31 | m_U = S.m_U; 32 | m_V = S.m_V; 33 | [recall_cvae, ~] = evaluate(train_users, test_users, m_U, m_V, M); 34 | recall_cvae = recall_cvae(x); -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/test_cvae.py: -------------------------------------------------------------------------------- 1 | from Conferences.KDD.CollaborativeVAE_github.lib.cvae import * 2 | import numpy as np 3 | import tensorflow as tf 4 | import scipy.io 5 | from Conferences.KDD.CollaborativeVAE_github.lib.utils import * 6 | 7 | np.random.seed(0) 8 | tf.set_random_seed(0) 9 | init_logging("cvae.log") 10 | 11 | def load_cvae_data(): 12 | data = {} 13 | data_dir = "./data/citeulike-a/" 14 | variables = scipy.io.loadmat(data_dir + "mult_nor.mat") 15 | data["content"] = variables['X'] 16 | 17 | data["train_users"] = load_rating(data_dir + "cf-train-1-users.dat") 18 | data["train_items"] = load_rating(data_dir + "cf-train-1-items.dat") 19 | data["test_users"] = load_rating(data_dir + "cf-test-1-users.dat") 20 | data["test_items"] = load_rating(data_dir + "cf-test-1-items.dat") 21 | 22 | return data 23 | 24 | def load_rating(path): 25 | arr = [] 26 | for line in open(path): 27 | a = line.strip().split() 28 | if a[0]=='0': 29 | l = [] 30 | else: 31 | l = [int(x) for x in a[1:]] 32 | arr.append(l) 33 | return arr 34 | 35 | params = Params() 36 | params.lambda_u = 0.1 37 | params.lambda_v = 10 38 | params.lambda_r = 1 39 | params.a = 1 40 | params.b = 0.01 41 | params.M = 300 42 | params.n_epochs = 100 43 | params.max_iter = 1 44 | 45 | data = load_cvae_data() 46 | num_factors = 50 47 | model = CVAE(num_users=5551, num_items=16980, num_factors=num_factors, params=params, 48 | input_dim=8000, dims=[200, 100], n_z=num_factors, activations=['sigmoid', 'sigmoid'], 49 | loss_type='cross-entropy', lr=0.001, random_seed=0, print_step=10, verbose=False) 50 | model.load_model(weight_path="model/pretrain") 51 | model.run(data["train_users"],
data["train_items"], data["test_users"], data["test_items"], 52 | data["content"], params) 53 | model.save_model(weight_path="model/cvae", pmf_path="model/pmf") 54 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/CollaborativeVAE_github/test_vae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import scipy.io 4 | import logging 5 | from Conferences.KDD.CollaborativeVAE_github.lib.vae import VariationalAutoEncoder 6 | from Conferences.KDD.CollaborativeVAE_github.lib.utils import * 7 | 8 | np.random.seed(0) 9 | tf.set_random_seed(0) 10 | init_logging("vae.log") 11 | 12 | logging.info('loading data') 13 | variables = scipy.io.loadmat("./data/citeulike-a/mult_nor.mat") 14 | data = variables['X'] 15 | idx = np.random.rand(data.shape[0]) < 0.8 16 | train_X = data[idx] 17 | test_X = data[~idx] 18 | logging.info('initializing vae model') 19 | 20 | model = VariationalAutoEncoder(input_dim=8000, dims=[200, 100], z_dim=50, 21 | activations=['sigmoid','sigmoid'], epoch=[50, 50], 22 | noise='mask-0.3', loss='cross-entropy', lr=0.01, batch_size=128, print_step=1) 23 | 24 | logging.info('fitting data starts...') 25 | model.fit(train_X, test_X) 26 | 27 | # feat = model.transform(data) 28 | # scipy.io.savemat('feat-dae.mat',{'feat': feat}) 29 | # np.savez("sdae-weights.npz", en_weights=model.weights, en_biases=model.biases, 30 | # de_weights=model.de_weights, de_biases=model.de_biases) 31 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/KDD/MCRec_github/README.md: -------------------------------------------------------------------------------- 1 | # MCRec 2 | Source code for the KDD 2018 paper "Leverage Meta-path based Context for Top-N Recommendation with a Neural Co-Attention Model" 3 | 4 | # Requirements 5 | 6 | * numpy 7 | 8 | * scipy 9 | 10 | * Tensorflow-gpu (1.2.1) or Theano (1.0.1) 11 | 12 | * Keras (2.1.1) 13 | 14 | * Tested on a machine with two GPUs (NVIDIA GTX 1080 ×2) and two CPUs (Intel Xeon E5-2690 ×2) 15 | 16 | # Reference 17 | 18 | ``` 19 | @inproceedings{hu2018mcrec, 20 | author = {Binbin Hu and Chuan Shi and Wayne Xin Zhao and Philip S.
Yu}, 21 | title = {Leverage Meta-path based Context for Top-N Recommendation with a Neural Co-Attention Model}, 22 | booktitle = {Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 23 | year = {2018}, 24 | url = {NULL}, 25 | publisher = {ACM}, 26 | address = {New York, NY, USA}, 27 | keywords = {Recommender System, Heterogeneous Information Network, Deep Learning, Attention Mechanism}, 28 | } 29 | ``` 30 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/RecSys/SpectralCF_github/README.md: -------------------------------------------------------------------------------- 1 | # SpectralCF 2 | ## This repo hosts the code for the paper "Spectral Collaborative Filtering" (RecSys 2018) 3 | #### Requirements: 4 | ##### Python 3.5 5 | ##### TensorFlow 1.4.0-rc0 6 | ##### NumPy 1.14.0 7 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/RecSys/SpectralCF_github/main.py: -------------------------------------------------------------------------------- 1 | from Conferences.RecSys.SpectralCF_github.SpectralCF import SpectralCF 2 | from Conferences.RecSys.SpectralCF_github.test import * 3 | 4 | import tensorflow as tf 5 | 6 | def main(): 7 | 8 | print("Instantiating model...") 9 | 10 | model = SpectralCF(K=K, graph=data_generator.R, n_users=USER_NUM, n_items=ITEM_NUM, emb_dim=EMB_DIM, 11 | lr=LR, decay=DECAY, batch_size=BATCH_SIZE,DIR=DIR) 12 | 13 | print("Instantiating model... done!") 14 | print(model.model_name) 15 | 16 | config = tf.ConfigProto() 17 | config.gpu_options.allow_growth = True 18 | sess = tf.Session(config=config) 19 | sess.run(tf.global_variables_initializer()) 20 | 21 | print("Training model... ") 22 | 23 | for epoch in range(N_EPOCH): 24 | users, pos_items, neg_items = data_generator.sample() 25 | _, loss = sess.run([model.updates, model.loss], 26 | feed_dict={model.users: users, model.pos_items: pos_items, 27 | model.neg_items: neg_items}) 28 | 29 | users_to_test = list(data_generator.test_set.keys()) 30 | 31 | ret = test(sess, model, users_to_test) 32 | 33 | 34 | print('Epoch %d training loss %f' % (epoch, loss)) 35 | print('recall_20 %f recall_40 %f recall_60 %f recall_80 %f recall_100 %f' 36 | % (ret[0],ret[1],ret[2],ret[3],ret[4])) 37 | print('map_20 %f map_40 %f map_60 %f map_80 %f map_100 %f' 38 | % (ret[5], ret[6], ret[7], ret[8], ret[9])) 39 | 40 | print("Training model... done!") 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/RecSys/SpectralCF_github/params.py: -------------------------------------------------------------------------------- 1 | MODEL = 'SpectralCF' 2 | DATASET = 'ml-1m' 3 | 4 | EMB_DIM = 16 5 | BATCH_SIZE = 1024 6 | DECAY = 0.001 7 | LAMDA = 1 8 | K = 3 9 | N_EPOCH = 200 10 | LR = 0.001 11 | DROPOUT = 0.0 12 | 13 | DIR = './data/'+DATASET+'/' -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/SIGIR/CMN_github/README.md: -------------------------------------------------------------------------------- 1 | # Collaborative Memory Network for Recommendation Systems 2 | Implementation for 3 | 4 | Travis Ebesu, Bin Shen, Yi Fang. Collaborative Memory Network for Recommendation Systems.
In Proceedings of the 41st International ACM SIGIR Conference on Research and Development in Information Retrieval, 2018. 5 | 6 | https://arxiv.org/pdf/1804.10862.pdf 7 | 8 | Bibtex 9 | ``` 10 | @inproceedings{Ebesu:2018:CMN:3209978.3209991, 11 | author = {Ebesu, Travis and Shen, Bin and Fang, Yi}, 12 | title = {Collaborative Memory Network for Recommendation Systems}, 13 | booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval}, 14 | series = {SIGIR '18}, 15 | year = {2018}, 16 | isbn = {978-1-4503-5657-2}, 17 | location = {Ann Arbor, MI, USA}, 18 | pages = {515--524}, 19 | numpages = {10}, 20 | url = {http://doi.acm.org/10.1145/3209978.3209991}, 21 | doi = {10.1145/3209978.3209991}, 22 | acmid = {3209991}, 23 | publisher = {ACM}, 24 | address = {New York, NY, USA}, 25 | keywords = {collaborative filtering, deep learning, memory networks}, 26 | } 27 | ``` 28 | 29 | Running Collaborative Memory Network 30 | ``` 31 | python train.py --gpu 0 --dataset data/citeulike-a.npz --pretrain pretrain/citeulike-a_e50.npz 32 | ``` 33 | 34 | 35 | To pretrain the model for initialization 36 | ``` 37 | python pretrain.py --gpu 0 --dataset data/citeulike-a.npz --output pretrain/citeulike-a_e50.npz 38 | ``` 39 | 40 | 41 | **Requirements** 42 | * Python 3.6 43 | * TensorFlow 1.4+ 44 | * dm-sonnet 45 | 46 | 47 | ## Data Format 48 | The structure of the data in the npz file is as follows: 49 | 50 | ``` 51 | train_data = [[user id, item id], ...] 52 | test_data = {userid: (pos_id, [neg_id1, neg_id2, ...]), ...} 53 | ``` 54 | 55 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Conferences/WWW/MultiVAE_github/README.md: -------------------------------------------------------------------------------- 1 | # Variational autoencoders for collaborative filtering 2 | 3 | This notebook accompanies the paper "[Variational autoencoders for collaborative filtering](https://arxiv.org/abs/1802.05814)" by Dawen Liang, Rahul G. Krishnan, Matthew D. Hoffman, and Tony Jebara, in The Web Conference (aka WWW) 2018. 4 | 5 | In this notebook, we show a complete self-contained example of training a variational autoencoder (as well as a denoising autoencoder) with multinomial likelihood (described in the paper) on the public Movielens-20M dataset, including both data preprocessing and model training. 6 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/CythonCompiler/compile_script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 16/07/2017 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | """ 10 | This script is called in a subprocess and compiles the cython source file provided 11 | 12 | python compile_script.py filename.pyx build_ext --inplace 13 | """ 14 | 15 | 16 | try: 17 | from setuptools import setup 18 | from setuptools import Extension 19 | except ImportError: 20 | from distutils.core import setup 21 | from distutils.extension import Extension 22 | 23 | 24 | from Cython.Distutils import build_ext 25 | import numpy, sys, re 26 | 27 | 28 | if len(sys.argv) != 4: 29 | raise ValueError("Wrong number of parameters received.
Expected 4, got {}".format(sys.argv)) 30 | 31 | # Get the name of the file to compile 32 | fileToCompile = sys.argv[1] 33 | 34 | # Remove the argument from sys.argv so that it contains only what setup needs 35 | del sys.argv[1] 36 | 37 | extensionName = re.sub(r"\.pyx", "", fileToCompile) 38 | 39 | 40 | ext_modules = Extension(extensionName, 41 | [fileToCompile], 42 | extra_compile_args=['-O2'], 43 | include_dirs=[numpy.get_include(),], 44 | ) 45 | 46 | setup( 47 | cmdclass={'build_ext': build_ext}, 48 | ext_modules=[ext_modules] 49 | ) 50 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/DATASET_SPLITS.md: -------------------------------------------------------------------------------- 1 | # Overview of Datasets on Google Cloud 2 | 3 | All splits live in `gs://reczilla-results/dataset-splits/`. Each sub-directory here contains a different set of splits. 4 | 5 | ## `gs://reczilla-results/dataset-splits/splits-v3` 6 | 7 | All but one of the directories here contain a single split, created using random leave-one-out splitting (for each dataset, one random interaction per user is reserved for validation). The one exception is the directory `gs://reczilla-results/dataset-splits/splits-v3/AmazonReviewData`, which contains a different sub-directory for each of the Amazon splits. 8 | 9 | **Note:** all splits here are in the `DataSplitter_leave_k_out` folders but are created using the `leave_k_out_random` logic (see `-v5` below). 10 | 11 | ## `gs://reczilla-results/dataset-splits/splits-v5` 12 | 13 | This directory contains one sub-directory for each dataset, including all Amazon splits (unlike the `-v3` folder). Each dataset sub-dir contains two sub-dirs, each for a different split: 14 | - `DataSplitter_leave_k_out_last`: leave-last-out: the last interaction from each user is placed in the test set, and all remaining interactions are used for training. 15 | - `DataSplitter_leave_k_out_random`: leave-one-out: one randomly-selected interaction is placed in the test set for each user. **Note:** this is the same split present in `splits-v3`. 16 | 17 | ## `gs://reczilla-results/dataset-splits/splits-global-timestamp` 18 | 19 | This directory stores global timestamp splits. The splitting is done at the 60th and 80th percentile timestamps to produce the train/val/test splits.
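The percentile logic can be sketched as follows (an illustration only, not the repo's actual `DataSplitter` implementation; it assumes a NumPy array of interaction timestamps):

```python
import numpy as np

def global_timestamp_split(timestamps):
    # Cut points at the 60th and 80th percentiles of all interaction timestamps.
    t60, t80 = np.percentile(timestamps, [60, 80])
    train_mask = timestamps <= t60                       # oldest 60% -> train
    val_mask = (timestamps > t60) & (timestamps <= t80)  # next 20%   -> validation
    test_mask = timestamps > t80                         # newest 20% -> test
    return train_mask, val_mask, test_mask
```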
20 | 21 | ## Ignore these sub-directories 22 | - `gs://reczilla-results/dataset-splits/splits-v1` 23 | - `gs://reczilla-results/dataset-splits/splits-v2` 24 | - `gs://reczilla-results/dataset-splits/splits-v4` 25 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/DL_Evaluation_TOIS_Additional_material.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/DL_Evaluation_TOIS_Additional_material.pdf -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAllBeautyReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAllBeautyReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_All_Beauty.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_All_Beauty.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAllBeauty/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_All_Beauty.json.gz", 37 | decompressed_file_name = "meta_All_Beauty.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_All_Beauty.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAllCreditCardsReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAllCreditCardsReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_All_Credit_Cards.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_All_Credit_Cards.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAllCreditCards/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 |
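        # Note on the pattern shared by all of the Amazon readers in this folder:
        # each subclass only declares DATASET_URL_RATING, DATASET_URL_METADATA,
        # DATASET_SUBFOLDER and AVAILABLE_ICM, while the inherited
        # _AmazonReviewDataReader helpers (used below) download the files and
        # build the interaction (URM) and metadata (ICM) matrices.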
dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_All_Credit_Cards.json.gz", 37 | decompressed_file_name = "meta_All_Credit_Cards.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_All_Credit_Cards.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAllElectronicsReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAllElectronicsReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_All_Electronics.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_All_Electronics.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAllElectronics/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_All_Electronics.json.gz", 37 | decompressed_file_name = "meta_All_Electronics.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_All_Electronics.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAlternativeRockReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAlternativeRockReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Alternative_Rock.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Alternative_Rock.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAlternativeRock/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def 
_load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Alternative_Rock.json.gz", 37 | decompressed_file_name = "meta_Alternative_Rock.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Alternative_Rock.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAmazonCoinsReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAmazonCoinsReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Amazon_Coins.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Amazon_Coins.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAmazonCoins/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Amazon_Coins.json.gz", 37 | decompressed_file_name = "meta_Amazon_Coins.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Amazon_Coins.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAmazonFashionReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAmazonFashionReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Amazon_Fashion.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Amazon_Fashion.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAmazonFashion/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | 
def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Amazon_Fashion.json.gz", 37 | decompressed_file_name = "meta_Amazon_Fashion.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Amazon_Fashion.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAmazonFireTVReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAmazonFireTVReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Amazon_Fire_TV.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Amazon_Fire_TV.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAmazonFireTV/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Amazon_Fire_TV.json.gz", 37 | decompressed_file_name = "meta_Amazon_Fire_TV.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Amazon_Fire_TV.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAppliancesReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAppliancesReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Appliances.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Appliances.json.gz" 17 | 18 | DATASET_SUBFOLDER = 
"AmazonReviewData/AmazonAppliances/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Appliances.json.gz", 37 | decompressed_file_name = "meta_Appliances.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Appliances.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAppsforAndroidReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAppsforAndroidReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Apps_for_Android.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Apps_for_Android.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAppsforAndroid/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Apps_for_Android.json.gz", 37 | decompressed_file_name = "meta_Apps_for_Android.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Apps_for_Android.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonAutomotiveReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonAutomotiveReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Automotive.csv" 16 | DATASET_URL_METADATA = 
"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Automotive.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonAutomotive/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Automotive.json.gz", 37 | decompressed_file_name = "meta_Automotive.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Automotive.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBabyProductsReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBabyProductsReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Baby_Products.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Baby_Products.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBabyProducts/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Baby_Products.json.gz", 37 | decompressed_file_name = "meta_Baby_Products.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Baby_Products.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBabyReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBabyReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = 
"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Baby.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Baby.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBaby/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Baby.json.gz", 37 | decompressed_file_name = "meta_Baby.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Baby.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBeautyReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBeautyReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Beauty.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Beauty.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBeauty/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Beauty.json.gz", 37 | decompressed_file_name = "meta_Beauty.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Beauty.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBluesReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBluesReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = 
"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Blues.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Blues.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBlues/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Blues.json.gz", 37 | decompressed_file_name = "meta_Blues.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Blues.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBooksReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBooksReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Books.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBooks/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Books.json.gz", 37 | decompressed_file_name = "meta_Books.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Books.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonBuyaKindleReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonBuyaKindleReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = 
"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Buy_a_Kindle.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Buy_a_Kindle.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonBuyaKindle/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Buy_a_Kindle.json.gz", 37 | decompressed_file_name = "meta_Buy_a_Kindle.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Buy_a_Kindle.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonCDsVinylReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonCDsVinylReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_CDs_and_Vinyl.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_CDs_and_Vinyl.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonCDsVinyl/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_CDs_and_Vinyl.json.gz", 37 | decompressed_file_name = "meta_CDs_and_Vinyl.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_CDs_and_Vinyl.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonCameraPhotoReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class 
AmazonCameraPhotoReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Camera_and_Photo.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Camera_and_Photo.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonCameraPhoto/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Camera_and_Photo.json.gz",
                                                    decompressed_file_name = "meta_Camera_and_Photo.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Camera_and_Photo.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonCarElectronicsReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonCarElectronicsReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Car_Electronics.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Car_Electronics.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonCarElectronics/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Car_Electronics.json.gz",
                                                    decompressed_file_name = "meta_Car_Electronics.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Car_Electronics.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonChildrensMusicReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonChildrensMusicReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Children's_Music.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Children's_Music.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonChildrensMusic/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Children's_Music.json.gz",
                                                    decompressed_file_name = "meta_Children's_Music.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Children's_Music.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonChristianReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonChristianReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Christian.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Christian.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonChristian/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Christian.json.gz",
                                                    decompressed_file_name = "meta_Christian.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Christian.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonClassicRockReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonClassicRockReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Classic_Rock.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Classic_Rock.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonClassicRock/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Classic_Rock.json.gz",
                                                    decompressed_file_name = "meta_Classic_Rock.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Classic_Rock.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonClassicalReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonClassicalReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Classical.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Classical.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonClassical/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Classical.json.gz",
                                                    decompressed_file_name = "meta_Classical.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Classical.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonCollectibleCoinsReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonCollectibleCoinsReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Collectible_Coins.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Collectible_Coins.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonCollectibleCoins/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Collectible_Coins.json.gz",
                                                    decompressed_file_name = "meta_Collectible_Coins.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Collectible_Coins.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonComputersReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonComputersReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Computers.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Computers.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonComputers/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Computers.json.gz",
                                                    decompressed_file_name = "meta_Computers.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Computers.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
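--------------------------------------------------------------------------------
All of these readers instantiate a single template: only the class name and one
category token change, and that token alone determines the two SNAP URLs, the
local file names, and the cache subfolder. The sketch below makes that
parametrization explicit. It is an illustration only: make_amazon_reader() is
not a helper defined in this repository, and a real reader also carries the
_load_from_original_file boilerplate shown in the files above and below.

# Illustration only: this factory is NOT part of the repository; it just shows
# that each hand-written reader in this folder differs by one category token.
from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader

_SNAP_ROOT = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/"


def make_amazon_reader(class_name, category_token, subfolder_name):
    attrs = {
        "DATASET_URL_RATING": _SNAP_ROOT + "ratings_{}.csv".format(category_token),
        "DATASET_URL_METADATA": _SNAP_ROOT + "meta_{}.json.gz".format(category_token),
        "DATASET_SUBFOLDER": "AmazonReviewData/{}/".format(subfolder_name),
        "AVAILABLE_ICM": ["ICM_metadata"],
        # A complete reader would also define _load_from_original_file with the
        # same download-and-parse boilerplate as the hand-written files.
        "_get_dataset_name_root": lambda self: self.DATASET_SUBFOLDER,
    }
    return type(class_name, (_AmazonReviewDataReader,), attrs)


# Reproduces the constants of the hand-written AmazonComputersReader above:
AmazonComputersReaderSketch = make_amazon_reader("AmazonComputersReader", "Computers", "AmazonComputers")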
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonCountryReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonCountryReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Country.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Country.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonCountry/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Country.json.gz",
                                                    decompressed_file_name = "meta_Country.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Country.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonDavisReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonDavisReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Davis.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Davis.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonDavis/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Davis.json.gz",
                                                    decompressed_file_name = "meta_Davis.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Davis.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonDigitalMusicReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonDigitalMusicReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Digital_Music.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Digital_Music.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonDigitalMusic/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Digital_Music.json.gz",
                                                    decompressed_file_name = "meta_Digital_Music.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Digital_Music.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonElectronicsReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonElectronicsReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Electronics.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonElectronics/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Electronics.json.gz",
                                                    decompressed_file_name = "meta_Electronics.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Electronics.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
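--------------------------------------------------------------------------------
On their own these classes only declare where a category's data lives; loading
is driven by the framework. A minimal usage sketch follows, assuming the
load_data(), get_URM_all() and get_ICM_from_name() methods exposed by the base
DataReader/Dataset classes elsewhere in this repository's Data_manager; those
names do not appear in the reader files themselves.

# Hedged usage sketch: load_data(), get_URM_all() and get_ICM_from_name() are
# assumed from the base DataReader/Dataset classes in Data_manager.
from Data_manager.AmazonReviewData.AmazonElectronicsReader import AmazonElectronicsReader

reader = AmazonElectronicsReader()

# The first call triggers _load_from_original_file(), downloading the ratings
# CSV and the gzipped metadata JSON from SNAP into DATASET_SUBFOLDER; later
# calls reuse the locally saved copy.
dataset = reader.load_data()

URM_all = dataset.get_URM_all()                           # sparse |users| x |items| matrix
ICM_metadata = dataset.get_ICM_from_name("ICM_metadata")  # the single ICM declared above

print("URM: {}, ICM_metadata: {}".format(URM_all.shape, ICM_metadata.shape))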
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonEntertainmentReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonEntertainmentReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Entertainment.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Entertainment.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonEntertainment/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Entertainment.json.gz",
                                                    decompressed_file_name = "meta_Entertainment.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Entertainment.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonFolkReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonFolkReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Folk.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Folk.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonFolk/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Folk.json.gz",
                                                    decompressed_file_name = "meta_Folk.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Folk.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonGPSNavigationReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonGPSNavigationReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_GPS_and_Navigation.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_GPS_and_Navigation.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonGPSNavigation/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_GPS_and_Navigation.json.gz",
                                                    decompressed_file_name = "meta_GPS_and_Navigation.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_GPS_and_Navigation.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonGiftCardsReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonGiftCardsReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Gift_Cards.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Gift_Cards.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonGiftCards/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Gift_Cards.json.gz",
                                                    decompressed_file_name = "meta_Gift_Cards.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Gift_Cards.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonGiftCardsStoreReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonGiftCardsStoreReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Gift_Cards_Store.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Gift_Cards_Store.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonGiftCardsStore/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Gift_Cards_Store.json.gz",
                                                    decompressed_file_name = "meta_Gift_Cards_Store.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Gift_Cards_Store.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonGospelReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonGospelReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Gospel.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Gospel.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonGospel/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Gospel.json.gz",
                                                    decompressed_file_name = "meta_Gospel.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Gospel.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonHardRockMetalReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonHardRockMetalReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Hard_Rock_and_Metal.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Hard_Rock_and_Metal.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonHardRockMetal/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Hard_Rock_and_Metal.json.gz",
                                                    decompressed_file_name = "meta_Hard_Rock_and_Metal.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Hard_Rock_and_Metal.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonHomeImprovementReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonHomeImprovementReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Home_Improvement.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Home_Improvement.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonHomeImprovement/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Home_Improvement.json.gz",
                                                    decompressed_file_name = "meta_Home_Improvement.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Home_Improvement.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonHomeKitchenReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonHomeKitchenReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Home_and_Kitchen.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Home_and_Kitchen.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonHomeKitchen/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Home_and_Kitchen.json.gz",
                                                    decompressed_file_name = "meta_Home_and_Kitchen.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Home_and_Kitchen.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonInternationalReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonInternationalReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_International.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_International.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonInternational/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_International.json.gz",
                                                    decompressed_file_name = "meta_International.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_International.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonJazzReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonJazzReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Jazz.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Jazz.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonJazz/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Jazz.json.gz",
                                                    decompressed_file_name = "meta_Jazz.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Jazz.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonKindleStoreReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonKindleStoreReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Kindle_Store.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Kindle_Store.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonKindleStore/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Kindle_Store.json.gz",
                                                    decompressed_file_name = "meta_Kindle_Store.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Kindle_Store.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonKitchenDiningReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonKitchenDiningReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Kitchen_and_Dining.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Kitchen_and_Dining.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonKitchenDining/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Kitchen_and_Dining.json.gz",
                                                    decompressed_file_name = "meta_Kitchen_and_Dining.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Kitchen_and_Dining.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonLatinMusicReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonLatinMusicReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Latin_Music.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Latin_Music.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonLatinMusic/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Latin_Music.json.gz",
                                                    decompressed_file_name = "meta_Latin_Music.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Latin_Music.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonLuxuryBeautyReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonLuxuryBeautyReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Luxury_Beauty.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Luxury_Beauty.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonLuxuryBeauty/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Luxury_Beauty.json.gz",
                                                    decompressed_file_name = "meta_Luxury_Beauty.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Luxury_Beauty.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonMicrosoftReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonMicrosoftReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Microsoft.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Microsoft.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonMicrosoft/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Microsoft.json.gz",
                                                    decompressed_file_name = "meta_Microsoft.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Microsoft.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonMiscellaneousReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonMiscellaneousReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Miscellaneous.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Miscellaneous.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonMiscellaneous/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Miscellaneous.json.gz",
                                                    decompressed_file_name = "meta_Miscellaneous.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Miscellaneous.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonMoviesTVReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonMoviesTVReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Movies_and_TV.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Movies_and_TV.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonMoviesTV/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Movies_and_TV.json.gz",
                                                    decompressed_file_name = "meta_Movies_and_TV.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Movies_and_TV.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonNewAgeReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonNewAgeReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_New_Age.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_New_Age.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonNewAge/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_New_Age.json.gz",
                                                    decompressed_file_name = "meta_New_Age.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_New_Age.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonNickelodeonReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonNickelodeonReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Nickelodeon.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Nickelodeon.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonNickelodeon/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Nickelodeon.json.gz",
                                                    decompressed_file_name = "meta_Nickelodeon.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Nickelodeon.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonOfficeProductsReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonOfficeProductsReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Office_Products.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Office_Products.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonOfficeProducts/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Office_Products.json.gz",
                                                    decompressed_file_name = "meta_Office_Products.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Office_Products.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonPetSuppliesReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonPetSuppliesReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Pet_Supplies.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Pet_Supplies.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonPetSupplies/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Pet_Supplies.json.gz",
                                                    decompressed_file_name = "meta_Pet_Supplies.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Pet_Supplies.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonPopReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonPopReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Pop.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Pop.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonPop/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Pop.json.gz",
                                                    decompressed_file_name = "meta_Pop.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Pop.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonPublishersReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonPublishersReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Publishers.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Publishers.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonPublishers/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Publishers.json.gz",
                                                    decompressed_file_name = "meta_Publishers.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Publishers.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonPurchaseCirclesReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonPurchaseCirclesReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Purchase_Circles.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Purchase_Circles.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonPurchaseCircles/"
    AVAILABLE_ICM = ["ICM_metadata"]

    def _get_dataset_name_root(self):
        return self.DATASET_SUBFOLDER

    def _load_from_original_file(self):
        # Load data from original
        self._print("Loading original data")

        dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder,
                                                    compressed_file_name = "meta_Purchase_Circles.json.gz",
                                                    decompressed_file_name = "meta_Purchase_Circles.json",
                                                    file_url = self.DATASET_URL_METADATA)

        URM_path = self._get_URM_review_path(data_folder = dataset_split_folder,
                                             file_name = "ratings_Purchase_Circles.csv",
                                             file_url = self.DATASET_URL_RATING)

        loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path,
                                                                           metadata_path = metadata_path,
                                                                           reviews_path = None)

        return loaded_dataset
--------------------------------------------------------------------------------
/RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonRBReader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Sujay Khandagale
"""

from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader


class AmazonRBReader(_AmazonReviewDataReader):

    DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_RandB.csv"
    DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_RandB.json.gz"

    DATASET_SUBFOLDER = "AmazonReviewData/AmazonRB/"
    AVAILABLE_ICM = ["ICM_metadata"]
22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_RandB.json.gz", 37 | decompressed_file_name = "meta_RandB.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_RandB.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonRapHipHopReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonRapHipHopReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Rap_and_Hip-Hop.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Rap_and_Hip-Hop.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonRapHipHop/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Rap_and_Hip-Hop.json.gz", 37 | decompressed_file_name = "meta_Rap_and_Hip-Hop.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Rap_and_Hip-Hop.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonRockReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonRockReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Rock.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Rock.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonRock/" 19 | AVAILABLE_ICM = 
["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Rock.json.gz", 37 | decompressed_file_name = "meta_Rock.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Rock.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonSoftwareReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonSoftwareReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Software.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Software.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonSoftware/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Software.json.gz", 37 | decompressed_file_name = "meta_Software.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Software.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonToysGamesReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonToysGamesReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Toys_and_Games.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Toys_and_Games.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonToysGames/" 19 | 
AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Toys_and_Games.json.gz", 37 | decompressed_file_name = "meta_Toys_and_Games.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Toys_and_Games.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonVideoGamesReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonVideoGamesReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Video_Games.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Video_Games.json.gz" 17 | 18 | DATASET_SUBFOLDER = "AmazonReviewData/AmazonVideoGames/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Video_Games.json.gz", 37 | decompressed_file_name = "meta_Video_Games.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Video_Games.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/AmazonReviewData/AmazonWineReader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | @author: Sujay Khandagale 6 | """ 7 | 8 | 9 | 10 | from Data_manager.AmazonReviewData._AmazonReviewDataReader import _AmazonReviewDataReader 11 | 12 | 13 | class AmazonWineReader(_AmazonReviewDataReader): 14 | 15 | DATASET_URL_RATING = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Wine.csv" 16 | DATASET_URL_METADATA = "http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Wine.json.gz" 17 | 18 | DATASET_SUBFOLDER = 
"AmazonReviewData/AmazonWine/" 19 | AVAILABLE_ICM = ["ICM_metadata"] 20 | 21 | 22 | def _get_dataset_name_root(self): 23 | return self.DATASET_SUBFOLDER 24 | 25 | 26 | def _load_from_original_file(self): 27 | 28 | # Load data from original 29 | 30 | self._print("Loading original data") 31 | 32 | dataset_split_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER 33 | 34 | 35 | metadata_path = self._get_ICM_metadata_path(data_folder = dataset_split_folder, 36 | compressed_file_name = "meta_Wine.json.gz", 37 | decompressed_file_name = "meta_Wine.json", 38 | file_url = self.DATASET_URL_METADATA) 39 | 40 | 41 | URM_path = self._get_URM_review_path(data_folder = dataset_split_folder, 42 | file_name = "ratings_Wine.csv", 43 | file_url = self.DATASET_URL_RATING) 44 | 45 | 46 | loaded_dataset = self._load_from_original_file_all_amazon_datasets(URM_path, 47 | metadata_path = metadata_path, 48 | reviews_path = None) 49 | 50 | return loaded_dataset 51 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/TagPreprocessing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 09/01/18 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | 10 | import re 11 | from nltk.stem import PorterStemmer 12 | 13 | import nltk 14 | nltk.download('stopwords') 15 | 16 | 17 | from nltk.corpus import stopwords 18 | 19 | 20 | def tagFilter(originalTag): 21 | 22 | # Remove non alphabetical character and split on spaces 23 | processedTag = re.sub("[^a-zA-Z0-9]", " ", originalTag) 24 | processedTag = re.sub(" +", " ", processedTag) 25 | 26 | processedTag = processedTag.split(" ") 27 | 28 | stopwords_set = set(stopwords.words('english')) 29 | 30 | result = [] 31 | 32 | for tag in processedTag: 33 | 34 | if tag not in stopwords_set: 35 | result.append(tag) 36 | 37 | return result 38 | 39 | 40 | 41 | 42 | def tagFilterAndStemming(originalTag): 43 | 44 | # Remove non alphabetical character and split on spaces 45 | processedTag = re.sub("[^a-zA-Z0-9]", " ", originalTag) 46 | processedTag = re.sub(" +", " ", processedTag) 47 | 48 | processedTag = processedTag.split(" ") 49 | 50 | stopwords_set = set(stopwords.words('english')) 51 | 52 | stemmer = PorterStemmer() 53 | 54 | result = [] 55 | 56 | for tag in processedTag: 57 | 58 | tag_stemmed = stemmer.stem(tag) 59 | 60 | if tag_stemmed not in stopwords_set: 61 | result.append(tag_stemmed) 62 | 63 | return result -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Data_manager/load_and_save_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 20/02/19 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from Base.DataIO import DataIO 10 | 11 | 12 | 13 | def save_data_dict_zip(URM_DICT, ICM_DICT, splitted_data_path, file_name_prefix): 14 | 15 | dataIO = DataIO(folder_path = splitted_data_path) 16 | 17 | URM_DICT["__ICM_available"] = len(ICM_DICT)>0 18 | 19 | dataIO.save_data(data_dict_to_save = URM_DICT, file_name=file_name_prefix + "URM_dict") 20 | 21 | del URM_DICT["__ICM_available"] 22 | 23 | if len(ICM_DICT)>0: 24 | dataIO.save_data(data_dict_to_save = ICM_DICT, file_name=file_name_prefix + "ICM_dict") 25 | 26 | 27 | 28 | 29 | 30 | 31 | def load_data_dict_zip(splitted_data_path, file_name_prefix): 
32 | 33 | URM_DICT = {} 34 | ICM_DICT = {} 35 | 36 | 37 | dataIO = DataIO(folder_path = splitted_data_path) 38 | 39 | URM_DICT = dataIO.load_data(file_name=file_name_prefix + "URM_dict") 40 | 41 | if URM_DICT["__ICM_available"]: 42 | ICM_DICT = dataIO.load_data(file_name=file_name_prefix + "ICM_dict") 43 | 44 | 45 | del URM_DICT["__ICM_available"] 46 | 47 | 48 | loaded_data_dict = { 49 | "URM_DICT": URM_DICT, 50 | "ICM_DICT": ICM_DICT, 51 | } 52 | 53 | 54 | return loaded_data_dict 55 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/KNN/ItemKNNCBFRecommender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 23/10/17 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from Base.Recommender_utils import check_matrix 10 | from Base.BaseCBFRecommender import BaseItemCBFRecommender 11 | from Base.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender 12 | from Base.IR_feature_weighting import okapi_BM_25, TF_IDF 13 | import numpy as np 14 | 15 | from Base.Similarity.Compute_Similarity import Compute_Similarity 16 | 17 | 18 | class ItemKNNCBFRecommender(BaseItemCBFRecommender, BaseItemSimilarityMatrixRecommender): 19 | """ ItemKNN recommender""" 20 | 21 | RECOMMENDER_NAME = "ItemKNNCBFRecommender" 22 | 23 | FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"] 24 | 25 | def __init__(self, URM_train, ICM_train, verbose = True): 26 | super(ItemKNNCBFRecommender, self).__init__(URM_train, ICM_train, verbose = verbose) 27 | 28 | 29 | 30 | def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting = "none", **similarity_args): 31 | 32 | self.topK = topK 33 | self.shrink = shrink 34 | 35 | if feature_weighting not in self.FEATURE_WEIGHTING_VALUES: 36 | raise ValueError("Value for 'feature_weighting' not recognized. 
Acceptable values are {}, provided was '{}'".format(self.FEATURE_WEIGHTING_VALUES, feature_weighting)) 37 | 38 | 39 | if feature_weighting == "BM25": 40 | self.ICM_train = self.ICM_train.astype(np.float32) 41 | self.ICM_train = okapi_BM_25(self.ICM_train) 42 | 43 | elif feature_weighting == "TF-IDF": 44 | self.ICM_train = self.ICM_train.astype(np.float32) 45 | self.ICM_train = TF_IDF(self.ICM_train) 46 | 47 | 48 | similarity = Compute_Similarity(self.ICM_train.T, shrink=shrink, topK=topK, normalize=normalize, similarity = similarity, **similarity_args) 49 | 50 | self.W_sparse = similarity.compute_similarity() 51 | self.W_sparse = check_matrix(self.W_sparse, format='csr') 52 | 53 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/KNN/ItemKNNCustomSimilarityRecommender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 23/10/17 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | 10 | import numpy as np 11 | from Base.Recommender_utils import check_matrix, similarityMatrixTopK 12 | from Base.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender 13 | 14 | 15 | class ItemKNNCustomSimilarityRecommender(BaseItemSimilarityMatrixRecommender): 16 | """ ItemKNN recommender""" 17 | 18 | RECOMMENDER_NAME = "ItemKNNCustomSimilarityRecommender" 19 | 20 | def fit(self, W_sparse, selectTopK = False, topK=100): 21 | 22 | assert W_sparse.shape[0] == W_sparse.shape[1],\ 23 | "ItemKNNCustomSimilarityRecommender: W_sparse matrix is not square. Current shape is {}".format(W_sparse.shape) 24 | 25 | assert self.URM_train.shape[1] == W_sparse.shape[0],\ 26 | "ItemKNNCustomSimilarityRecommender: URM_train and W_sparse matrices are not consistent. " \ 27 | "The number of columns in URM_train must be equal to the rows in W_sparse.
" \ 28 | "Current shapes are: URM_train {}, W_sparse {}".format(self.URM_train.shape, W_sparse.shape) 29 | 30 | if selectTopK: 31 | W_sparse = similarityMatrixTopK(W_sparse, k=topK) 32 | 33 | self.W_sparse = check_matrix(W_sparse, format='csr') -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/KNN/ItemKNN_CFCBF_Hybrid_Recommender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 23/10/17 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender 10 | 11 | import scipy.sparse as sps 12 | import numpy as np 13 | 14 | 15 | class ItemKNN_CFCBF_Hybrid_Recommender(ItemKNNCBFRecommender): 16 | """ ItemKNN_CFCBF_Hybrid_Recommender""" 17 | 18 | RECOMMENDER_NAME = "ItemKNN_CFCBF_HybridRecommender" 19 | 20 | def fit(self, ICM_weight = 1.0, **fit_args): 21 | 22 | self.ICM_train = self.ICM_train*ICM_weight 23 | self.ICM_train = sps.hstack([self.ICM_train, self.URM_train.T], format='csr') 24 | 25 | super(ItemKNN_CFCBF_Hybrid_Recommender, self).fit(**fit_args) 26 | 27 | 28 | def _get_cold_item_mask(self): 29 | return np.logical_and(self._cold_item_CBF_mask, self._cold_item_mask) 30 | 31 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/KNN/UserKNNCBFRecommender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 13/03/19 5 | 6 | @author: Simone Boglio 7 | """ 8 | 9 | from Base.Recommender_utils import check_matrix 10 | from Base.BaseCBFRecommender import BaseUserCBFRecommender 11 | from Base.BaseSimilarityMatrixRecommender import BaseUserSimilarityMatrixRecommender 12 | from Base.IR_feature_weighting import okapi_BM_25, TF_IDF 13 | import numpy as np 14 | 15 | from Base.Similarity.Compute_Similarity import Compute_Similarity 16 | 17 | 18 | class UserKNNCBFRecommender(BaseUserCBFRecommender, BaseUserSimilarityMatrixRecommender): 19 | """ UserKNN recommender""" 20 | 21 | RECOMMENDER_NAME = "UserKNNCBFRecommender" 22 | 23 | FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"] 24 | 25 | def __init__(self, URM_train, UCM_train, verbose = True): 26 | super(UserKNNCBFRecommender, self).__init__(URM_train, UCM_train, verbose = verbose) 27 | 28 | 29 | def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting = "none", **similarity_args): 30 | 31 | self.topK = topK 32 | self.shrink = shrink 33 | 34 | if feature_weighting not in self.FEATURE_WEIGHTING_VALUES: 35 | raise ValueError("Value for 'feature_weighting' not recognized. 
Acceptable values are {}, provided was '{}'".format(self.FEATURE_WEIGHTING_VALUES, feature_weighting)) 36 | 37 | 38 | if feature_weighting == "BM25": 39 | self.UCM_train = self.UCM_train.astype(np.float32) 40 | self.UCM_train = okapi_BM_25(self.UCM_train) 41 | 42 | elif feature_weighting == "TF-IDF": 43 | self.UCM_train = self.UCM_train.astype(np.float32) 44 | self.UCM_train = TF_IDF(self.UCM_train) 45 | 46 | 47 | similarity = Compute_Similarity(self.UCM_train.T, shrink=shrink, topK=topK, normalize=normalize, similarity = similarity, **similarity_args) 48 | 49 | self.W_sparse = similarity.compute_similarity() 50 | self.W_sparse = check_matrix(self.W_sparse, format='csr') 51 | 52 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/KNN/UserKNN_CFCBF_Hybrid_Recommender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 23/10/17 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | from Base.BaseSimilarityMatrixRecommender import BaseSimilarityMatrixRecommender 10 | from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender 11 | 12 | import scipy.sparse as sps 13 | import numpy as np 14 | 15 | 16 | class UserKNN_CFCBF_Hybrid_Recommender(UserKNNCBFRecommender, BaseSimilarityMatrixRecommender): 17 | """ UserKNN_CFCBF_Hybrid_Recommender""" 18 | 19 | RECOMMENDER_NAME = "UserKNN_CFCBF_Hybrid_Recommender" 20 | 21 | def fit(self, UCM_weight = 1.0, **fit_args): 22 | 23 | self.UCM_train = self.UCM_train*UCM_weight 24 | self.UCM_train = sps.hstack([self.UCM_train, self.URM_train], format='csr') 25 | 26 | super(UserKNN_CFCBF_Hybrid_Recommender, self).fit(**fit_args) 27 | 28 | 29 | def _get_cold_user_mask(self): 30 | return np.logical_and(self._cold_user_CBF_mask, self._cold_user_mask) -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Metafeatures/Basic.py: -------------------------------------------------------------------------------- 1 | from Metafeatures.utils import register_func 2 | from collections import OrderedDict 3 | 4 | feature_func_lookup = {} 5 | 6 | feature_list = [ 7 | ("num_users", 8 | OrderedDict()), 9 | ("num_items", 10 | OrderedDict()), 11 | ("num_interactions", 12 | OrderedDict()), 13 | ("sparsity", 14 | OrderedDict()), 15 | ("item_user_ratio", 16 | OrderedDict()) 17 | ] 18 | 19 | # Number of users 20 | @register_func(feature_func_lookup) 21 | def num_users(train_set): 22 | """ 23 | Number of users 24 | Args: 25 | train_set: train set as URM. 26 | 27 | Returns: 28 | Dictionary with the result 29 | """ 30 | return {"": train_set.shape[0]} 31 | 32 | # Number of items 33 | @register_func(feature_func_lookup) 34 | def num_items(train_set): 35 | """ 36 | Number of items 37 | Args: 38 | train_set: train set as URM. 39 | 40 | Returns: 41 | Dictionary with the result 42 | """ 43 | return {"": train_set.shape[1]} 44 | 45 | 46 | @register_func(feature_func_lookup) 47 | def num_interactions(train_set): 48 | """ 49 | Number of interactions 50 | Args: 51 | train_set: train set as URM. 52 | 53 | Returns: 54 | Dictionary with the result 55 | """ 56 | return {"": train_set.nnz} 57 | 58 | @register_func(feature_func_lookup) 59 | def sparsity(train_set): 60 | """ 61 | Sparsity 62 | Args: 63 | train_set: train set as URM. 
64 | 65 | Returns: 66 | Dictionary with the result 67 | """ 68 | return {"": 1 - train_set.nnz / (train_set.shape[0]*train_set.shape[1])} 69 | 70 | @register_func(feature_func_lookup) 71 | def item_user_ratio(train_set): 72 | """ 73 | Item to user ratio 74 | Args: 75 | train_set: train set as URM. 76 | 77 | Returns: 78 | Dictionary with the result 79 | """ 80 | return {"": train_set.shape[1] / train_set.shape[0]} -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Metafeatures/Metafeatures.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a26c5c17e7ee6946b411854d0f483eaf96074091c1d8c9e06361b48473fdaf54 3 | size 1052155 4 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Metafeatures/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Guide to metafeatures module 3 | 4 | ## Overview 5 | This module provides functionality to extract metafeatures for a dataset split (using only the training set). The main entrypoint for the functions in this module is `Featurizer.py`. The functions in this script can be used to extract metafeatures for the full metadataset and save them in a .csv file, and also to extract metafeatures given a new dataset. 6 | 7 | ## Using the featurizer 8 | 9 | To extract all the metafeatures for all of the datasets: 10 | 1. Run `fetch_data.sh`, which gets all dataset splits from the Google Cloud bucket and places them in a local folder, `../all_data`. 11 | 2. Run `Featurizer.py`, which extracts metafeatures for all dataset splits in `../all_data`. The resulting metafeatures are saved to `Metafeatures.csv`. 12 | 13 | To extract metafeatures for a dataset split stored at some other location, use `featurize_dataset_split()` within `Featurizer.py`. 14 | 15 | ## Implementing new features 16 | 17 | This section is only relevant if you wish to add metafeatures to the featurizer. 18 | 19 | `Featurizer.py` keeps track of metafeatures to extract in two data structures: `feature_func_lookup` and `all_features`. A metafeature is obtained by calling a function which returns a dictionary with one or several metafeature values. If some metafeature is obtained by calling a function named `foo` with arguments given in an OrderedDict `kwargs`, then there will be an entry `("foo", kwargs)` in `all_features` (the use of an OrderedDict is so that the feature can be converted into a string consistently). Furthermore, `feature_func_lookup` must contain an entry with the string `"foo"` mapping to function `foo`. 20 | 21 | New features may be implemented by following this general workflow, as long as `all_features` and `feature_func_lookup` in `Featurizer.py` end up containing the necessary entries. The main sets of features currently implemented are given in `Basic.py`, `Landmarkers.py`, and `DistributionFeatures.py`, from which `all_features` and `feature_func_lookup` are currently populated. -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Metafeatures/fetch_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Destination folder 4 | dest=../all_data 5 | # bucket where split data is read.
we expect split data to be in bucket_base/<version>/<dataset> 6 | bucket_base=gs://reczilla-results/dataset-splits 7 | 8 | mkdir $dest 9 | 10 | for version in splits-v3 splits-v5 11 | do 12 | mkdir $dest/$version 13 | gsutil cp -r $bucket_base/$version/* $dest/$version 14 | done 15 | 16 | #bucket_base=gs://reczilla-results/dataset-splits/splits-v3 17 | 18 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Metafeatures/utils.py: -------------------------------------------------------------------------------- 1 | def register_func(registry): 2 | """ 3 | Adds the function to the lookup dictionary "registry", accessible by function name. 4 | Args: 5 | registry: 6 | 7 | Returns: 8 | 9 | """ 10 | def register_func_decorator(func): 11 | registry[func.__name__] = func 12 | return func 13 | return register_func_decorator 14 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/fetch_metadata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dest=../metadatasets 4 | 5 | mkdir $dest 6 | 7 | gsutil cp -r gs://reczilla-results/meta-datasets/* $dest -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/fig.png -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/plots/generate_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib import rc 3 | rc("text", usetex=False) 4 | plt.style.use(['science','ieee','no-latex']) 5 | 6 | x = [2, 8, 14, 20] 7 | y = [11.4, 11.3, 10.6, 0.0] 8 | 9 | plt.plot(x, y, color='blue') 10 | plt.xlabel("# training datasets") 11 | plt.ylabel("Percentage diff.
from best") 12 | 13 | plt.savefig('fig.png') -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/plots/plot_files/perf_vs_datasets_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/plots/plot_files/perf_vs_datasets_plot.pdf -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/plots/plot_files/perf_vs_feats_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/plots/plot_files/perf_vs_feats_plot.pdf -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/ReczillaClassifier/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | def print_special(str, logger): 4 | prnt = f"\n{datetime.now()}: {str}" 5 | print(prnt) 6 | logger.info(prnt) 7 | 8 | def get_logger(logger_name): 9 | import logging 10 | logging.basicConfig(filename=f'ReczillaClassifier/logs/{logger_name}.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG) 11 | logger = logging.getLogger() 12 | return logger -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Slides/RecSys2019_DeepLearning_Evaluation_Poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Slides/RecSys2019_DeepLearning_Evaluation_Poster.pdf -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Slides/RecSys2019_DeepLearning_Evaluation_Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/Slides/RecSys2019_DeepLearning_Evaluation_Slides.pdf -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Utils/assertions_on_data_for_experiments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 15/12/2018 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | 10 | import numpy as np 11 | 12 | def assert_implicit_data(URM_list): 13 | """ 14 | Checks whether the URM in the list only contain implicit data in the form 1 or 0 15 | :param URM_list: 16 | :return: 17 | """ 18 | 19 | for URM in URM_list: 20 | 21 | assert np.all(URM.data == np.ones_like(URM.data)), "assert_implicit_data: URM is not implicit as it contains data other than 1.0" 22 | 23 | 24 | print("Assertion assert_implicit_data: Passed") 25 | 26 | 27 | 28 | def assert_disjoint_matrices(URM_list): 29 | """ 30 | Checks whether the URM in the list have an empty intersection, therefore there is no data point contained in more than one 31 | URM at a time 32 | :param URM_list: 33 | :return: 34 | """ 
35 | 36 | URM_implicit_global = None 37 | 38 | cumulative_nnz = 0 39 | 40 | for URM in URM_list: 41 | 42 | cumulative_nnz += URM.nnz 43 | URM_implicit = URM.copy() 44 | URM_implicit.data = np.ones_like(URM_implicit.data) 45 | 46 | if URM_implicit_global is None: 47 | URM_implicit_global = URM_implicit 48 | 49 | else: 50 | URM_implicit_global += URM_implicit 51 | 52 | 53 | assert cumulative_nnz == URM_implicit_global.nnz, \ 54 | "assert_disjoint_matrices: URM in list are not disjoint, {} data points are in more than one URM".format(cumulative_nnz-URM_implicit_global.nnz) 55 | 56 | 57 | print("Assertion assert_disjoint_matrices: Passed") -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Utils/seconds_to_biggest_unit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on 30/03/2019 5 | 6 | @author: Maurizio Ferrari Dacrema 7 | """ 8 | 9 | 10 | def seconds_to_biggest_unit(time_in_seconds, data_array = None): 11 | 12 | conversion_factor_list = [ 13 | ("sec", 1), 14 | ("min", 60), 15 | ("hour", 60), 16 | ("day", 24), 17 | ("year", 365), 18 | ] 19 | 20 | unit_index = 0 21 | temp_time_value = time_in_seconds 22 | new_time_value = time_in_seconds 23 | new_time_unit = "sec" 24 | 25 | while temp_time_value >= 1.0 and unit_index < len(conversion_factor_list)-1: 26 | 27 | temp_time_value = temp_time_value/conversion_factor_list[unit_index+1][1] 28 | 29 | if temp_time_value >= 1.0: 30 | unit_index += 1 31 | new_time_value = temp_time_value 32 | new_time_unit = conversion_factor_list[unit_index][0] 33 | 34 | if data_array is not None: 35 | data_array /= conversion_factor_list[unit_index+1][1] 36 | 37 | if data_array is not None: 38 | return new_time_value, new_time_unit, data_array 39 | 40 | else: 41 | return new_time_value, new_time_unit 42 | 43 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/Utils/update_old_reczilla_results.py: -------------------------------------------------------------------------------- 1 | # first, download all old reczilla datasets: 2 | # 3 | # run the following command to pull all results: 4 | # gsutil -m rsync gs://reczilla-results/inbox ./inbox -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/data_reader_splitter_class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/data_reader_splitter_class -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_URM_only_warm_users_use_validation_set.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_URM_only_warm_users_use_validation_set.zip -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_mappers_only_warm_users_use_validation_set.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_mappers_only_warm_users_use_validation_set.zip -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_parameters_only_warm_users_use_validation_set.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/RecSys2019_DeepLearning_Evaluation/reczilla_examples/example_split/split_parameters_only_warm_users_use_validation_set.zip -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.29.6 2 | dm-sonnet==1.29 3 | h5py==2.9.0 4 | Keras==2.2.4 5 | Keras-Applications==1.0.7 6 | Keras-Preprocessing==1.0.9 7 | matplotlib==3.0.3 8 | nltk==3.4.5 9 | nose==1.3.7 10 | numpy==1.16.2 11 | pandas==0.24.2 12 | scikit-learn==0.20.3 13 | scikit-optimize==0.9 14 | scipy==1.2.1 15 | seaborn==0.9.0 16 | scikit-surprise==1.1.1 17 | tensorboard==1.13.1 18 | tensorflow==1.13.1 19 | tensorflow-estimator==1.13.0 20 | tensorflow-probability==0.6.0 21 | tornado==6.0.2 22 | tqdm==4.31.1 23 | wrapt==1.11.1 24 | -------------------------------------------------------------------------------- /RecSys2019_DeepLearning_Evaluation/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | Cython==0.29.6 2 | h5py==2.9.0 3 | matplotlib==3.0.3 4 | nltk==3.4.5 5 | numpy==1.16.2 6 | pandas==0.24.2 7 | scikit-learn==0.20.3 8 | scikit-optimize==0.5.2 9 | scipy==1.2.1 10 | tqdm==4.31.1 11 | eli5==0.9.0 12 | -------------------------------------------------------------------------------- /ReczillaModels/item_hit_cov.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/ReczillaModels/item_hit_cov.pickle -------------------------------------------------------------------------------- /ReczillaModels/mrr_10.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/ReczillaModels/mrr_10.pickle -------------------------------------------------------------------------------- /ReczillaModels/prec_10.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/ReczillaModels/prec_10.pickle -------------------------------------------------------------------------------- /ReczillaModels/time_on_train.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/ReczillaModels/time_on_train.pickle -------------------------------------------------------------------------------- /img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/img/logo.png 
-------------------------------------------------------------------------------- /img/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/img/logo2.png -------------------------------------------------------------------------------- /img/logo3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/img/logo3.png -------------------------------------------------------------------------------- /img/reczilla_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naszilla/reczilla/efacb97c331182b919f23c7af9435c4015b76241/img/reczilla_overview.png -------------------------------------------------------------------------------- /notebooks/tables/table_3_final.csv: -------------------------------------------------------------------------------- 1 | \rot{Item-KNN},\rot{P3alpha},\rot{SLIM-BPR},\rot{EASE-R},\rot{RP3beta},\rot{SVD},\rot{SLIM-ElasticNet},\rot{iALS},\rot{NMF},\rot{User-KNN},\rot{MF-Funk},\rot{TopPop},\rot{MF-Asy},\rot{MF-BPR},\rot{Mult-VAE},\rot{U-neural},\rot{GlobalEffects},\rot{CoClustering},\rot{Random},\rot{SlopeOne} 2 | 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,9.0,7.0 3 | 14.0,18.0,14.0,18.0,17.0,16.0,17.0,19.0,14.0,17.0,18.0,19.0,16.0,17.0,20.0,20.0,20.0,19.0,20.0,20.0 4 | 2.3,4.2,4.7,5.3,6.0,6.0,7.0,7.0,7.1,7.6,9.4,10.4,10.7,11.2,11.7,12.3,13.3,14.9,16.2,16.7 5 | 59,57,56,27,58,59,36,56,57,59,58,59,53,58,45,18,59,57,59,25 6 | -------------------------------------------------------------------------------- /notebooks/tables/table_3_final.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lllllllllllllllllllll} 2 | \toprule 3 | {} & \rot{Item-KNN} & \rot{P3alpha} & \rot{SLIM-BPR} & \rot{EASE-R} & \rot{RP3beta} & \rot{SVD} & \rot{SLIM-ElasticNet} & \rot{iALS} & \rot{NMF} & \rot{User-KNN} & \rot{MF-Funk} & \rot{TopPop} & \rot{MF-Asy} & \rot{MF-BPR} & \rot{Mult-VAE} & \rot{U-neural} & \rot{GlobalEffects} & \rot{CoClustering} & \rot{Random} & \rot{SlopeOne} \\ 4 | \midrule 5 | Min. & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 & 2 & 1 & 9 & 7 \\ 6 | Max. 
& 14 & 18 & 14 & 18 & 17 & 16 & 17 & 19 & 14 & 17 & 18 & 19 & 16 & 17 & 20 & 20 & 20 & 19 & 20 & 20 \\ 7 | Mean & 2.3 & 4.2 & 4.7 & 5.3 & 6 & 6 & 7 & 7 & 7.1 & 7.6 & 9.4 & 10.4 & 10.7 & 11.2 & 11.7 & 12.3 & 13.3 & 14.9 & 16.2 & 16.7 \\ 8 | Count & 59 & 57 & 56 & 27 & 58 & 59 & 36 & 56 & 57 & 59 & 58 & 59 & 53 & 58 & 45 & 18 & 59 & 57 & 59 & 25 \\ 9 | \bottomrule 10 | \end{tabular} 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | astor==0.8.1 3 | certifi==2020.6.20 4 | contextlib2==21.6.0 5 | cycler==0.11.0 6 | Cython==0.29.6 7 | dataclasses==0.8 8 | dm-sonnet==1.29 9 | gast==0.5.3 10 | grpcio==1.44.0 11 | h5py==2.9.0 12 | importlib-metadata==4.8.3 13 | joblib==1.1.0 14 | Keras==2.2.4 15 | Keras-Applications==1.0.7 16 | Keras-Preprocessing==1.0.9 17 | kiwisolver==1.3.1 18 | Markdown==3.3.6 19 | matplotlib==3.0.3 20 | mock==4.0.3 21 | nltk==3.4.5 22 | nose==1.3.7 23 | numpy==1.16.2 24 | pandas==0.24.2 25 | protobuf==3.19.4 26 | pyaml==21.10.1 27 | pyparsing==3.0.7 28 | python-dateutil==2.8.2 29 | pytz==2021.3 30 | PyYAML==6.0 31 | scikit-learn==0.20.3 32 | scikit-optimize==0.9.0 33 | scikit-surprise==1.1.1 34 | scipy==1.2.1 35 | seaborn==0.9.0 36 | semantic-version==2.9.0 37 | six==1.16.0 38 | tensorboard==1.13.1 39 | tensorflow==1.13.1 40 | tensorflow-estimator==1.13.0 41 | tensorflow-probability==0.6.0 42 | termcolor==1.1.0 43 | tornado==6.0.2 44 | tqdm==4.31.1 45 | typing_extensions==4.1.1 46 | Werkzeug==2.0.3 47 | wrapt==1.11.1 48 | zipp==3.6.0 -------------------------------------------------------------------------------- /run_reczilla_inference.sh: -------------------------------------------------------------------------------- 1 | cd RecSys2019_DeepLearning_Evaluation 2 | 3 | python -m ReczillaClassifier.run_reczilla \ 4 | --dataset_split_path="all_data/splits-v5/AmazonGiftCards/DataSplitter_leave_k_out_last" \ 5 | --metamodel_filepath="../ReczillaModels/prec_10.pickle" \ 6 | --rec_model_save_path="../prec_10_" 7 | 8 | read -p "Press enter to continue" 9 | 10 | python -m ReczillaClassifier.run_reczilla \ 11 | --dataset_split_path="all_data/splits-v5/AmazonGiftCards/DataSplitter_leave_k_out_last" \ 12 | --metamodel_filepath="../ReczillaModels/time_on_train.pickle" \ 13 | --rec_model_save_path="../train_time_" 14 | 15 | cd .. 
-------------------------------------------------------------------------------- /scripts/alg_list.txt: -------------------------------------------------------------------------------- 1 | ItemKNNCF_asymmetric 2 | ItemKNNCF_tversky 3 | ItemKNNCF_euclidean 4 | ItemKNNCF_cosine 5 | ItemKNNCF_jaccard 6 | ItemKNNCF_dice 7 | UserKNNCF_asymmetric 8 | UserKNNCF_tversky 9 | UserKNNCF_euclidean 10 | UserKNNCF_cosine 11 | UserKNNCF_jaccard 12 | UserKNNCF_dice 13 | TopPop 14 | GlobalEffects 15 | Random 16 | P3alphaRecommender 17 | RP3betaRecommender 18 | MatrixFactorization_FunkSVD_Cython 19 | MatrixFactorization_AsySVD_Cython 20 | MatrixFactorization_BPR_Cython 21 | IALSRecommender 22 | PureSVDRecommender 23 | NMFRecommender 24 | SLIM_BPR_Cython 25 | SLIMElasticNetRecommender 26 | EASE_R_Recommender 27 | Mult_VAE_RecommenderWrapper 28 | DELF_EF_RecommenderWrapper 29 | CoClustering 30 | SlopeOne -------------------------------------------------------------------------------- /scripts/dataset_list.txt: -------------------------------------------------------------------------------- 1 | Anime 2 | BookCrossing 3 | CiaoDVD 4 | Dating 5 | Epinions 6 | FilmTrust 7 | Frappe 8 | GoogleLocalReviews 9 | Gowalla 10 | Jester2 11 | LastFM 12 | MarketBiasAmazon 13 | MarketBiasModCloth 14 | MovieTweetings 15 | Movielens100K 16 | Movielens10M 17 | Movielens1M 18 | Movielens20M 19 | MovielensHetrec2011 20 | NetflixPrize 21 | Recipes 22 | Wikilens -------------------------------------------------------------------------------- /scripts/neural_methods/manually_run_gpu_experiment.sh: -------------------------------------------------------------------------------- 1 | 2 | ################################# 3 | # define args here 4 | 5 | bucket_base=gs://reczilla-results/dataset-splits/splits-v5 6 | 7 | # name of the instance 8 | instance_name=manual-gpu-test 9 | 10 | # experiment args 11 | time_limit=72000 # 20 hrs in seconds 12 | dataset_name=Movielens1M 13 | split_type=DataSplitter_leave_k_out_last 14 | alg_name=INeuRec_RecommenderWrapper 15 | alg_seed=0 16 | num_samples=1 17 | param_seed=3 18 | experiment_name=manual-gpu-test 19 | split_path_on_bucket=${bucket_base}/${dataset_name}/${split_type} 20 | 21 | # put these all in a string 22 | args_str="\ 23 | ${time_limit} \ 24 | ${dataset_name}Reader \ 25 | ${split_type} \ 26 | ${alg_name} \ 27 | /home/shared/split \ 28 | ${alg_seed} \ 29 | ${param_seed} \ 30 | ${num_samples} 31 | /home/shared \ 32 | ${experiment_name} \ 33 | ${split_path_on_bucket}" 34 | 35 | ################################# 36 | 37 | 38 | # constants 39 | image_family=reczilla 40 | zone=us-central1-a 41 | project=research-collab-naszilla 42 | ACCELERATOR_TYPE=nvidia-tesla-t4 43 | ACCELERATOR_COUNT=1 44 | 45 | # create instance 46 | gcloud compute instances create $instance_name --zone=$zone \ 47 | --project=$project --image-family=$image_family \ 48 | --machine-type=n1-highmem-2 \ 49 | --accelerator type=${ACCELERATOR_TYPE},count=${ACCELERATOR_COUNT} \ 50 | --maintenance-policy TERMINATE \ 51 | --scopes=https://www.googleapis.com/auth/devstorage.read_write 52 | 53 | instance_repo_dir=/home/shared/reczilla 54 | instance_script_location=${instance_repo_dir}/scripts/run_experiment_on_instance.sh 55 | 56 | # attempt to run experiment (or, just ssh in and run the commands below) 57 | gcloud compute ssh --ssh-flag="-A" ${instance_name} --zone=${zone} --project=${project} \ 58 | --command="\ 59 | export ARGS=\"${args_str}\"; \ 60 | export SPLIT_PATH_ON_BUCKET=${split_path_on_bucket}; \ 61 | chmod +x 
${instance_script_location}; \ 62 | /bin/bash ${instance_script_location}" 63 | -------------------------------------------------------------------------------- /scripts/update_reczilla_image.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # this script creates a new image in the reczilla family, with the latest codebase. from naszilla/reczilla 4 | 5 | # this assumes that the disk name is the same as the instance name, which is usually true. 6 | 7 | # NOTE: we recommend that you run each of the commands here manually, rather than executing this script directly. We have experienced some issues while executing this script directly. 8 | 9 | zone=us-central1-a 10 | instance=update-reczilla 11 | project=research-collab-naszilla 12 | family=reczilla 13 | service_account=default-compute-instance@research-collab-naszilla.iam.gserviceaccount.com 14 | 15 | echo "creating instance ${instance}..." 16 | # create an instance from the latest reczilla-family image 17 | gcloud compute instances create $instance --zone=$zone \ 18 | --project=$project --image-family=$family \ 19 | --service-account $service_account \ 20 | --scopes=https://www.googleapis.com/auth/devstorage.read_write 21 | 22 | 23 | sleep 10 24 | 25 | echo "finished creating instance ${instance}." 26 | 27 | echo "updating code on instance ${instance}..." 28 | 29 | # ssh in, and update the code 30 | gcloud compute ssh --ssh-flag="-A" ${instance} --zone=${zone} --project=${project} \ 31 | --command="\ 32 | cd /home/shared/reczilla; \ 33 | git pull" 34 | 35 | 36 | echo "finished updating code." 37 | 38 | # stop the instance - better writing 39 | gcloud compute instances stop $instance 40 | 41 | sleep 10 42 | 43 | # create a name for the new image with today's date 44 | new_image_name=reczilla-$(date +"%m%d%y") 45 | 46 | echo "creating image ${new_image_name}..." 
47 | 48 | # create a new image from this instance, and add it to the reczilla family 49 | gcloud compute images create $new_image_name \ 50 | --source-disk $instance \ 51 | --project=$project \ 52 | --source-disk-zone $zone \ 53 | --family $family --force 54 | 55 | echo "finished creating image, deleting instance" 56 | 57 | # delete the instance 58 | printf "Y" | gcloud compute instances delete ${instance} --zone=${zone} --project=$project 59 | -------------------------------------------------------------------------------- /train_reczilla_models.sh: -------------------------------------------------------------------------------- 1 | mkdir -p ReczillaModels 2 | cd RecSys2019_DeepLearning_Evaluation 3 | 4 | python -m ReczillaClassifier.run_reczilla \ 5 | --train_meta \ 6 | --metamodel_filepath="../ReczillaModels/prec_10.pickle" \ 7 | --target_metric="PRECISION_cut_10" \ 8 | --num_algorithms=10 \ 9 | --num_metafeatures=10 10 | 11 | python -m ReczillaClassifier.run_reczilla \ 12 | --train_meta \ 13 | --metamodel_filepath="../ReczillaModels/time_on_train.pickle" \ 14 | --target_metric="time_on_train" \ 15 | --num_algorithms=10 \ 16 | --num_metafeatures=10 17 | 18 | python -m ReczillaClassifier.run_reczilla \ 19 | --train_meta \ 20 | --metamodel_filepath="../ReczillaModels/mrr_10.pickle" \ 21 | --target_metric="MRR_cut_10" \ 22 | --num_algorithms=10 \ 23 | --num_metafeatures=10 24 | 25 | python -m ReczillaClassifier.run_reczilla \ 26 | --train_meta \ 27 | --metamodel_filepath="../ReczillaModels/item_hit_cov.pickle" \ 28 | --target_metric="COVERAGE_ITEM_HIT_cut_10" \ 29 | --num_algorithms=10 \ 30 | --num_metafeatures=10 31 | 32 | cd .. --------------------------------------------------------------------------------
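The training recipe above should generalize to other target metrics. A hedged sketch follows: the metric name "NDCG_cut_10" is an assumption, inferred from the <METRIC>_cut_<K> naming pattern of the four commands above, and the output filename is likewise illustrative; both should be checked against the metrics actually present in the meta-dataset.

cd RecSys2019_DeepLearning_Evaluation

# Train a metamodel for NDCG@10 (metric name assumed; see note above).
python -m ReczillaClassifier.run_reczilla \
    --train_meta \
    --metamodel_filepath="../ReczillaModels/ndcg_10.pickle" \
    --target_metric="NDCG_cut_10" \
    --num_algorithms=10 \
    --num_metafeatures=10

cd ..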